RubyGems - food_ingredient_parser - Versions diffs - 1.0.0 → 1.1.0 - Mend

food_ingredient_parser 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

checksums.yaml +4 -4
data/README.md +3 -3
data/lib/food_ingredient_parser/loose/node.rb +4 -0
data/lib/food_ingredient_parser/loose/scanner.rb +5 -4
data/lib/food_ingredient_parser/loose/to_html.rb +66 -0
data/lib/food_ingredient_parser/loose/transform/handle_missing_name.rb +6 -1
data/lib/food_ingredient_parser/version.rb +2 -2
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 2c529b63bd3a9f6139ed10663b2cef70ff3d1dc6
-  data.tar.gz: a8effec91559e15920794c61b08a486e572032cf
+  metadata.gz: 629792af43b06c646b98c2ab8ee136895123e3e5
+  data.tar.gz: 6abbe88e79943cedb8b8776146aab6fdffd427e1
 SHA512:
-  metadata.gz: 35f27c7d83effc16962a65ac4c8c09fb5694373dbd3d2745c434c37ddcf3fc466264c0f10cbe5054876517839a81bf75ab0b1b9876098c1b04b5312138e06ea1
-  data.tar.gz: 28e517777928262b45836d899ff919f725e09ab6e116fd9f58545262b7ae7151821ae683e6d33146d3b5e20f5c02ad7217c4985267460645b0ba80e1ccc19751
+  metadata.gz: c1728c6edc995f6a5a18b82eae0e217fa18814c75f1b34d33d52abc4f593428020ba6a425ea526519036a5d4a412a80b43aae55147b2a07bae0e0979394b26bd
+  data.tar.gz: a9f43ad8e20ab867293e1db59aeb5b7bd6147a6e560adb58159f345d441c6445be453b7eb4ba01bdd4169ae951444e4bf90a6e7873cf85cfa7557b4ac4859d36

data/README.md CHANGED Viewed

@@ -121,7 +121,7 @@ When ingredient lists are entered manually, it can be very useful to show how th
 recognized. This can help understanding why a certain ingredients list cannot be parsed.
 For this you can use the `to_html` method on the parsed output, which returns the original
-text, augmented with CSS classes for different parts. (Available for strict parser only.)
+text, augmented with CSS classes for different parts.
 ```ruby
 require 'food_ingredient_parser'
@@ -178,8 +178,8 @@ Even though the strict parser would not give a result, the loose parser returns:
 From the 1.0.0 release, the main interface will be stable. This comprises the two parser's `parse`
 methods (incl. documented options), its `nil` result when parsing failed, and the parsed output's
-`to_h` and `to_html` methods (where available). Please note that parsed node trees may be subject to
-change, even within a major release. Within a minor release, node trees are expected to remain stable.
+`to_h` and `to_html` methods. Please note that parsed node trees may be subject to change, even within
+a major release. Within a minor release, node trees are expected to remain stable.
 So if you only use the stable interface (`parse`, `to_h` and `to_html`), you can lock your version
 to e.g. `~> 1.0`. If you depend on more, lock your version against e.g. `~> 1.0.0` and test when you

data/lib/food_ingredient_parser/loose/node.rb CHANGED Viewed

@@ -1,6 +1,10 @@
+require_relative 'to_html'
 module FoodIngredientParser::Loose
   # Parsing result.
   class Node
+    include ToHtml
     attr_accessor :name, :mark, :amount, :contains, :notes
     attr_reader :input, :interval, :auto_close

data/lib/food_ingredient_parser/loose/scanner.rb CHANGED Viewed

@@ -149,9 +149,10 @@ module FoodIngredientParser::Loose
     def is_notes_start?
       # @todo use more heuristics: don't assume dot is notes when separator is a dot, and only toplevel?
-      if ( is_mark? && @s[@i+mark_len..-1] =~ /\A\s*=/ ) ||     # "* = Biologisch"
-         ( is_mark? && @s[@i-1] =~ /\s/ ) ||                    # " **Biologisch"
-         ( @s[@i..-1] =~ NOTE_RE )                              # "E=", "Kan sporen van", ...
+      ml = mark_len
+      if ( is_mark? && @s[@i+ml .. -1] =~ /\A\s*=/ ) ||                      # "* = Biologisch"
+         ( is_mark? && @s[@i-1] =~ /\s/ && @s[@i+ml .. -1] =~ /\A\s*\w/ ) || # " **Biologisch"
+         ( @s[@i..-1] =~ NOTE_RE )                                           # "E=", "Kan sporen van", ...
         @i -= 1 # we want to include the mark in the note
         true
       # End of sentence
@@ -196,7 +197,7 @@ module FoodIngredientParser::Loose
       cur.name ||= begin
         i, j = cur.interval.first, @i - 1
         i += mark_len(i) # skip any mark in front
-        Node.new(@s, i .. j) if j > i
+        Node.new(@s, i .. j) if j >= i
       end
     end

data/lib/food_ingredient_parser/loose/to_html.rb ADDED Viewed

@@ -0,0 +1,66 @@
+require 'cgi'
+# Adds HTML output functionality to a Node.
+#
+module FoodIngredientParser::Loose
+  module ToHtml
+    # Markup original ingredients list text in HTML.
+    #
+    # The input text is returned as HTML, augmented with CSS classes
+    # on +span+s for +name+, +amount+, +mark+ and +note+.
+    #
+    # @return [String] HTML representation of ingredient list.
+    def to_html
+      node_to_html(self)
+    end
+    private
+    def node_to_html(node, depth=0)
+      children = [*node.contains, *node.notes, node.name, node.amount, node.mark].compact
+      children.sort_by! {|n| n.interval.first }
+      html = ""
+      last_idx = node.interval.first - 1
+      children.each do |child|
+        # we don't have nodes for all characters, make sure they are in the output
+        if child.interval.first - 1 > last_idx
+          html += CGI.escapeHTML(node.input[last_idx + 1 .. child.interval.first - 1])
+          last_idx = child.interval.first - 1
+        end
+        if child == node.name
+          html += node_to_html_single(child, "name")
+          last_idx = child.interval.last
+        elsif child == node.amount
+          html += node_to_html_single(child, "amount")
+          last_idx = child.interval.last
+        elsif child == node.mark
+          html += node_to_html_single(child, "mark")
+          last_idx = child.interval.last
+        elsif node.notes.include?(child)
+          html += node_to_html_single(child, "note")
+          last_idx = child.interval.last
+        elsif node.contains.include?(child)
+          cls = "depth#{depth}"
+          cls = "contains #{cls}" if depth > 0
+          html += "<span class='#{cls}'>#{node_to_html(child, depth + 1)}</span>"
+          last_idx = child.interval.last
+        end
+      end
+      # include any trailing characters
+      if children.any? && last_idx < node.interval.last
+        html += CGI.escapeHTML(node.input[last_idx + 1 .. node.interval.last])
+      end
+      html
+    end
+    def node_to_html_single(node, cls=nil)
+      ws1, txt, ws2 = node.text_value.match(/\A(\s*)(.*?)(\s*)\z/).captures.map {|s| CGI.escapeHTML(s)}
+      cls && txt.size > 0 ? "#{ws1}<span class='#{cls}'>#{txt}</span>#{ws2}" : "#{ws1}#{txt}#{ws2}"
+    end
+  end
+end

data/lib/food_ingredient_parser/loose/transform/handle_missing_name.rb CHANGED Viewed

@@ -8,7 +8,9 @@ module FoodIngredientParser::Loose
     #
     # When a contained node is found which doesn't have a name:
     # * For the amount (if any): ignore it (as it's often ambiguous which ingredient it belongs to)
-    # * For the marks (if any): ignore it (we might instead add it to the containing ingredients)
+    # * For the marks (if any)
+    #   - if the node has no siblings and no containing ingredients, add the mark to the parent (if any)
+    #   - else ignore it (we might instead add it to the containing ingredients)
     # * For the containing ingredients (if any):
     #   - if the previous ingredient is present and doesn't contain ingredients already,
     #     assume the current contained ingredients are actually part of the previous ingredient.
@@ -45,6 +47,9 @@ module FoodIngredientParser::Loose
             if prev
               # there is a previous ingredient: move children to new parent
               prev.contains.push(*child.contains)
+            elsif child.mark && !node.mark && child.contains.empty? && !child.amount
+              # this is just a mark without siblings: it's a mark for its parent
+              node.mark = child.mark
             else
               # there is no previous ingredient: move children one level up
               new_contains.push(*child.contains)

data/lib/food_ingredient_parser/version.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 module FoodIngredientParser
-  VERSION      = '1.0.0'
-  VERSION_DATE = '2018-09-21'
+  VERSION      = '1.1.0'
+  VERSION_DATE = '2018-09-24'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: food_ingredient_parser
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.1.0
 platform: ruby
 authors:
 - wvengen
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-09-21 00:00:00.000000000 Z
+date: 2018-09-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: treetop
@@ -46,6 +46,7 @@ files:
 - lib/food_ingredient_parser/loose/node.rb
 - lib/food_ingredient_parser/loose/parser.rb
 - lib/food_ingredient_parser/loose/scanner.rb
+- lib/food_ingredient_parser/loose/to_html.rb
 - lib/food_ingredient_parser/loose/transform/amount.rb
 - lib/food_ingredient_parser/loose/transform/amount_from_name.treetop
 - lib/food_ingredient_parser/loose/transform/handle_missing_name.rb