food_ingredient_parser 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2c529b63bd3a9f6139ed10663b2cef70ff3d1dc6
4
- data.tar.gz: a8effec91559e15920794c61b08a486e572032cf
3
+ metadata.gz: 629792af43b06c646b98c2ab8ee136895123e3e5
4
+ data.tar.gz: 6abbe88e79943cedb8b8776146aab6fdffd427e1
5
5
  SHA512:
6
- metadata.gz: 35f27c7d83effc16962a65ac4c8c09fb5694373dbd3d2745c434c37ddcf3fc466264c0f10cbe5054876517839a81bf75ab0b1b9876098c1b04b5312138e06ea1
7
- data.tar.gz: 28e517777928262b45836d899ff919f725e09ab6e116fd9f58545262b7ae7151821ae683e6d33146d3b5e20f5c02ad7217c4985267460645b0ba80e1ccc19751
6
+ metadata.gz: c1728c6edc995f6a5a18b82eae0e217fa18814c75f1b34d33d52abc4f593428020ba6a425ea526519036a5d4a412a80b43aae55147b2a07bae0e0979394b26bd
7
+ data.tar.gz: a9f43ad8e20ab867293e1db59aeb5b7bd6147a6e560adb58159f345d441c6445be453b7eb4ba01bdd4169ae951444e4bf90a6e7873cf85cfa7557b4ac4859d36
data/README.md CHANGED
@@ -121,7 +121,7 @@ When ingredient lists are entered manually, it can be very useful to show how th
121
121
  recognized. This can help understanding why a certain ingredients list cannot be parsed.
122
122
 
123
123
  For this you can use the `to_html` method on the parsed output, which returns the original
124
- text, augmented with CSS classes for different parts. (Available for strict parser only.)
124
+ text, augmented with CSS classes for different parts.
125
125
 
126
126
  ```ruby
127
127
  require 'food_ingredient_parser'
@@ -178,8 +178,8 @@ Even though the strict parser would not give a result, the loose parser returns:
178
178
 
179
179
  From the 1.0.0 release, the main interface will be stable. This comprises the two parser's `parse`
180
180
  methods (incl. documented options), its `nil` result when parsing failed, and the parsed output's
181
- `to_h` and `to_html` methods (where available). Please note that parsed node trees may be subject to
182
- change, even within a major release. Within a minor release, node trees are expected to remain stable.
181
+ `to_h` and `to_html` methods. Please note that parsed node trees may be subject to change, even within
182
+ a major release. Within a minor release, node trees are expected to remain stable.
183
183
 
184
184
  So if you only use the stable interface (`parse`, `to_h` and `to_html`), you can lock your version
185
185
  to e.g. `~> 1.0`. If you depend on more, lock your version against e.g. `~> 1.0.0` and test when you
@@ -1,6 +1,10 @@
1
+ require_relative 'to_html'
2
+
1
3
  module FoodIngredientParser::Loose
2
4
  # Parsing result.
3
5
  class Node
6
+ include ToHtml
7
+
4
8
  attr_accessor :name, :mark, :amount, :contains, :notes
5
9
  attr_reader :input, :interval, :auto_close
6
10
 
@@ -149,9 +149,10 @@ module FoodIngredientParser::Loose
149
149
 
150
150
  def is_notes_start?
151
151
  # @todo use more heuristics: don't assume dot is notes when separator is a dot, and only toplevel?
152
- if ( is_mark? && @s[@i+mark_len..-1] =~ /\A\s*=/ ) || # "* = Biologisch"
153
- ( is_mark? && @s[@i-1] =~ /\s/ ) || # " **Biologisch"
154
- ( @s[@i..-1] =~ NOTE_RE ) # "E=", "Kan sporen van", ...
152
+ ml = mark_len
153
+ if ( is_mark? && @s[@i+ml .. -1] =~ /\A\s*=/ ) || # "* = Biologisch"
154
+ ( is_mark? && @s[@i-1] =~ /\s/ && @s[@i+ml .. -1] =~ /\A\s*\w/ ) || # " **Biologisch"
155
+ ( @s[@i..-1] =~ NOTE_RE ) # "E=", "Kan sporen van", ...
155
156
  @i -= 1 # we want to include the mark in the note
156
157
  true
157
158
  # End of sentence
@@ -196,7 +197,7 @@ module FoodIngredientParser::Loose
196
197
  cur.name ||= begin
197
198
  i, j = cur.interval.first, @i - 1
198
199
  i += mark_len(i) # skip any mark in front
199
- Node.new(@s, i .. j) if j > i
200
+ Node.new(@s, i .. j) if j >= i
200
201
  end
201
202
  end
202
203
 
@@ -0,0 +1,66 @@
1
+ require 'cgi'
2
+
3
+ # Adds HTML output functionality to a Node.
4
+ #
5
+ module FoodIngredientParser::Loose
6
+ module ToHtml
7
+
8
+ # Markup original ingredients list text in HTML.
9
+ #
10
+ # The input text is returned as HTML, augmented with CSS classes
11
+ # on +span+s for +name+, +amount+, +mark+ and +note+.
12
+ #
13
+ # @return [String] HTML representation of ingredient list.
14
+ def to_html
15
+ node_to_html(self)
16
+ end
17
+
18
+ private
19
+
20
+ def node_to_html(node, depth=0)
21
+ children = [*node.contains, *node.notes, node.name, node.amount, node.mark].compact
22
+ children.sort_by! {|n| n.interval.first }
23
+
24
+ html = ""
25
+ last_idx = node.interval.first - 1
26
+ children.each do |child|
27
+ # we don't have nodes for all characters, make sure they are in the output
28
+ if child.interval.first - 1 > last_idx
29
+ html += CGI.escapeHTML(node.input[last_idx + 1 .. child.interval.first - 1])
30
+ last_idx = child.interval.first - 1
31
+ end
32
+
33
+ if child == node.name
34
+ html += node_to_html_single(child, "name")
35
+ last_idx = child.interval.last
36
+ elsif child == node.amount
37
+ html += node_to_html_single(child, "amount")
38
+ last_idx = child.interval.last
39
+ elsif child == node.mark
40
+ html += node_to_html_single(child, "mark")
41
+ last_idx = child.interval.last
42
+ elsif node.notes.include?(child)
43
+ html += node_to_html_single(child, "note")
44
+ last_idx = child.interval.last
45
+ elsif node.contains.include?(child)
46
+ cls = "depth#{depth}"
47
+ cls = "contains #{cls}" if depth > 0
48
+ html += "<span class='#{cls}'>#{node_to_html(child, depth + 1)}</span>"
49
+ last_idx = child.interval.last
50
+ end
51
+ end
52
+
53
+ # include any trailing characters
54
+ if children.any? && last_idx < node.interval.last
55
+ html += CGI.escapeHTML(node.input[last_idx + 1 .. node.interval.last])
56
+ end
57
+
58
+ html
59
+ end
60
+
61
+ def node_to_html_single(node, cls=nil)
62
+ ws1, txt, ws2 = node.text_value.match(/\A(\s*)(.*?)(\s*)\z/).captures.map {|s| CGI.escapeHTML(s)}
63
+ cls && txt.size > 0 ? "#{ws1}<span class='#{cls}'>#{txt}</span>#{ws2}" : "#{ws1}#{txt}#{ws2}"
64
+ end
65
+ end
66
+ end
@@ -8,7 +8,9 @@ module FoodIngredientParser::Loose
8
8
  #
9
9
  # When a contained node is found which doesn't have a name:
10
10
  # * For the amount (if any): ignore it (as it's often ambiguous which ingredient it belongs to)
11
- # * For the marks (if any): ignore it (we might instead add it to the containing ingredients)
11
+ # * For the marks (if any)
12
+ # - if the node has no siblings and no containing ingredients, add the mark to the parent (if any)
13
+ # - else ignore it (we might instead add it to the containing ingredients)
12
14
  # * For the containing ingredients (if any):
13
15
  # - if the previous ingredient is present and doesn't contain ingredients already,
14
16
  # assume the current contained ingredients are actually part of the previous ingredient.
@@ -45,6 +47,9 @@ module FoodIngredientParser::Loose
45
47
  if prev
46
48
  # there is a previous ingredient: move children to new parent
47
49
  prev.contains.push(*child.contains)
50
+ elsif child.mark && !node.mark && child.contains.empty? && !child.amount
51
+ # this is just a mark without siblings: it's a mark for its parent
52
+ node.mark = child.mark
48
53
  else
49
54
  # there is no previous ingredient: move children one level up
50
55
  new_contains.push(*child.contains)
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.0.0'
3
- VERSION_DATE = '2018-09-21'
2
+ VERSION = '1.1.0'
3
+ VERSION_DATE = '2018-09-24'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-21 00:00:00.000000000 Z
11
+ date: 2018-09-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop
@@ -46,6 +46,7 @@ files:
46
46
  - lib/food_ingredient_parser/loose/node.rb
47
47
  - lib/food_ingredient_parser/loose/parser.rb
48
48
  - lib/food_ingredient_parser/loose/scanner.rb
49
+ - lib/food_ingredient_parser/loose/to_html.rb
49
50
  - lib/food_ingredient_parser/loose/transform/amount.rb
50
51
  - lib/food_ingredient_parser/loose/transform/amount_from_name.treetop
51
52
  - lib/food_ingredient_parser/loose/transform/handle_missing_name.rb