food_ingredient_parser 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/food_ingredient_parser/loose/node.rb +4 -0
- data/lib/food_ingredient_parser/loose/scanner.rb +5 -4
- data/lib/food_ingredient_parser/loose/to_html.rb +66 -0
- data/lib/food_ingredient_parser/loose/transform/handle_missing_name.rb +6 -1
- data/lib/food_ingredient_parser/version.rb +2 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 629792af43b06c646b98c2ab8ee136895123e3e5
|
4
|
+
data.tar.gz: 6abbe88e79943cedb8b8776146aab6fdffd427e1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c1728c6edc995f6a5a18b82eae0e217fa18814c75f1b34d33d52abc4f593428020ba6a425ea526519036a5d4a412a80b43aae55147b2a07bae0e0979394b26bd
|
7
|
+
data.tar.gz: a9f43ad8e20ab867293e1db59aeb5b7bd6147a6e560adb58159f345d441c6445be453b7eb4ba01bdd4169ae951444e4bf90a6e7873cf85cfa7557b4ac4859d36
|
data/README.md
CHANGED
@@ -121,7 +121,7 @@ When ingredient lists are entered manually, it can be very useful to show how th
|
|
121
121
|
recognized. This can help in understanding why a certain ingredients list cannot be parsed.
|
122
122
|
|
123
123
|
For this you can use the `to_html` method on the parsed output, which returns the original
|
124
|
-
text, augmented with CSS classes for different parts.
|
124
|
+
text, augmented with CSS classes for different parts.
|
125
125
|
|
126
126
|
```ruby
|
127
127
|
require 'food_ingredient_parser'
|
@@ -178,8 +178,8 @@ Even though the strict parser would not give a result, the loose parser returns:
|
|
178
178
|
|
179
179
|
From the 1.0.0 release, the main interface will be stable. This comprises the two parsers' `parse`
|
180
180
|
methods (incl. documented options), their `nil` result when parsing fails, and the parsed output's
|
181
|
-
`to_h` and `to_html` methods
|
182
|
-
|
181
|
+
`to_h` and `to_html` methods. Please note that parsed node trees may be subject to change, even within
|
182
|
+
a major release. Within a minor release, node trees are expected to remain stable.
|
183
183
|
|
184
184
|
So if you only use the stable interface (`parse`, `to_h` and `to_html`), you can lock your version
|
185
185
|
to e.g. `~> 1.0`. If you depend on more, lock your version against e.g. `~> 1.0.0` and test when you
|
@@ -149,9 +149,10 @@ module FoodIngredientParser::Loose
|
|
149
149
|
|
150
150
|
def is_notes_start?
|
151
151
|
# @todo use more heuristics: don't assume dot is notes when separator is a dot, and only toplevel?
|
152
|
-
|
153
|
-
|
154
|
-
( @s[@i
|
152
|
+
ml = mark_len
|
153
|
+
if ( is_mark? && @s[@i+ml .. -1] =~ /\A\s*=/ ) || # "* = Biologisch"
|
154
|
+
( is_mark? && @s[@i-1] =~ /\s/ && @s[@i+ml .. -1] =~ /\A\s*\w/ ) || # " **Biologisch"
|
155
|
+
( @s[@i..-1] =~ NOTE_RE ) # "E=", "Kan sporen van", ...
|
155
156
|
@i -= 1 # we want to include the mark in the note
|
156
157
|
true
|
157
158
|
# End of sentence
|
@@ -196,7 +197,7 @@ module FoodIngredientParser::Loose
|
|
196
197
|
cur.name ||= begin
|
197
198
|
i, j = cur.interval.first, @i - 1
|
198
199
|
i += mark_len(i) # skip any mark in front
|
199
|
-
Node.new(@s, i .. j) if j
|
200
|
+
Node.new(@s, i .. j) if j >= i
|
200
201
|
end
|
201
202
|
end
|
202
203
|
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
# Adds HTML output functionality to a Node.
|
4
|
+
#
|
5
|
+
module FoodIngredientParser::Loose
|
6
|
+
module ToHtml
|
7
|
+
|
8
|
+
# Markup original ingredients list text in HTML.
|
9
|
+
#
|
10
|
+
# The input text is returned as HTML, augmented with CSS classes
|
11
|
+
# on +span+s for +name+, +amount+, +mark+ and +note+.
|
12
|
+
#
|
13
|
+
# @return [String] HTML representation of ingredient list.
|
14
|
+
def to_html
|
15
|
+
node_to_html(self)
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def node_to_html(node, depth=0)
|
21
|
+
children = [*node.contains, *node.notes, node.name, node.amount, node.mark].compact
|
22
|
+
children.sort_by! {|n| n.interval.first }
|
23
|
+
|
24
|
+
html = ""
|
25
|
+
last_idx = node.interval.first - 1
|
26
|
+
children.each do |child|
|
27
|
+
# we don't have nodes for all characters, make sure they are in the output
|
28
|
+
if child.interval.first - 1 > last_idx
|
29
|
+
html += CGI.escapeHTML(node.input[last_idx + 1 .. child.interval.first - 1])
|
30
|
+
last_idx = child.interval.first - 1
|
31
|
+
end
|
32
|
+
|
33
|
+
if child == node.name
|
34
|
+
html += node_to_html_single(child, "name")
|
35
|
+
last_idx = child.interval.last
|
36
|
+
elsif child == node.amount
|
37
|
+
html += node_to_html_single(child, "amount")
|
38
|
+
last_idx = child.interval.last
|
39
|
+
elsif child == node.mark
|
40
|
+
html += node_to_html_single(child, "mark")
|
41
|
+
last_idx = child.interval.last
|
42
|
+
elsif node.notes.include?(child)
|
43
|
+
html += node_to_html_single(child, "note")
|
44
|
+
last_idx = child.interval.last
|
45
|
+
elsif node.contains.include?(child)
|
46
|
+
cls = "depth#{depth}"
|
47
|
+
cls = "contains #{cls}" if depth > 0
|
48
|
+
html += "<span class='#{cls}'>#{node_to_html(child, depth + 1)}</span>"
|
49
|
+
last_idx = child.interval.last
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# include any trailing characters
|
54
|
+
if children.any? && last_idx < node.interval.last
|
55
|
+
html += CGI.escapeHTML(node.input[last_idx + 1 .. node.interval.last])
|
56
|
+
end
|
57
|
+
|
58
|
+
html
|
59
|
+
end
|
60
|
+
|
61
|
+
def node_to_html_single(node, cls=nil)
|
62
|
+
ws1, txt, ws2 = node.text_value.match(/\A(\s*)(.*?)(\s*)\z/).captures.map {|s| CGI.escapeHTML(s)}
|
63
|
+
cls && txt.size > 0 ? "#{ws1}<span class='#{cls}'>#{txt}</span>#{ws2}" : "#{ws1}#{txt}#{ws2}"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -8,7 +8,9 @@ module FoodIngredientParser::Loose
|
|
8
8
|
#
|
9
9
|
# When a contained node is found which doesn't have a name:
|
10
10
|
# * For the amount (if any): ignore it (as it's often ambiguous which ingredient it belongs to)
|
11
|
-
# * For the marks (if any)
|
11
|
+
# * For the marks (if any)
|
12
|
+
# - if the node has no siblings and no containing ingredients, add the mark to the parent (if any)
|
13
|
+
# - else ignore it (we might instead add it to the containing ingredients)
|
12
14
|
# * For the containing ingredients (if any):
|
13
15
|
# - if the previous ingredient is present and doesn't contain ingredients already,
|
14
16
|
# assume the current contained ingredients are actually part of the previous ingredient.
|
@@ -45,6 +47,9 @@ module FoodIngredientParser::Loose
|
|
45
47
|
if prev
|
46
48
|
# there is a previous ingredient: move children to new parent
|
47
49
|
prev.contains.push(*child.contains)
|
50
|
+
elsif child.mark && !node.mark && child.contains.empty? && !child.amount
|
51
|
+
# this is just a mark without siblings: it's a mark for its parent
|
52
|
+
node.mark = child.mark
|
48
53
|
else
|
49
54
|
# there is no previous ingredient: move children one level up
|
50
55
|
new_contains.push(*child.contains)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: food_ingredient_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wvengen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|
@@ -46,6 +46,7 @@ files:
|
|
46
46
|
- lib/food_ingredient_parser/loose/node.rb
|
47
47
|
- lib/food_ingredient_parser/loose/parser.rb
|
48
48
|
- lib/food_ingredient_parser/loose/scanner.rb
|
49
|
+
- lib/food_ingredient_parser/loose/to_html.rb
|
49
50
|
- lib/food_ingredient_parser/loose/transform/amount.rb
|
50
51
|
- lib/food_ingredient_parser/loose/transform/amount_from_name.treetop
|
51
52
|
- lib/food_ingredient_parser/loose/transform/handle_missing_name.rb
|