food_ingredient_parser 1.0.0.pre.3 → 1.0.0.pre.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +22 -0
- data/food_ingredient_parser.gemspec +1 -1
- data/lib/food_ingredient_parser/grammar/amount.treetop +6 -6
- data/lib/food_ingredient_parser/grammar/common.treetop +1 -1
- data/lib/food_ingredient_parser/nodes.rb +3 -0
- data/lib/food_ingredient_parser/parser.rb +7 -4
- data/lib/food_ingredient_parser/to_html.rb +43 -0
- data/lib/food_ingredient_parser/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b180a987ae477627cd2046f967d54eeb160fc7ab
|
4
|
+
data.tar.gz: '0981287cb5348c58a5fc2e0fda39c470a0b10746'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e11033d79eff439544d71c789981361c98192d767687148cca73e02308f288aac474b84f8ffe1b2a08901df481e4a7c03c162bbcc8ed6f5c030b48f12e4ebb61
|
7
|
+
data.tar.gz: a44018d0bc1c77d7907e5fa31cf5fe7e919c2e20bf5480b07bbd0de33ff2e6aec4a2a7981b8a22906c574f9140bede380cc87a7917fc84a0f6685190d26925c7
|
data/README.md
CHANGED
@@ -108,6 +108,28 @@ parsed 35 (100.0%), no result 0 (0.0%)
|
|
108
108
|
|
109
109
|
If you want to use the output in (shell)scripts, the options `-e -c` may be quite useful.
|
110
110
|
|
111
|
+
## `to_html`
|
112
|
+
|
113
|
+
When ingredient lists are entered manually, it can be very useful to show how the text is
|
114
|
+
recognized. This can help in understanding why a certain ingredients list cannot be parsed.
|
115
|
+
|
116
|
+
For this you can use the `to_html` method on the parsed output, which returns the original
|
117
|
+
text, augmented with CSS classes for different parts.
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
require 'food_ingredient_parser'
|
121
|
+
|
122
|
+
parsed = FoodIngredientParser::Parser.new.parse("Saus (10% tomaat*, zout). * = bio")
|
123
|
+
puts parsed.to_html
|
124
|
+
```
|
125
|
+
|
126
|
+
```html
|
127
|
+
<span class='name'>Saus</span> (<span class='amount'>10%</span>
|
128
|
+
<span class='name'>tomaat</span><span class='mark'>*</span>,
|
129
|
+
<span class='name'>zout</span>). <span class='note'>* = bio</span>
|
130
|
+
```
|
131
|
+
|
132
|
+
|
111
133
|
## Test data
|
112
134
|
|
113
135
|
[`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
|
@@ -4,7 +4,7 @@ require 'food_ingredient_parser/version'
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = 'food_ingredient_parser'
|
6
6
|
s.version = FoodIngredientParser::VERSION
|
7
|
-
s.date = '2018-
|
7
|
+
s.date = '2018-09-05'
|
8
8
|
s.summary = 'Parser for ingredient lists found on food products.'
|
9
9
|
s.authors = ['wvengen']
|
10
10
|
s.email = ['dev-ruby@willem.engen.nl']
|
@@ -11,19 +11,19 @@ module FoodIngredientParser::Grammar
|
|
11
11
|
|
12
12
|
rule simple_amount
|
13
13
|
( (
|
14
|
-
'of which' / 'at least' /
|
15
|
-
'waarvan' / 'ten minste' / 'tenminste' / 'minimaal'
|
14
|
+
'of which'i / 'at least'i / 'minimal'i / 'maximal'i / 'less than'i / 'more than'i /
|
15
|
+
'waarvan'i / 'ten minste'i / 'tenminste'i / 'minimaal'i / 'maximaal'i / 'minder dan'i / 'meer dan'i
|
16
16
|
) ws* )?
|
17
|
-
[
|
17
|
+
[±∓~∼∽≂≃≈≲≤<>≥≳]? ws*
|
18
18
|
simple_amount_quantity
|
19
19
|
( ws+ (
|
20
|
-
'minimum' /
|
21
|
-
'minimaal' / 'minimum'
|
20
|
+
'minimum'i /
|
21
|
+
'minimaal'i / 'minimum'i
|
22
22
|
) )?
|
23
23
|
end
|
24
24
|
|
25
25
|
rule simple_amount_quantity
|
26
|
-
number ( ws* '-' ws* number )? ws* ( '%' / 'g' / 'mg' / 'gram' / 'ml' )
|
26
|
+
number ( ws* '-' ws* number )? ws* ( '%' / 'g'i / 'mg'i / 'gram'i / 'ml'i )
|
27
27
|
end
|
28
28
|
|
29
29
|
end
|
@@ -20,7 +20,7 @@ module FoodIngredientParser::Grammar
|
|
20
20
|
|
21
21
|
rule mark
|
22
22
|
# mark referencing a footnote
|
23
|
-
[¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾'? / '⁽' [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾' / [†‡•°#^] / '*'+
|
23
|
+
[¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾'? / '⁽' [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾' / [†‡•°#^] / '*'+ / '(' ws* ( [†‡•°#^] / '*'+ ) ws* ')'
|
24
24
|
end
|
25
25
|
|
26
26
|
rule digit
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'treetop/runtime'
|
2
|
+
require_relative 'to_html'
|
2
3
|
|
3
4
|
# Needs to be in grammar namespace so Treetop can find the nodes.
|
4
5
|
module FoodIngredientParser::Grammar
|
@@ -18,6 +19,8 @@ module FoodIngredientParser::Grammar
|
|
18
19
|
|
19
20
|
# Root object, contains everything else.
|
20
21
|
class RootNode < SyntaxNode
|
22
|
+
include FoodIngredientParser::ToHtml
|
23
|
+
|
21
24
|
def to_h
|
22
25
|
h = { contains: contains.to_a }
|
23
26
|
if notes && notes_ary = to_a_deep(notes, NoteNode)&.map(&:text_value)
|
@@ -5,20 +5,23 @@ module FoodIngredientParser
|
|
5
5
|
|
6
6
|
# @!attribute [r] parser
|
7
7
|
# @return [Treetop::Runtime::CompiledParser] low-level parser object
|
8
|
+
# @note This attribute is there for convenience, but may change in the future. Take care.
|
8
9
|
attr_reader :parser
|
9
10
|
|
10
11
|
# Create a new food ingredient parser
|
11
|
-
# @return [FoodIngredientParser]
|
12
|
+
# @return [FoodIngredientParser::Parser]
|
12
13
|
def initialize
|
13
14
|
@parser = Grammar::RootParser.new
|
14
15
|
end
|
15
16
|
|
16
17
|
# Parse food ingredient list text into a structured representation.
|
18
|
+
#
|
17
19
|
# @option clean [Boolean] pass +false+ to disable correcting frequently occurring issues
|
18
|
-
# @return [
|
19
|
-
|
20
|
+
# @return [FoodIngredientParser::Grammar::RootNode] structured representation of food ingredients
|
21
|
+
# @note Unrecognized options are passed to Treetop, but this is not guaranteed to remain so forever.
|
22
|
+
def parse(s, clean: true, **options)
|
20
23
|
s = clean(s) if clean
|
21
|
-
@parser.parse(s)
|
24
|
+
@parser.parse(s, **options)
|
22
25
|
end
|
23
26
|
|
24
27
|
private
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
# Adds HTML output functionality to a Treetop Node.
|
4
|
+
#
|
5
|
+
# The node needs to provide a {#to_h} method (for {#to_html}).
|
6
|
+
#
|
7
|
+
module FoodIngredientParser::ToHtml
|
8
|
+
|
9
|
+
# Markup original ingredients list text in HTML.
|
10
|
+
#
|
11
|
+
# The input text is returned as HTML, augmented with CSS classes
|
12
|
+
# on +span+s for +name+, +amount+, +mark+ and +note+.
|
13
|
+
#
|
14
|
+
# @return [String] HTML representation of ingredient list.
|
15
|
+
def to_html
|
16
|
+
node_to_html(self)
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
def node_to_html(node, cls=nil)
|
22
|
+
el_cls = {} # map of node instances to class names for contained elements
|
23
|
+
terminal = node.terminal? # whether to look at children elements or not
|
24
|
+
|
25
|
+
if node.is_a?(FoodIngredientParser::Grammar::AmountNode)
|
26
|
+
cls ||= "amount"
|
27
|
+
elsif node.is_a?(FoodIngredientParser::Grammar::NoteNode)
|
28
|
+
cls ||= "note"
|
29
|
+
terminal = true # NoteNodes may contain other NoteNodes, we want it flat.
|
30
|
+
elsif node.is_a?(FoodIngredientParser::Grammar::IngredientNode)
|
31
|
+
el_cls[node.name] = "name" if node.respond_to?(:name)
|
32
|
+
el_cls[node.mark] = "mark" if node.respond_to?(:mark)
|
33
|
+
end
|
34
|
+
|
35
|
+
val = if terminal
|
36
|
+
CGI.escapeHTML(node.text_value)
|
37
|
+
else
|
38
|
+
node.elements.map {|el| node_to_html(el, el_cls[el]) }.join("")
|
39
|
+
end
|
40
|
+
|
41
|
+
cls ? "<span class='#{cls}'>#{val}</span>" : val
|
42
|
+
end
|
43
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: food_ingredient_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0.pre.
|
4
|
+
version: 1.0.0.pre.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wvengen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|
@@ -55,6 +55,7 @@ files:
|
|
55
55
|
- lib/food_ingredient_parser/grammar/root.treetop
|
56
56
|
- lib/food_ingredient_parser/nodes.rb
|
57
57
|
- lib/food_ingredient_parser/parser.rb
|
58
|
+
- lib/food_ingredient_parser/to_html.rb
|
58
59
|
- lib/food_ingredient_parser/version.rb
|
59
60
|
homepage: https://github.com/q-m/food-ingredient-parser-ruby
|
60
61
|
licenses:
|