food_ingredient_parser 1.0.0.pre.3 → 1.0.0.pre.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d687958a818bdfe88bfa30bce9ff2a87db5073fe
4
- data.tar.gz: 69c9e1547c02635b305003e16a62666fb9fc4c68
3
+ metadata.gz: b180a987ae477627cd2046f967d54eeb160fc7ab
4
+ data.tar.gz: '0981287cb5348c58a5fc2e0fda39c470a0b10746'
5
5
  SHA512:
6
- metadata.gz: afed94a75e31892e5e80f5b84b3d920634a670fe7dd061a611f2fcbc5adf8ebe294a6709f17a7c2c0fcafc3c6fc12f36c5aff93ce6b1936947fd2984fc393a0c
7
- data.tar.gz: a3d81d578349e279f8630b661b26693c008321b5ed1b2fc76872e8de8175c2258afe744e7ca70eb192a4a65357c3995157e776584d0ca3bb1d23c44a6f4173d8
6
+ metadata.gz: e11033d79eff439544d71c789981361c98192d767687148cca73e02308f288aac474b84f8ffe1b2a08901df481e4a7c03c162bbcc8ed6f5c030b48f12e4ebb61
7
+ data.tar.gz: a44018d0bc1c77d7907e5fa31cf5fe7e919c2e20bf5480b07bbd0de33ff2e6aec4a2a7981b8a22906c574f9140bede380cc87a7917fc84a0f6685190d26925c7
data/README.md CHANGED
@@ -108,6 +108,28 @@ parsed 35 (100.0%), no result 0 (0.0%)
108
108
 
109
109
  If you want to use the output in (shell)scripts, the options `-e -c` may be quite useful.
110
110
 
111
+ ## `to_html`
112
+
113
+ When ingredient lists are entered manually, it can be very useful to show how the text is
114
+ recognized. This can help in understanding why a certain ingredient list cannot be parsed.
115
+
116
+ For this you can use the `to_html` method on the parsed output, which returns the original
117
+ text, augmented with CSS classes for different parts.
118
+
119
+ ```ruby
120
+ require 'food_ingredient_parser'
121
+
122
+ parsed = FoodIngredientParser::Parser.new.parse("Saus (10% tomaat*, zout). * = bio")
123
+ puts parsed.to_html
124
+ ```
125
+
126
+ ```html
127
+ <span class='name'>Saus</span> (<span class='amount'>10%</span>
128
+ <span class='name'>tomaat</span><span class='mark'>*</span>,
129
+ <span class='name'>zout</span>). <span class='note'>* = bio</span>
130
+ ```
131
+
132
+
111
133
  ## Test data
112
134
 
113
135
  [`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
@@ -4,7 +4,7 @@ require 'food_ingredient_parser/version'
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'food_ingredient_parser'
6
6
  s.version = FoodIngredientParser::VERSION
7
- s.date = '2018-08-31'
7
+ s.date = '2018-09-05'
8
8
  s.summary = 'Parser for ingredient lists found on food products.'
9
9
  s.authors = ['wvengen']
10
10
  s.email = ['dev-ruby@willem.engen.nl']
@@ -11,19 +11,19 @@ module FoodIngredientParser::Grammar
11
11
 
12
12
  rule simple_amount
13
13
  ( (
14
- 'of which' / 'at least' /
15
- 'waarvan' / 'ten minste' / 'tenminste' / 'minimaal'
14
+ 'of which'i / 'at least'i / 'minimal'i / 'maximal'i / 'less than'i / 'more than'i /
15
+ 'waarvan'i / 'ten minste'i / 'tenminste'i / 'minimaal'i / 'maximaal'i / 'minder dan'i / 'meer dan'i
16
16
  ) ws* )?
17
- [<>]? ws*
17
+ [±∓~∼∽≂≃≈≲≤<>≥≳]? ws*
18
18
  simple_amount_quantity
19
19
  ( ws+ (
20
- 'minimum' /
21
- 'minimaal' / 'minimum'
20
+ 'minimum'i /
21
+ 'minimaal'i / 'minimum'i
22
22
  ) )?
23
23
  end
24
24
 
25
25
  rule simple_amount_quantity
26
- number ( ws* '-' ws* number )? ws* ( '%' / 'g' / 'mg' / 'gram' / 'ml' )
26
+ number ( ws* '-' ws* number )? ws* ( '%' / 'g'i / 'mg'i / 'gram'i / 'ml'i )
27
27
  end
28
28
 
29
29
  end
@@ -20,7 +20,7 @@ module FoodIngredientParser::Grammar
20
20
 
21
21
  rule mark
22
22
  # mark referencing a footnote
23
- [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾'? / '⁽' [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾' / [†‡•°#^] / '*'+
23
+ [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾'? / '⁽' [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾' / [†‡•°#^] / '*'+ / '(' ws* ( [†‡•°#^] / '*'+ ) ws* ')'
24
24
  end
25
25
 
26
26
  rule digit
@@ -1,4 +1,5 @@
1
1
  require 'treetop/runtime'
2
+ require_relative 'to_html'
2
3
 
3
4
  # Needs to be in grammar namespace so Treetop can find the nodes.
4
5
  module FoodIngredientParser::Grammar
@@ -18,6 +19,8 @@ module FoodIngredientParser::Grammar
18
19
 
19
20
  # Root object, contains everything else.
20
21
  class RootNode < SyntaxNode
22
+ include FoodIngredientParser::ToHtml
23
+
21
24
  def to_h
22
25
  h = { contains: contains.to_a }
23
26
  if notes && notes_ary = to_a_deep(notes, NoteNode)&.map(&:text_value)
@@ -5,20 +5,23 @@ module FoodIngredientParser
5
5
 
6
6
  # @!attribute [r] parser
7
7
  # @return [Treetop::Runtime::CompiledParser] low-level parser object
8
+ # @note This attribute is there for convenience, but may change in the future. Take care.
8
9
  attr_reader :parser
9
10
 
10
11
  # Create a new food ingredient parser
11
- # @return [FoodIngredientParser]
12
+ # @return [FoodIngredientParser::Parser]
12
13
  def initialize
13
14
  @parser = Grammar::RootParser.new
14
15
  end
15
16
 
16
17
  # Parse food ingredient list text into a structured representation.
18
+ #
17
19
  # @option clean [Boolean] pass +false+ to disable correcting frequently occurring issues
18
- # @return [Hash] structured representation of food ingredients
19
- def parse(s, clean: true)
20
+ # @return [FoodIngredientParser::Grammar::RootNode] structured representation of food ingredients
21
+ # @note Unrecognized options are passed to Treetop, but this is not guaranteed to remain so forever.
22
+ def parse(s, clean: true, **options)
20
23
  s = clean(s) if clean
21
- @parser.parse(s)
24
+ @parser.parse(s, **options)
22
25
  end
23
26
 
24
27
  private
@@ -0,0 +1,43 @@
1
+ require 'cgi'
2
+
3
+ # Adds HTML output functionality to a Treetop Node.
4
+ #
5
+ # The node needs to provide the +terminal?+, +text_value+ and +elements+ methods (used by {#to_html}).
6
+ #
7
+ module FoodIngredientParser::ToHtml
8
+
9
+ # Markup original ingredients list text in HTML.
10
+ #
11
+ # The input text is returned as HTML, augmented with CSS classes
12
+ # on +span+s for +name+, +amount+, +mark+ and +note+.
13
+ #
14
+ # @return [String] HTML representation of ingredient list.
15
+ def to_html
16
+ node_to_html(self)
17
+ end
18
+
19
+ private
20
+
21
+ def node_to_html(node, cls=nil)
22
+ el_cls = {} # map of node instances to class names for contained elements
23
+ terminal = node.terminal? # whether to look at children elements or not
24
+
25
+ if node.is_a?(FoodIngredientParser::Grammar::AmountNode)
26
+ cls ||= "amount"
27
+ elsif node.is_a?(FoodIngredientParser::Grammar::NoteNode)
28
+ cls ||= "note"
29
+ terminal = true # NoteNodes may contain other NoteNodes, we want it flat.
30
+ elsif node.is_a?(FoodIngredientParser::Grammar::IngredientNode)
31
+ el_cls[node.name] = "name" if node.respond_to?(:name)
32
+ el_cls[node.mark] = "mark" if node.respond_to?(:mark)
33
+ end
34
+
35
+ val = if terminal
36
+ CGI.escapeHTML(node.text_value)
37
+ else
38
+ node.elements.map {|el| node_to_html(el, el_cls[el]) }.join("")
39
+ end
40
+
41
+ cls ? "<span class='#{cls}'>#{val}</span>" : val
42
+ end
43
+ end
@@ -1,3 +1,3 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.0.0.pre.3'
2
+ VERSION = '1.0.0.pre.4'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0.pre.3
4
+ version: 1.0.0.pre.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-31 00:00:00.000000000 Z
11
+ date: 2018-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop
@@ -55,6 +55,7 @@ files:
55
55
  - lib/food_ingredient_parser/grammar/root.treetop
56
56
  - lib/food_ingredient_parser/nodes.rb
57
57
  - lib/food_ingredient_parser/parser.rb
58
+ - lib/food_ingredient_parser/to_html.rb
58
59
  - lib/food_ingredient_parser/version.rb
59
60
  homepage: https://github.com/q-m/food-ingredient-parser-ruby
60
61
  licenses: