food_ingredient_parser 1.0.0.pre.3 → 1.0.0.pre.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d687958a818bdfe88bfa30bce9ff2a87db5073fe
4
- data.tar.gz: 69c9e1547c02635b305003e16a62666fb9fc4c68
3
+ metadata.gz: b180a987ae477627cd2046f967d54eeb160fc7ab
4
+ data.tar.gz: '0981287cb5348c58a5fc2e0fda39c470a0b10746'
5
5
  SHA512:
6
- metadata.gz: afed94a75e31892e5e80f5b84b3d920634a670fe7dd061a611f2fcbc5adf8ebe294a6709f17a7c2c0fcafc3c6fc12f36c5aff93ce6b1936947fd2984fc393a0c
7
- data.tar.gz: a3d81d578349e279f8630b661b26693c008321b5ed1b2fc76872e8de8175c2258afe744e7ca70eb192a4a65357c3995157e776584d0ca3bb1d23c44a6f4173d8
6
+ metadata.gz: e11033d79eff439544d71c789981361c98192d767687148cca73e02308f288aac474b84f8ffe1b2a08901df481e4a7c03c162bbcc8ed6f5c030b48f12e4ebb61
7
+ data.tar.gz: a44018d0bc1c77d7907e5fa31cf5fe7e919c2e20bf5480b07bbd0de33ff2e6aec4a2a7981b8a22906c574f9140bede380cc87a7917fc84a0f6685190d26925c7
data/README.md CHANGED
@@ -108,6 +108,28 @@ parsed 35 (100.0%), no result 0 (0.0%)
108
108
 
109
109
  If you want to use the output in (shell)scripts, the options `-e -c` may be quite useful.
110
110
 
111
+ ## `to_html`
112
+
113
+ When ingredient lists are entered manually, it can be very useful to show how the text is
114
+ recognized. This can help you understand why a certain ingredients list cannot be parsed.
115
+
116
+ For this you can use the `to_html` method on the parsed output, which returns the original
117
+ text, augmented with CSS classes for different parts.
118
+
119
+ ```ruby
120
+ require 'food_ingredient_parser'
121
+
122
+ parsed = FoodIngredientParser::Parser.new.parse("Saus (10% tomaat*, zout). * = bio")
123
+ puts parsed.to_html
124
+ ```
125
+
126
+ ```html
127
+ <span class='name'>Saus</span> (<span class='amount'>10%</span>
128
+ <span class='name'>tomaat</span><span class='mark'>*</span>,
129
+ <span class='name'>zout</span>). <span class='note'>* = bio</span>
130
+ ```
131
+
132
+
111
133
  ## Test data
112
134
 
113
135
  [`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
@@ -4,7 +4,7 @@ require 'food_ingredient_parser/version'
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'food_ingredient_parser'
6
6
  s.version = FoodIngredientParser::VERSION
7
- s.date = '2018-08-31'
7
+ s.date = '2018-09-05'
8
8
  s.summary = 'Parser for ingredient lists found on food products.'
9
9
  s.authors = ['wvengen']
10
10
  s.email = ['dev-ruby@willem.engen.nl']
@@ -11,19 +11,19 @@ module FoodIngredientParser::Grammar
11
11
 
12
12
  rule simple_amount
13
13
  ( (
14
- 'of which' / 'at least' /
15
- 'waarvan' / 'ten minste' / 'tenminste' / 'minimaal'
14
+ 'of which'i / 'at least'i / 'minimal'i / 'maximal'i / 'less than'i / 'more than'i /
15
+ 'waarvan'i / 'ten minste'i / 'tenminste'i / 'minimaal'i / 'maximaal'i / 'minder dan'i / 'meer dan'i
16
16
  ) ws* )?
17
- [<>]? ws*
17
+ [±∓~∼∽≂≃≈≲≤<>≥≳]? ws*
18
18
  simple_amount_quantity
19
19
  ( ws+ (
20
- 'minimum' /
21
- 'minimaal' / 'minimum'
20
+ 'minimum'i /
21
+ 'minimaal'i / 'minimum'i
22
22
  ) )?
23
23
  end
24
24
 
25
25
  rule simple_amount_quantity
26
- number ( ws* '-' ws* number )? ws* ( '%' / 'g' / 'mg' / 'gram' / 'ml' )
26
+ number ( ws* '-' ws* number )? ws* ( '%' / 'g'i / 'mg'i / 'gram'i / 'ml'i )
27
27
  end
28
28
 
29
29
  end
@@ -20,7 +20,7 @@ module FoodIngredientParser::Grammar
20
20
 
21
21
  rule mark
22
22
  # mark referencing a footnote
23
- [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾'? / '⁽' [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾' / [†‡•°#^] / '*'+
23
+ [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾'? / '⁽' [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾' / [†‡•°#^] / '*'+ / '(' ws* ( [†‡•°#^] / '*'+ ) ws* ')'
24
24
  end
25
25
 
26
26
  rule digit
@@ -1,4 +1,5 @@
1
1
  require 'treetop/runtime'
2
+ require_relative 'to_html'
2
3
 
3
4
  # Needs to be in grammar namespace so Treetop can find the nodes.
4
5
  module FoodIngredientParser::Grammar
@@ -18,6 +19,8 @@ module FoodIngredientParser::Grammar
18
19
 
19
20
  # Root object, contains everything else.
20
21
  class RootNode < SyntaxNode
22
+ include FoodIngredientParser::ToHtml
23
+
21
24
  def to_h
22
25
  h = { contains: contains.to_a }
23
26
  if notes && notes_ary = to_a_deep(notes, NoteNode)&.map(&:text_value)
@@ -5,20 +5,23 @@ module FoodIngredientParser
5
5
 
6
6
  # @!attribute [r] parser
7
7
  # @return [Treetop::Runtime::CompiledParser] low-level parser object
8
+ # @note This attribute is there for convenience, but may change in the future. Take care.
8
9
  attr_reader :parser
9
10
 
10
11
  # Create a new food ingredient parser
11
- # @return [FoodIngredientParser]
12
+ # @return [FoodIngredientParser::Parser]
12
13
  def initialize
13
14
  @parser = Grammar::RootParser.new
14
15
  end
15
16
 
16
17
  # Parse food ingredient list text into a structured representation.
18
+ #
17
19
  # @option clean [Boolean] pass +false+ to disable correcting frequently occurring issues
18
- # @return [Hash] structured representation of food ingredients
19
- def parse(s, clean: true)
20
+ # @return [FoodIngredientParser::Grammar::RootNode] structured representation of food ingredients
21
+ # @note Unrecognized options are passed to Treetop, but this is not guaranteed to remain so forever.
22
+ def parse(s, clean: true, **options)
20
23
  s = clean(s) if clean
21
- @parser.parse(s)
24
+ @parser.parse(s, **options)
22
25
  end
23
26
 
24
27
  private
@@ -0,0 +1,43 @@
1
+ require 'cgi'
2
+
3
+ # Adds HTML output functionality to a Treetop Node.
4
+ #
5
+ # The node needs to provide a {#to_h} method (for {#to_html_h}).
6
+ #
7
+ module FoodIngredientParser::ToHtml
8
+
9
+ # Markup original ingredients list text in HTML.
10
+ #
11
+ # The input text is returned as HTML, augmented with CSS classes
12
+ # on +span+s for +name+, +amount+, +mark+ and +note+.
13
+ #
14
+ # @return [String] HTML representation of ingredient list.
15
+ def to_html
16
+ node_to_html(self)
17
+ end
18
+
19
+ private
20
+
21
+ def node_to_html(node, cls=nil)
22
+ el_cls = {} # map of node instances to class names for contained elements
23
+ terminal = node.terminal? # whether to look at children elements or not
24
+
25
+ if node.is_a?(FoodIngredientParser::Grammar::AmountNode)
26
+ cls ||= "amount"
27
+ elsif node.is_a?(FoodIngredientParser::Grammar::NoteNode)
28
+ cls ||= "note"
29
+ terminal = true # NoteNodes may contain other NoteNodes, we want it flat.
30
+ elsif node.is_a?(FoodIngredientParser::Grammar::IngredientNode)
31
+ el_cls[node.name] = "name" if node.respond_to?(:name)
32
+ el_cls[node.mark] = "mark" if node.respond_to?(:mark)
33
+ end
34
+
35
+ val = if terminal
36
+ CGI.escapeHTML(node.text_value)
37
+ else
38
+ node.elements.map {|el| node_to_html(el, el_cls[el]) }.join("")
39
+ end
40
+
41
+ cls ? "<span class='#{cls}'>#{val}</span>" : val
42
+ end
43
+ end
@@ -1,3 +1,3 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.0.0.pre.3'
2
+ VERSION = '1.0.0.pre.4'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0.pre.3
4
+ version: 1.0.0.pre.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-31 00:00:00.000000000 Z
11
+ date: 2018-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop
@@ -55,6 +55,7 @@ files:
55
55
  - lib/food_ingredient_parser/grammar/root.treetop
56
56
  - lib/food_ingredient_parser/nodes.rb
57
57
  - lib/food_ingredient_parser/parser.rb
58
+ - lib/food_ingredient_parser/to_html.rb
58
59
  - lib/food_ingredient_parser/version.rb
59
60
  homepage: https://github.com/q-m/food-ingredient-parser-ruby
60
61
  licenses: