food_ingredient_parser 1.1.9 → 1.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e83f2b90f83d4cb0deb193140dcce6f4ee1700a80c781c95b7fa09219da9f69
4
- data.tar.gz: '0929b958e93cf8e61e54b5e268ad32135aa45e87d1c163ae912790de55a85e28'
3
+ metadata.gz: a56d22b7e67a3a913b051bcbda8da885ddd467dc53f5a0df0faa5b40759a1f35
4
+ data.tar.gz: 427dd79c9f9203dc7901ead6264e08c05183d02aec266ac1d3bff930a5ba1dcd
5
5
  SHA512:
6
- metadata.gz: 929fd7057be9e1f35ec1db55e08bad1078d8bc1f977b07b25c26f3f0ce0919e0cb82ca4379e050067b20ab5cee760fb10c2d267d60fc7555d18af1e8ae9ca9e9
7
- data.tar.gz: 4873942e33ad823fbf84391cc70c4f4760cf0d93cceced307814d3abf73f5e91af8529fcfa048345b0a896dd967deac729d9051ab7e89b3c4174c8ee42f9f61b
6
+ metadata.gz: 0b07032ade3a55ce208bcb0c069223b41aee21f185a2b6a9bb91332881dfef8e1d829ae966097e48ffdba9984517be43b10bd027099f9bdce04e3a4c6fc41ca8
7
+ data.tar.gz: ebdf452a09d54b151ce8cfa9bb65b4477dd1afc81bfc5cd1d94055f726d387f522dd04a3e11b73d4b26222a18bc1068912a81c8e2e3cd8439b0cee1c1ec290d7
data/README.md CHANGED
@@ -69,6 +69,7 @@ Usage: bin/food_ingredient_parser [options] --file|-f <filename>
69
69
  -r, --parser PARSER Use specific parser (strict, loose).
70
70
  -e, --[no-]escape Escape newlines
71
71
  -c, --[no-]color Use color
72
+ --[no-]html Print as HTML with parsing markup
72
73
  -v, --[no-]verbose Show more data (parsed tree).
73
74
  --version Show program version.
74
75
  -h, --help Show this help
@@ -103,6 +104,9 @@ RootNode+Root3 offset=0, "tomato" (contains,notes):
103
104
  SyntaxNode offset=6, ""
104
105
  {:contains=>[{:name=>"tomato"}]}
105
106
 
107
+ $ bin/food_ingredient_parser --html -s "tomato"
108
+ <div class="root"><span class='depth0'><span class='name'>tomato</span></span></div>
109
+
106
110
  $ food_ingredient_parser -v -r loose -s "tomato"
107
111
  "tomato"
108
112
  Node interval=0..5
@@ -2,6 +2,7 @@
2
2
  #
3
3
  # Parser for food ingredient lists.
4
4
  #
5
+ require 'cgi'
5
6
  require 'optparse'
6
7
 
7
8
  $:.push(File.expand_path(File.dirname(__FILE__) + "/../lib"))
@@ -31,24 +32,31 @@ def colorize(color, s)
31
32
  end
32
33
  end
33
34
 
# Parse one ingredient string and print the outcome as text or HTML.
#
# @param s [String] the text to parse
# @param parsed [Object, nil] an existing parse result for +s+; when nil or
#   false, +parser.parse(s)+ is called to obtain one
# @param parser [#parse] parser to use; in text mode +parser.parser.failure_reason+
#   is shown when there is no result
# @param verbosity [Integer] 0 prints nothing, above 0 prints input and result,
#   above 1 additionally prints +parsed.inspect+ (text mode only)
# @param print [Symbol, nil] +:parsed+ or +:noresult+ restricts output to that
#   outcome; nil prints either outcome
# @param escape [Boolean] show newlines of the echoed input as "\n" (text mode only)
# @param color [Boolean] passed on to +colorize+ and +pp+ (text mode only)
# @param html [Boolean] print HTML markup instead of the text representation
# @return [Boolean, nil] true when there is a parse result, false when not;
#   nil when the outcome was filtered out by +print+
def parse_single(s, parsed = nil, parser:, verbosity: 1, print: nil, escape: false, color: false, html: false)
  parsed ||= parser.parse(s)

  # Honour the --parsed / --noresult style filter before producing any output.
  return unless print.nil? || (parsed && print == :parsed) || (!parsed && print == :noresult)

  if html
    if verbosity > 0
      # Without a parse tree there is nothing to mark up, so emit the escaped
      # input text. The input +s+ must be escaped here, not +parsed+, which is
      # nil/false in this branch and would make CGI.escapeHTML raise.
      markup = parsed ? parsed.to_html : CGI.escapeHTML(s)
      puts('<div class="root">' + markup + '</div>')
    end
  else
    puts colorize(color && "0;32", escape ? s.gsub("\n", "\\n") : s) if verbosity > 0

    if parsed
      puts(parsed.inspect) if verbosity > 1
      pp(parsed.to_h, color: color) if verbosity > 0
    elsif verbosity > 0
      puts "(no result: #{parser.parser.failure_reason})"
    end
  end

  !!parsed
end
50
58
 
51
- def parse_file(path, parser:, verbosity: 1, print: nil, escape: false, color: false)
59
+ def parse_file(path, parser:, verbosity: 1, print: nil, escape: false, color: false, html: false)
52
60
  count_parsed = count_noresult = 0
53
61
  File.foreach(path) do |line|
54
62
  next if line =~ /^#/ # comment
@@ -59,7 +67,7 @@ def parse_file(path, parser:, verbosity: 1, print: nil, escape: false, color: fa
59
67
  count_parsed += 1 if parsed
60
68
  count_noresult += 1 unless parsed
61
69
 
62
- parse_single(line, parsed, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color)
70
+ parse_single(line, parsed, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color, html: html)
63
71
  end
64
72
 
65
73
  pct_parsed = 100.0 * count_parsed / (count_parsed + count_noresult)
@@ -75,6 +83,7 @@ print = nil
75
83
  parser_name = :strict
76
84
  escape = false
77
85
  color = true
86
+ html = false
78
87
  PARSERS = {
79
88
  strict: FoodIngredientParser::Strict::Parser,
80
89
  loose: FoodIngredientParser::Loose::Parser
@@ -95,6 +104,7 @@ OptionParser.new do |opts|
95
104
  opts.on("-r", "--parser PARSER", "Use specific parser (#{PARSERS.keys.join(", ")}).") {|p| parser_name = p&.downcase&.to_sym }
96
105
  opts.on("-e", "--[no-]escape", "Escape newlines") {|e| escape = !!e }
97
106
  opts.on("-c", "--[no-]color", "Use color") {|e| color = !!e }
107
+ opts.on( "--[no-]html", "Print as HTML with parsing markup") {|e| html = !!e }
98
108
  opts.on("-v", "--[no-]verbose", "Show more data (parsed tree).") {|v| verbosity = v ? 2 : 1 }
99
109
  opts.on( "--version", "Show program version.") do
100
110
  puts("food_ingredient_parser v#{FoodIngredientParser::VERSION}")
@@ -112,8 +122,8 @@ if strings.any? || files.any?
112
122
  exit(1)
113
123
  end
114
124
  success = true
115
- strings.each {|s| success &= parse_single(s, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) }
116
- files.each {|f| success &= parse_file(f, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) == 0 }
125
+ strings.each {|s| success &= parse_single(s, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color, html: html) }
126
+ files.each {|f| success &= parse_file(f, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color, html: html) == 0 }
117
127
  success or exit(1)
118
128
  else
119
129
  STDERR.puts("Please specify one or more --file or --string arguments (see --help).")
@@ -19,7 +19,7 @@ module FoodIngredientParser::Loose
19
19
  T\.\s*aestivum\b(\s+vitt\.)? |
20
20
  nucifera\s+L\. |
21
21
  type\s+"\d+" |
22
- E[- ]?\d{3}[a-z]?\s*\([iv]+\) |
22
+ E(-|\s+)?\d{3}[a-z]?\s*(\([iv]+\)|\[[iv]+\]) |
23
23
  www\.[-_\/:%.A-Za-z0-9]+
24
24
  )/xi,
25
25
  *%w[
@@ -7,8 +7,8 @@ module FoodIngredientParser::Loose
7
7
  #
8
8
  # @note mark and amount is lost, this is not expected on e-numbers
9
9
 
10
- SPLIT_RE = /\s*-\s*/.freeze
11
- SINGLE_RE = /E-?\d{3}[a-z]?(?:\s*\([iv]+\))?/i.freeze
10
+ SPLIT_RE = /\s*(-|\ben\b|\band\b|\bund\b|\bet\b)\s*/.freeze
11
+ SINGLE_RE = /E(-|\s+)?\d{3}[a-z]?\s*(\([iv]+\)|\[[iv]+\])?/i.freeze
12
12
  MATCH_RE = /\A\s*(#{SINGLE_RE})(?:#{SPLIT_RE}(#{SINGLE_RE}))+\s*\z/i.freeze
13
13
 
14
14
  def self.transform!(node)
@@ -58,8 +58,8 @@ module FoodIngredientParser::Strict::Grammar
58
58
  end
59
59
 
60
60
  rule e_number
61
- ( 'E'i '-'? [0-9] [0-9] [0-9] [[:alpha:]]? )
62
- ( ( ws* '(' [iIvV]+ ')' ) / ![[:alnum:]] ) # e.g. "E450 (iii)"
61
+ ( 'E'i ( '-' / ws+ )? [0-9] [0-9] [0-9] [[:alpha:]]? )
62
+ ( ( ws* '(' [iIvV]+ ')' ) / ( ws* '[' [iIvV]+ ']' ) / ![[:alnum:]] ) # e.g. "E450 (iii)"
63
63
  end
64
64
 
65
65
  rule chem_systematic_name
@@ -5,15 +5,15 @@ module FoodIngredientParser::Strict::Grammar
5
5
  include Ingredient
6
6
 
7
7
  rule list_coloned
8
- contains:( ( ws* list_coloned_ingredient ws* '.,')+ ws* list_coloned_ingredient ) <ListNode> /
9
- contains:( ( ws* list_coloned_ingredient ws* '.,')+ ) <ListNode> /
10
- contains:( ( ws* list_coloned_ingredient ws* '.' )+ ws* list_coloned_ingredient ) <ListNode> /
11
- contains:( ( ws* list_coloned_ingredient ws* '.' )+ ) <ListNode> /
12
- contains:( ( ws* list_coloned_ingredient ws* ';,')+ ws* list_coloned_ingredient ) <ListNode> /
13
- contains:( ( ws* list_coloned_ingredient ws* ';,')+ ) <ListNode> /
14
- contains:( ( ws* list_coloned_ingredient ws* ';' )+ ws* list_coloned_ingredient ) <ListNode> /
15
- contains:( ( ws* list_coloned_ingredient ws* ';' )+ ) <ListNode> /
16
- contains:( ws* list_coloned_ingredient ) <ListNode>
8
+ contains:( ( ws* list_coloned_ingredient ws* '.' ws* ',' )+ ws* list_coloned_ingredient ) <ListNode> /
9
+ contains:( ( ws* list_coloned_ingredient ws* '.' ws* ',' )+ ) <ListNode> /
10
+ contains:( ( ws* list_coloned_ingredient ws* '.' )+ ws* list_coloned_ingredient ) <ListNode> /
11
+ contains:( ( ws* list_coloned_ingredient ws* '.' )+ ) <ListNode> /
12
+ contains:( ( ws* list_coloned_ingredient ws* ';' ws* ',' )+ ws* list_coloned_ingredient ) <ListNode> /
13
+ contains:( ( ws* list_coloned_ingredient ws* ';' ws* ',' )+ ) <ListNode> /
14
+ contains:( ( ws* list_coloned_ingredient ws* ';' )+ ws* list_coloned_ingredient ) <ListNode> /
15
+ contains:( ( ws* list_coloned_ingredient ws* ';' )+ ) <ListNode> /
16
+ contains:( ws* list_coloned_ingredient ) <ListNode>
17
17
  end
18
18
 
19
19
  rule list_coloned_inner_list
@@ -22,7 +22,7 @@ module FoodIngredientParser::Strict::Grammar
22
22
  end
23
23
 
24
24
  rule list_coloned_ingredient
25
- ing:ingredient_simple_with_amount ws* ':' ws* amount:amount post:( ws* '}' )? <IngredientNode> /
25
+ ing:ingredient_simple_with_amount ws* ':' ws* amount:amount post:( ws* '}' )? !( ws* word ) <IngredientNode> /
26
26
  ing:ingredient_simple_with_amount ws* ':' post:( ws* '}' )? ws* contains:list_coloned_inner_list <NestedIngredientNode>
27
27
  end
28
28
 
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.1.9'
3
- VERSION_DATE = '2021-01-12'
2
+ VERSION = '1.1.10'
3
+ VERSION_DATE = '2021-03-23'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.9
4
+ version: 1.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-12 00:00:00.000000000 Z
11
+ date: 2021-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop