food_ingredient_parser 1.1.9 → 1.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/bin/food_ingredient_parser +20 -10
- data/lib/food_ingredient_parser/loose/scanner.rb +1 -1
- data/lib/food_ingredient_parser/loose/transform/split_e_numbers.rb +2 -2
- data/lib/food_ingredient_parser/strict/grammar/common.treetop +2 -2
- data/lib/food_ingredient_parser/strict/grammar/list_coloned.treetop +10 -10
- data/lib/food_ingredient_parser/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a56d22b7e67a3a913b051bcbda8da885ddd467dc53f5a0df0faa5b40759a1f35
|
|
4
|
+
data.tar.gz: 427dd79c9f9203dc7901ead6264e08c05183d02aec266ac1d3bff930a5ba1dcd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0b07032ade3a55ce208bcb0c069223b41aee21f185a2b6a9bb91332881dfef8e1d829ae966097e48ffdba9984517be43b10bd027099f9bdce04e3a4c6fc41ca8
|
|
7
|
+
data.tar.gz: ebdf452a09d54b151ce8cfa9bb65b4477dd1afc81bfc5cd1d94055f726d387f522dd04a3e11b73d4b26222a18bc1068912a81c8e2e3cd8439b0cee1c1ec290d7
|
data/README.md
CHANGED
|
@@ -69,6 +69,7 @@ Usage: bin/food_ingredient_parser [options] --file|-f <filename>
|
|
|
69
69
|
-r, --parser PARSER Use specific parser (strict, loose).
|
|
70
70
|
-e, --[no-]escape Escape newlines
|
|
71
71
|
-c, --[no-]color Use color
|
|
72
|
+
--[no-]html Print as HTML with parsing markup
|
|
72
73
|
-v, --[no-]verbose Show more data (parsed tree).
|
|
73
74
|
--version Show program version.
|
|
74
75
|
-h, --help Show this help
|
|
@@ -103,6 +104,9 @@ RootNode+Root3 offset=0, "tomato" (contains,notes):
|
|
|
103
104
|
SyntaxNode offset=6, ""
|
|
104
105
|
{:contains=>[{:name=>"tomato"}]}
|
|
105
106
|
|
|
107
|
+
$ bin/food_ingredient_parser --html -s "tomato"
|
|
108
|
+
<div class="root"><span class='depth0'><span class='name'>tomato</span></span></div>
|
|
109
|
+
|
|
106
110
|
$ food_ingredient_parser -v -r loose -s "tomato"
|
|
107
111
|
"tomato"
|
|
108
112
|
Node interval=0..5
|
data/bin/food_ingredient_parser
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#
|
|
3
3
|
# Parser for food ingredient lists.
|
|
4
4
|
#
|
|
5
|
+
require 'cgi'
|
|
5
6
|
require 'optparse'
|
|
6
7
|
|
|
7
8
|
$:.push(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
|
@@ -31,24 +32,31 @@ def colorize(color, s)
|
|
|
31
32
|
end
|
|
32
33
|
end
|
|
33
34
|
|
|
34
|
-
def parse_single(s, parsed=nil, parser:, verbosity: 1, print: nil, escape: false, color: false)
|
|
35
|
+
def parse_single(s, parsed=nil, parser:, verbosity: 1, print: nil, escape: false, color: false, html: false)
|
|
35
36
|
parsed ||= parser.parse(s)
|
|
36
37
|
|
|
37
38
|
return unless print.nil? || (parsed && print == :parsed) || (!parsed && print == :noresult)
|
|
38
39
|
|
|
39
|
-
puts colorize(color && "0;32", escape ? s.gsub("\n", "\\n") : s) if verbosity > 0
|
|
40
|
+
puts colorize(color && "0;32", escape ? s.gsub("\n", "\\n") : s) if !html && verbosity > 0
|
|
40
41
|
|
|
41
|
-
if parsed
|
|
42
|
+
if !html && parsed
|
|
42
43
|
puts(parsed.inspect) if verbosity > 1
|
|
43
44
|
pp(parsed.to_h, color: color) if verbosity > 0
|
|
44
|
-
|
|
45
|
-
|
|
45
|
+
|
|
46
|
+
elsif !html && !parsed
|
|
46
47
|
puts "(no result: #{parser.parser.failure_reason})" if verbosity > 0
|
|
47
|
-
|
|
48
|
+
|
|
49
|
+
elsif html && parsed
|
|
50
|
+
puts('<div class="root">' + parsed.to_html + '</div>') if verbosity > 0
|
|
51
|
+
|
|
52
|
+
else
|
|
53
|
+
puts('<div class="root">' + CGI.escapeHTML(parsed) + '</div>') if verbosity > 0
|
|
48
54
|
end
|
|
55
|
+
|
|
56
|
+
return !!parsed
|
|
49
57
|
end
|
|
50
58
|
|
|
51
|
-
def parse_file(path, parser:, verbosity: 1, print: nil, escape: false, color: false)
|
|
59
|
+
def parse_file(path, parser:, verbosity: 1, print: nil, escape: false, color: false, html: false)
|
|
52
60
|
count_parsed = count_noresult = 0
|
|
53
61
|
File.foreach(path) do |line|
|
|
54
62
|
next if line =~ /^#/ # comment
|
|
@@ -59,7 +67,7 @@ def parse_file(path, parser:, verbosity: 1, print: nil, escape: false, color: fa
|
|
|
59
67
|
count_parsed += 1 if parsed
|
|
60
68
|
count_noresult += 1 unless parsed
|
|
61
69
|
|
|
62
|
-
parse_single(line, parsed, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color)
|
|
70
|
+
parse_single(line, parsed, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color, html: html)
|
|
63
71
|
end
|
|
64
72
|
|
|
65
73
|
pct_parsed = 100.0 * count_parsed / (count_parsed + count_noresult)
|
|
@@ -75,6 +83,7 @@ print = nil
|
|
|
75
83
|
parser_name = :strict
|
|
76
84
|
escape = false
|
|
77
85
|
color = true
|
|
86
|
+
html = false
|
|
78
87
|
PARSERS = {
|
|
79
88
|
strict: FoodIngredientParser::Strict::Parser,
|
|
80
89
|
loose: FoodIngredientParser::Loose::Parser
|
|
@@ -95,6 +104,7 @@ OptionParser.new do |opts|
|
|
|
95
104
|
opts.on("-r", "--parser PARSER", "Use specific parser (#{PARSERS.keys.join(", ")}).") {|p| parser_name = p&.downcase&.to_sym }
|
|
96
105
|
opts.on("-e", "--[no-]escape", "Escape newlines") {|e| escape = !!e }
|
|
97
106
|
opts.on("-c", "--[no-]color", "Use color") {|e| color = !!e }
|
|
107
|
+
opts.on( "--[no-]html", "Print as HTML with parsing markup") {|e| html = !!e }
|
|
98
108
|
opts.on("-v", "--[no-]verbose", "Show more data (parsed tree).") {|v| verbosity = v ? 2 : 1 }
|
|
99
109
|
opts.on( "--version", "Show program version.") do
|
|
100
110
|
puts("food_ingredient_parser v#{FoodIngredientParser::VERSION}")
|
|
@@ -112,8 +122,8 @@ if strings.any? || files.any?
|
|
|
112
122
|
exit(1)
|
|
113
123
|
end
|
|
114
124
|
success = true
|
|
115
|
-
strings.each {|s| success &= parse_single(s, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) }
|
|
116
|
-
files.each {|f| success &= parse_file(f, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) == 0 }
|
|
125
|
+
strings.each {|s| success &= parse_single(s, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color, html: html) }
|
|
126
|
+
files.each {|f| success &= parse_file(f, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color, html: html) == 0 }
|
|
117
127
|
success or exit(1)
|
|
118
128
|
else
|
|
119
129
|
STDERR.puts("Please specify one or more --file or --string arguments (see --help).")
|
data/lib/food_ingredient_parser/loose/transform/split_e_numbers.rb
CHANGED
|
@@ -7,8 +7,8 @@ module FoodIngredientParser::Loose
|
|
|
7
7
|
#
|
|
8
8
|
# @note mark and amount is lost, this is not expected on e-numbers
|
|
9
9
|
|
|
10
|
-
SPLIT_RE = /\s
|
|
11
|
-
SINGLE_RE = /E
|
|
10
|
+
SPLIT_RE = /\s*(-|\ben\b|\band\b|\bund\b|\bet\b)\s*/.freeze
|
|
11
|
+
SINGLE_RE = /E(-|\s+)?\d{3}[a-z]?\s*(\([iv]+\)|\[[iv]+\])?/i.freeze
|
|
12
12
|
MATCH_RE = /\A\s*(#{SINGLE_RE})(?:#{SPLIT_RE}(#{SINGLE_RE}))+\s*\z/i.freeze
|
|
13
13
|
|
|
14
14
|
def self.transform!(node)
|
data/lib/food_ingredient_parser/strict/grammar/common.treetop
CHANGED
|
@@ -58,8 +58,8 @@ module FoodIngredientParser::Strict::Grammar
|
|
|
58
58
|
end
|
|
59
59
|
|
|
60
60
|
rule e_number
|
|
61
|
-
( 'E'i '-'? [0-9] [0-9] [0-9] [[:alpha:]]? )
|
|
62
|
-
( ( ws* '(' [iIvV]+ ')' ) / ![[:alnum:]] ) # e.g. "E450 (iii)"
|
|
61
|
+
( 'E'i ( '-' / ws+ )? [0-9] [0-9] [0-9] [[:alpha:]]? )
|
|
62
|
+
( ( ws* '(' [iIvV]+ ')' ) / ( ws* '[' [iIvV]+ ']' ) / ![[:alnum:]] ) # e.g. "E450 (iii)"
|
|
63
63
|
end
|
|
64
64
|
|
|
65
65
|
rule chem_systematic_name
|
data/lib/food_ingredient_parser/strict/grammar/list_coloned.treetop
CHANGED
|
@@ -5,15 +5,15 @@ module FoodIngredientParser::Strict::Grammar
|
|
|
5
5
|
include Ingredient
|
|
6
6
|
|
|
7
7
|
rule list_coloned
|
|
8
|
-
contains:( ( ws* list_coloned_ingredient ws* '
|
|
9
|
-
contains:( ( ws* list_coloned_ingredient ws* '
|
|
10
|
-
contains:( ( ws* list_coloned_ingredient ws* '.'
|
|
11
|
-
contains:( ( ws* list_coloned_ingredient ws* '.'
|
|
12
|
-
contains:( ( ws* list_coloned_ingredient ws* '
|
|
13
|
-
contains:( ( ws* list_coloned_ingredient ws* '
|
|
14
|
-
contains:( ( ws* list_coloned_ingredient ws* ';'
|
|
15
|
-
contains:( ( ws* list_coloned_ingredient ws* ';'
|
|
16
|
-
contains:( ws* list_coloned_ingredient )
|
|
8
|
+
contains:( ( ws* list_coloned_ingredient ws* '.' ws* ',' )+ ws* list_coloned_ingredient ) <ListNode> /
|
|
9
|
+
contains:( ( ws* list_coloned_ingredient ws* '.' ws* ',' )+ ) <ListNode> /
|
|
10
|
+
contains:( ( ws* list_coloned_ingredient ws* '.' )+ ws* list_coloned_ingredient ) <ListNode> /
|
|
11
|
+
contains:( ( ws* list_coloned_ingredient ws* '.' )+ ) <ListNode> /
|
|
12
|
+
contains:( ( ws* list_coloned_ingredient ws* ';' ws* ',' )+ ws* list_coloned_ingredient ) <ListNode> /
|
|
13
|
+
contains:( ( ws* list_coloned_ingredient ws* ';' ws* ',' )+ ) <ListNode> /
|
|
14
|
+
contains:( ( ws* list_coloned_ingredient ws* ';' )+ ws* list_coloned_ingredient ) <ListNode> /
|
|
15
|
+
contains:( ( ws* list_coloned_ingredient ws* ';' )+ ) <ListNode> /
|
|
16
|
+
contains:( ws* list_coloned_ingredient ) <ListNode>
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
rule list_coloned_inner_list
|
|
@@ -22,7 +22,7 @@ module FoodIngredientParser::Strict::Grammar
|
|
|
22
22
|
end
|
|
23
23
|
|
|
24
24
|
rule list_coloned_ingredient
|
|
25
|
-
ing:ingredient_simple_with_amount ws* ':' ws* amount:amount post:( ws* '}' )?
|
|
25
|
+
ing:ingredient_simple_with_amount ws* ':' ws* amount:amount post:( ws* '}' )? !( ws* word ) <IngredientNode> /
|
|
26
26
|
ing:ingredient_simple_with_amount ws* ':' post:( ws* '}' )? ws* contains:list_coloned_inner_list <NestedIngredientNode>
|
|
27
27
|
end
|
|
28
28
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: food_ingredient_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.9
|
|
4
|
+
version: 1.1.10
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- wvengen
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-
|
|
11
|
+
date: 2021-03-23 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: treetop
|