food_ingredient_parser 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/bin/food_ingredient_parser +1 -1
- data/lib/food_ingredient_parser/loose/parser.rb +2 -0
- data/lib/food_ingredient_parser/loose/transform/split_e_numbers.rb +50 -0
- data/lib/food_ingredient_parser/strict/grammar/ingredient_coloned.treetop +2 -1
- data/lib/food_ingredient_parser/strict/grammar/ingredient_simple.treetop +4 -0
- data/lib/food_ingredient_parser/strict/grammar/list_coloned.treetop +1 -0
- data/lib/food_ingredient_parser/version.rb +2 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64ad7a10a1480b520602113bbcdfc10ba1daf8b5
|
4
|
+
data.tar.gz: 4068a9edbe1dca908228f38d2795ad63a5cbcf76
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73ce876757b08e1d2cf0b5126e8d024b3728260134c3c4f3fe49fee14793da77ecc48d286165dc0c86e8363f2eddf6081355ac26d38f524371b367f4aa3cee23
|
7
|
+
data.tar.gz: befa97dc0fd4605cd2019a2cf7a39aa15d5dfa15acf118f10e9b28e104bbc9bd5925f28286e780acb142eb73a935dba26657f1c6fe9cb766e7f4f88310d5ce55
|
data/README.md
CHANGED
@@ -65,10 +65,10 @@ Usage: bin/food_ingredient_parser [options] --file|-f <filename>
|
|
65
65
|
-s, --string INGREDIENTS Parse specified ingredient list.
|
66
66
|
-q, --[no-]quiet Only show summary.
|
67
67
|
-p, --parsed Only show lines that were successfully parsed.
|
68
|
+
-n, --noresult Only show lines that had no result.
|
68
69
|
-r, --parser PARSER Use specific parser (strict, loose).
|
69
70
|
-e, --[no-]escape Escape newlines
|
70
71
|
-c, --[no-]color Use color
|
71
|
-
-n, --noresult Only show lines that had no result.
|
72
72
|
-v, --[no-]verbose Show more data (parsed tree).
|
73
73
|
--version Show program version.
|
74
74
|
-h, --help Show this help
|
@@ -190,5 +190,4 @@ upgrade to `1.1`.
|
|
190
190
|
[`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
|
191
191
|
real-world ingredient lists found on the Dutch market. Each line contains one ingredient
|
192
192
|
list (newlines are encoded as `\n`, empty lines and those starting with `#` are ignored).
|
193
|
-
The strict parser currently parses
|
194
|
-
something for all of them.
|
193
|
+
The strict parser currently parses 80%, while the loose parser returns something for all of them.
|
data/bin/food_ingredient_parser
CHANGED
@@ -88,10 +88,10 @@ OptionParser.new do |opts|
|
|
88
88
|
|
89
89
|
opts.on("-q", "--[no-]quiet", "Only show summary.") {|q| verbosity = q ? 0 : 1 }
|
90
90
|
opts.on("-p", "--parsed", "Only show lines that were successfully parsed.") {|p| print = :parsed }
|
91
|
+
opts.on("-n", "--noresult", "Only show lines that had no result.") {|p| print = :noresult }
|
91
92
|
opts.on("-r", "--parser PARSER", "Use specific parser (#{PARSERS.keys.join(", ")}).") {|p| parser_name = p&.downcase&.to_sym }
|
92
93
|
opts.on("-e", "--[no-]escape", "Escape newlines") {|e| escape = !!e }
|
93
94
|
opts.on("-c", "--[no-]color", "Use color") {|e| color = !!e }
|
94
|
-
opts.on("-n", "--noresult", "Only show lines that had no result.") {|p| print = :noresult }
|
95
95
|
opts.on("-v", "--[no-]verbose", "Show more data (parsed tree).") {|v| verbosity = v ? 2 : 1 }
|
96
96
|
opts.on( "--version", "Show program version.") do
|
97
97
|
puts("food_ingredient_parser v#{FoodIngredientParser::VERSION}")
|
data/lib/food_ingredient_parser/loose/parser.rb
CHANGED
@@ -2,6 +2,7 @@ require_relative '../cleaner'
|
|
2
2
|
require_relative 'scanner'
|
3
3
|
require_relative 'transform/amount'
|
4
4
|
require_relative 'transform/handle_missing_name'
|
5
|
+
require_relative 'transform/split_e_numbers'
|
5
6
|
|
6
7
|
module FoodIngredientParser::Loose
|
7
8
|
class Parser
|
@@ -20,6 +21,7 @@ module FoodIngredientParser::Loose
|
|
20
21
|
s = FoodIngredientParser::Cleaner.clean(s) if clean
|
21
22
|
n = Scanner.new(s).scan
|
22
23
|
n = Transform::Amount.transform!(n) if n
|
24
|
+
n = Transform::SplitENumbers.transform!(n) if n
|
23
25
|
n = Transform::HandleMissingName.transform!(n) if n && normalize
|
24
26
|
n
|
25
27
|
end
|
data/lib/food_ingredient_parser/loose/transform/split_e_numbers.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require_relative '../node'
|
2
|
+
|
3
|
+
module FoodIngredientParser::Loose
|
4
|
+
module Transform
|
5
|
+
class SplitENumbers
|
6
|
+
# Transforms node tree to split e-number combinations.
|
7
|
+
#
|
8
|
+
# @note mark and amount is lost, this is not expected on e-numbers
|
9
|
+
|
10
|
+
SPLIT_RE = /\s*-\s*/.freeze
|
11
|
+
MATCH_RE = /\A\s*(e[0-9]{3}[a-z]?)(?:#{SPLIT_RE}(e[0-9]{3}[a-z]?))+\s*\z/i.freeze
|
12
|
+
|
13
|
+
def self.transform!(node)
|
14
|
+
new(node).transform!
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(node)
|
18
|
+
@node = node
|
19
|
+
end
|
20
|
+
|
21
|
+
def transform!
|
22
|
+
transform_node!(@node)
|
23
|
+
@node
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def transform_node!(node)
|
29
|
+
if node.contains.any?
|
30
|
+
node.contains.each {|n| transform_node!(n) }
|
31
|
+
elsif node.name && m = MATCH_RE.match(node.name.text_value)
|
32
|
+
i = 0
|
33
|
+
while m = node.name.text_value.match(SPLIT_RE, i)
|
34
|
+
node.contains << new_node(node, i, m.begin(0)-1)
|
35
|
+
i = m.end(0)
|
36
|
+
end
|
37
|
+
node.contains << new_node(node, i, node.name.interval.last) if i <= node.name.interval.last
|
38
|
+
node.name = nil
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def new_node(node, begins, ends)
|
43
|
+
offset = node.name.interval.first
|
44
|
+
new_node = Node.new(node.input, offset + begins .. offset + ends)
|
45
|
+
new_node.name = Node.new(node.input, new_node.interval)
|
46
|
+
new_node
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/lib/food_ingredient_parser/strict/grammar/ingredient_coloned.treetop
CHANGED
@@ -10,7 +10,8 @@ module FoodIngredientParser::Strict::Grammar
|
|
10
10
|
end
|
11
11
|
|
12
12
|
rule ingredient_coloned_inner_list
|
13
|
-
contains:(
|
13
|
+
contains:( ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
|
14
|
+
contains:( ingredient_coloned_simple_with_amount_and_nest ( ws* '/'+ ws* ingredient_coloned_simple_with_amount_and_nest )* ) <ListNode>
|
14
15
|
end
|
15
16
|
|
16
17
|
# @see IngredientSimple#ingredient_simple
|
data/lib/food_ingredient_parser/strict/grammar/list_coloned.treetop
CHANGED
@@ -13,6 +13,7 @@ module FoodIngredientParser::Strict::Grammar
|
|
13
13
|
end
|
14
14
|
|
15
15
|
rule list_coloned_inner_list
|
16
|
+
contains:( ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
|
16
17
|
contains:( ingredient ( ws* ',' ws* ingredient )* ) <ListNode>
|
17
18
|
end
|
18
19
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: food_ingredient_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wvengen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|
@@ -50,6 +50,7 @@ files:
|
|
50
50
|
- lib/food_ingredient_parser/loose/transform/amount.rb
|
51
51
|
- lib/food_ingredient_parser/loose/transform/amount_from_name.treetop
|
52
52
|
- lib/food_ingredient_parser/loose/transform/handle_missing_name.rb
|
53
|
+
- lib/food_ingredient_parser/loose/transform/split_e_numbers.rb
|
53
54
|
- lib/food_ingredient_parser/strict/grammar.rb
|
54
55
|
- lib/food_ingredient_parser/strict/grammar/amount.treetop
|
55
56
|
- lib/food_ingredient_parser/strict/grammar/common.treetop
|