food_ingredient_parser 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/bin/food_ingredient_parser +1 -1
- data/lib/food_ingredient_parser/loose/parser.rb +2 -0
- data/lib/food_ingredient_parser/loose/transform/split_e_numbers.rb +50 -0
- data/lib/food_ingredient_parser/strict/grammar/ingredient_coloned.treetop +2 -1
- data/lib/food_ingredient_parser/strict/grammar/ingredient_simple.treetop +4 -0
- data/lib/food_ingredient_parser/strict/grammar/list_coloned.treetop +1 -0
- data/lib/food_ingredient_parser/version.rb +2 -2
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 64ad7a10a1480b520602113bbcdfc10ba1daf8b5
|
|
4
|
+
data.tar.gz: 4068a9edbe1dca908228f38d2795ad63a5cbcf76
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 73ce876757b08e1d2cf0b5126e8d024b3728260134c3c4f3fe49fee14793da77ecc48d286165dc0c86e8363f2eddf6081355ac26d38f524371b367f4aa3cee23
|
|
7
|
+
data.tar.gz: befa97dc0fd4605cd2019a2cf7a39aa15d5dfa15acf118f10e9b28e104bbc9bd5925f28286e780acb142eb73a935dba26657f1c6fe9cb766e7f4f88310d5ce55
|
data/README.md
CHANGED
|
@@ -65,10 +65,10 @@ Usage: bin/food_ingredient_parser [options] --file|-f <filename>
|
|
|
65
65
|
-s, --string INGREDIENTS Parse specified ingredient list.
|
|
66
66
|
-q, --[no-]quiet Only show summary.
|
|
67
67
|
-p, --parsed Only show lines that were successfully parsed.
|
|
68
|
+
-n, --noresult Only show lines that had no result.
|
|
68
69
|
-r, --parser PARSER Use specific parser (strict, loose).
|
|
69
70
|
-e, --[no-]escape Escape newlines
|
|
70
71
|
-c, --[no-]color Use color
|
|
71
|
-
-n, --noresult Only show lines that had no result.
|
|
72
72
|
-v, --[no-]verbose Show more data (parsed tree).
|
|
73
73
|
--version Show program version.
|
|
74
74
|
-h, --help Show this help
|
|
@@ -190,5 +190,4 @@ upgrade to `1.1`.
|
|
|
190
190
|
[`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
|
|
191
191
|
real-world ingredient lists found on the Dutch market. Each line contains one ingredient
|
|
192
192
|
list (newlines are encoded as `\n`, empty lines and those starting with `#` are ignored).
|
|
193
|
-
The strict parser currently parses
|
|
194
|
-
something for all of them.
|
|
193
|
+
The strict parser currently parses 80%, while the loose parser returns something for all of them.
|
data/bin/food_ingredient_parser
CHANGED
|
@@ -88,10 +88,10 @@ OptionParser.new do |opts|
|
|
|
88
88
|
|
|
89
89
|
opts.on("-q", "--[no-]quiet", "Only show summary.") {|q| verbosity = q ? 0 : 1 }
|
|
90
90
|
opts.on("-p", "--parsed", "Only show lines that were successfully parsed.") {|p| print = :parsed }
|
|
91
|
+
opts.on("-n", "--noresult", "Only show lines that had no result.") {|p| print = :noresult }
|
|
91
92
|
opts.on("-r", "--parser PARSER", "Use specific parser (#{PARSERS.keys.join(", ")}).") {|p| parser_name = p&.downcase&.to_sym }
|
|
92
93
|
opts.on("-e", "--[no-]escape", "Escape newlines") {|e| escape = !!e }
|
|
93
94
|
opts.on("-c", "--[no-]color", "Use color") {|e| color = !!e }
|
|
94
|
-
opts.on("-n", "--noresult", "Only show lines that had no result.") {|p| print = :noresult }
|
|
95
95
|
opts.on("-v", "--[no-]verbose", "Show more data (parsed tree).") {|v| verbosity = v ? 2 : 1 }
|
|
96
96
|
opts.on( "--version", "Show program version.") do
|
|
97
97
|
puts("food_ingredient_parser v#{FoodIngredientParser::VERSION}")
|
|
data/lib/food_ingredient_parser/loose/parser.rb
CHANGED
|
@@ -2,6 +2,7 @@ require_relative '../cleaner'
|
|
|
2
2
|
require_relative 'scanner'
|
|
3
3
|
require_relative 'transform/amount'
|
|
4
4
|
require_relative 'transform/handle_missing_name'
|
|
5
|
+
require_relative 'transform/split_e_numbers'
|
|
5
6
|
|
|
6
7
|
module FoodIngredientParser::Loose
|
|
7
8
|
class Parser
|
|
@@ -20,6 +21,7 @@ module FoodIngredientParser::Loose
|
|
|
20
21
|
s = FoodIngredientParser::Cleaner.clean(s) if clean
|
|
21
22
|
n = Scanner.new(s).scan
|
|
22
23
|
n = Transform::Amount.transform!(n) if n
|
|
24
|
+
n = Transform::SplitENumbers.transform!(n) if n
|
|
23
25
|
n = Transform::HandleMissingName.transform!(n) if n && normalize
|
|
24
26
|
n
|
|
25
27
|
end
|
|
data/lib/food_ingredient_parser/loose/transform/split_e_numbers.rb
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
require_relative '../node'
|
|
2
|
+
|
|
3
|
+
module FoodIngredientParser::Loose
|
|
4
|
+
module Transform
|
|
5
|
+
class SplitENumbers
|
|
6
|
+
# Transforms node tree to split e-number combinations.
|
|
7
|
+
#
|
|
8
|
+
# @note mark and amount is lost, this is not expected on e-numbers
|
|
9
|
+
|
|
10
|
+
SPLIT_RE = /\s*-\s*/.freeze
|
|
11
|
+
MATCH_RE = /\A\s*(e[0-9]{3}[a-z]?)(?:#{SPLIT_RE}(e[0-9]{3}[a-z]?))+\s*\z/i.freeze
|
|
12
|
+
|
|
13
|
+
def self.transform!(node)
|
|
14
|
+
new(node).transform!
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def initialize(node)
|
|
18
|
+
@node = node
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def transform!
|
|
22
|
+
transform_node!(@node)
|
|
23
|
+
@node
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
private
|
|
27
|
+
|
|
28
|
+
def transform_node!(node)
|
|
29
|
+
if node.contains.any?
|
|
30
|
+
node.contains.each {|n| transform_node!(n) }
|
|
31
|
+
elsif node.name && m = MATCH_RE.match(node.name.text_value)
|
|
32
|
+
i = 0
|
|
33
|
+
while m = node.name.text_value.match(SPLIT_RE, i)
|
|
34
|
+
node.contains << new_node(node, i, m.begin(0)-1)
|
|
35
|
+
i = m.end(0)
|
|
36
|
+
end
|
|
37
|
+
node.contains << new_node(node, i, node.name.interval.last) if i <= node.name.interval.last
|
|
38
|
+
node.name = nil
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def new_node(node, begins, ends)
|
|
43
|
+
offset = node.name.interval.first
|
|
44
|
+
new_node = Node.new(node.input, offset + begins .. offset + ends)
|
|
45
|
+
new_node.name = Node.new(node.input, new_node.interval)
|
|
46
|
+
new_node
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
data/lib/food_ingredient_parser/strict/grammar/ingredient_coloned.treetop
CHANGED
|
@@ -10,7 +10,8 @@ module FoodIngredientParser::Strict::Grammar
|
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
rule ingredient_coloned_inner_list
|
|
13
|
-
contains:(
|
|
13
|
+
contains:( ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
|
|
14
|
+
contains:( ingredient_coloned_simple_with_amount_and_nest ( ws* '/'+ ws* ingredient_coloned_simple_with_amount_and_nest )* ) <ListNode>
|
|
14
15
|
end
|
|
15
16
|
|
|
16
17
|
# @see IngredientSimple#ingredient_simple
|
|
data/lib/food_ingredient_parser/strict/grammar/list_coloned.treetop
CHANGED
|
@@ -13,6 +13,7 @@ module FoodIngredientParser::Strict::Grammar
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
rule list_coloned_inner_list
|
|
16
|
+
contains:( ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
|
|
16
17
|
contains:( ingredient ( ws* ',' ws* ingredient )* ) <ListNode>
|
|
17
18
|
end
|
|
18
19
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: food_ingredient_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.1
|
|
4
|
+
version: 1.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- wvengen
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2018-09-
|
|
11
|
+
date: 2018-09-28 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: treetop
|
|
@@ -50,6 +50,7 @@ files:
|
|
|
50
50
|
- lib/food_ingredient_parser/loose/transform/amount.rb
|
|
51
51
|
- lib/food_ingredient_parser/loose/transform/amount_from_name.treetop
|
|
52
52
|
- lib/food_ingredient_parser/loose/transform/handle_missing_name.rb
|
|
53
|
+
- lib/food_ingredient_parser/loose/transform/split_e_numbers.rb
|
|
53
54
|
- lib/food_ingredient_parser/strict/grammar.rb
|
|
54
55
|
- lib/food_ingredient_parser/strict/grammar/amount.treetop
|
|
55
56
|
- lib/food_ingredient_parser/strict/grammar/common.treetop
|