food_ingredient_parser 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 41da81db767bedd583db2794f44d39d710ddef58
4
- data.tar.gz: 9bc24f8e83d04fb608e8fba4364aa26117fd898b
3
+ metadata.gz: 64ad7a10a1480b520602113bbcdfc10ba1daf8b5
4
+ data.tar.gz: 4068a9edbe1dca908228f38d2795ad63a5cbcf76
5
5
  SHA512:
6
- metadata.gz: 7634ebb4a8f3530cab515c5b27d2782df8c2521eb79df67ecbe08945360bcc27d68456d389d69a83808792145ecfb7094b3b97b5c33cbb17119642449813df88
7
- data.tar.gz: 3b5c5ed78b7c4e41211855523acaf2a4c519b0e021a32eb698a715f7dccf2beacde229593d8422b71ce7a5a55dc1e266347d263c6629a72cdb308556697f7d93
6
+ metadata.gz: 73ce876757b08e1d2cf0b5126e8d024b3728260134c3c4f3fe49fee14793da77ecc48d286165dc0c86e8363f2eddf6081355ac26d38f524371b367f4aa3cee23
7
+ data.tar.gz: befa97dc0fd4605cd2019a2cf7a39aa15d5dfa15acf118f10e9b28e104bbc9bd5925f28286e780acb142eb73a935dba26657f1c6fe9cb766e7f4f88310d5ce55
data/README.md CHANGED
@@ -65,10 +65,10 @@ Usage: bin/food_ingredient_parser [options] --file|-f <filename>
65
65
  -s, --string INGREDIENTS Parse specified ingredient list.
66
66
  -q, --[no-]quiet Only show summary.
67
67
  -p, --parsed Only show lines that were successfully parsed.
68
+ -n, --noresult Only show lines that had no result.
68
69
  -r, --parser PARSER Use specific parser (strict, loose).
69
70
  -e, --[no-]escape Escape newlines
70
71
  -c, --[no-]color Use color
71
- -n, --noresult Only show lines that had no result.
72
72
  -v, --[no-]verbose Show more data (parsed tree).
73
73
  --version Show program version.
74
74
  -h, --help Show this help
@@ -190,5 +190,4 @@ upgrade to `1.1`.
190
190
  [`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
191
191
  real-world ingredient lists found on the Dutch market. Each line contains one ingredient
192
192
  list (newlines are encoded as `\n`, empty lines and those starting with `#` are ignored).
193
- The strict parser currently parses about three quarter, while the loose parser returns
194
- something for all of them.
193
+ The strict parser currently parses 80% of them, while the loose parser returns something for all of them.
@@ -88,10 +88,10 @@ OptionParser.new do |opts|
88
88
 
89
89
  opts.on("-q", "--[no-]quiet", "Only show summary.") {|q| verbosity = q ? 0 : 1 }
90
90
  opts.on("-p", "--parsed", "Only show lines that were successfully parsed.") {|p| print = :parsed }
91
+ opts.on("-n", "--noresult", "Only show lines that had no result.") {|p| print = :noresult }
91
92
  opts.on("-r", "--parser PARSER", "Use specific parser (#{PARSERS.keys.join(", ")}).") {|p| parser_name = p&.downcase&.to_sym }
92
93
  opts.on("-e", "--[no-]escape", "Escape newlines") {|e| escape = !!e }
93
94
  opts.on("-c", "--[no-]color", "Use color") {|e| color = !!e }
94
- opts.on("-n", "--noresult", "Only show lines that had no result.") {|p| print = :noresult }
95
95
  opts.on("-v", "--[no-]verbose", "Show more data (parsed tree).") {|v| verbosity = v ? 2 : 1 }
96
96
  opts.on( "--version", "Show program version.") do
97
97
  puts("food_ingredient_parser v#{FoodIngredientParser::VERSION}")
@@ -2,6 +2,7 @@ require_relative '../cleaner'
2
2
  require_relative 'scanner'
3
3
  require_relative 'transform/amount'
4
4
  require_relative 'transform/handle_missing_name'
5
+ require_relative 'transform/split_e_numbers'
5
6
 
6
7
  module FoodIngredientParser::Loose
7
8
  class Parser
@@ -20,6 +21,7 @@ module FoodIngredientParser::Loose
20
21
  s = FoodIngredientParser::Cleaner.clean(s) if clean
21
22
  n = Scanner.new(s).scan
22
23
  n = Transform::Amount.transform!(n) if n
24
+ n = Transform::SplitENumbers.transform!(n) if n
23
25
  n = Transform::HandleMissingName.transform!(n) if n && normalize
24
26
  n
25
27
  end
@@ -0,0 +1,50 @@
1
+ require_relative '../node'
2
+
3
+ module FoodIngredientParser::Loose
4
+ module Transform
5
+ class SplitENumbers
6
+ # Transforms node tree to split e-number combinations.
7
+ #
8
+ # @note mark and amount are lost; these are not expected on e-numbers
9
+
10
+ SPLIT_RE = /\s*-\s*/.freeze
11
+ MATCH_RE = /\A\s*(e[0-9]{3}[a-z]?)(?:#{SPLIT_RE}(e[0-9]{3}[a-z]?))+\s*\z/i.freeze
12
+
13
+ def self.transform!(node)
14
+ new(node).transform!
15
+ end
16
+
17
+ def initialize(node)
18
+ @node = node
19
+ end
20
+
21
+ def transform!
22
+ transform_node!(@node)
23
+ @node
24
+ end
25
+
26
+ private
27
+
28
+ def transform_node!(node)
29
+ if node.contains.any?
30
+ node.contains.each {|n| transform_node!(n) }
31
+ elsif node.name && m = MATCH_RE.match(node.name.text_value)
32
+ i = 0
33
+ while m = node.name.text_value.match(SPLIT_RE, i)
34
+ node.contains << new_node(node, i, m.begin(0)-1)
35
+ i = m.end(0)
36
+ end
37
+ node.contains << new_node(node, i, node.name.interval.last) if i <= node.name.interval.last
38
+ node.name = nil
39
+ end
40
+ end
41
+
42
+ def new_node(node, begins, ends)
43
+ offset = node.name.interval.first
44
+ new_node = Node.new(node.input, offset + begins .. offset + ends)
45
+ new_node.name = Node.new(node.input, new_node.interval)
46
+ new_node
47
+ end
48
+ end
49
+ end
50
+ end
@@ -10,7 +10,8 @@ module FoodIngredientParser::Strict::Grammar
10
10
  end
11
11
 
12
12
  rule ingredient_coloned_inner_list
13
- contains:( ingredient_coloned_simple_with_amount_and_nest ws* ( '/'+ ws* ingredient_coloned_simple_with_amount_and_nest )* ) <ListNode>
13
+ contains:( ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
14
+ contains:( ingredient_coloned_simple_with_amount_and_nest ( ws* '/'+ ws* ingredient_coloned_simple_with_amount_and_nest )* ) <ListNode>
14
15
  end
15
16
 
16
17
  # @see IngredientSimple#ingredient_simple
@@ -14,5 +14,9 @@ module FoodIngredientParser::Strict::Grammar
14
14
  ing:ingredient_simple <IngredientNode>
15
15
  end
16
16
 
17
+ rule ingredient_simple_e_number
18
+ name:( [Ee] [0-9] [0-9] [0-9] [a-zA-Z]? ) ![a-zA-Z0-9] <IngredientNode>
19
+ end
20
+
17
21
  end
18
22
  end
@@ -13,6 +13,7 @@ module FoodIngredientParser::Strict::Grammar
13
13
  end
14
14
 
15
15
  rule list_coloned_inner_list
16
+ contains:( ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
16
17
  contains:( ingredient ( ws* ',' ws* ingredient )* ) <ListNode>
17
18
  end
18
19
 
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.1.1'
3
- VERSION_DATE = '2018-09-25'
2
+ VERSION = '1.1.2'
3
+ VERSION_DATE = '2018-09-28'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-25 00:00:00.000000000 Z
11
+ date: 2018-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop
@@ -50,6 +50,7 @@ files:
50
50
  - lib/food_ingredient_parser/loose/transform/amount.rb
51
51
  - lib/food_ingredient_parser/loose/transform/amount_from_name.treetop
52
52
  - lib/food_ingredient_parser/loose/transform/handle_missing_name.rb
53
+ - lib/food_ingredient_parser/loose/transform/split_e_numbers.rb
53
54
  - lib/food_ingredient_parser/strict/grammar.rb
54
55
  - lib/food_ingredient_parser/strict/grammar/amount.treetop
55
56
  - lib/food_ingredient_parser/strict/grammar/common.treetop