food_ingredient_parser 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 41da81db767bedd583db2794f44d39d710ddef58
4
- data.tar.gz: 9bc24f8e83d04fb608e8fba4364aa26117fd898b
3
+ metadata.gz: 64ad7a10a1480b520602113bbcdfc10ba1daf8b5
4
+ data.tar.gz: 4068a9edbe1dca908228f38d2795ad63a5cbcf76
5
5
  SHA512:
6
- metadata.gz: 7634ebb4a8f3530cab515c5b27d2782df8c2521eb79df67ecbe08945360bcc27d68456d389d69a83808792145ecfb7094b3b97b5c33cbb17119642449813df88
7
- data.tar.gz: 3b5c5ed78b7c4e41211855523acaf2a4c519b0e021a32eb698a715f7dccf2beacde229593d8422b71ce7a5a55dc1e266347d263c6629a72cdb308556697f7d93
6
+ metadata.gz: 73ce876757b08e1d2cf0b5126e8d024b3728260134c3c4f3fe49fee14793da77ecc48d286165dc0c86e8363f2eddf6081355ac26d38f524371b367f4aa3cee23
7
+ data.tar.gz: befa97dc0fd4605cd2019a2cf7a39aa15d5dfa15acf118f10e9b28e104bbc9bd5925f28286e780acb142eb73a935dba26657f1c6fe9cb766e7f4f88310d5ce55
data/README.md CHANGED
@@ -65,10 +65,10 @@ Usage: bin/food_ingredient_parser [options] --file|-f <filename>
65
65
  -s, --string INGREDIENTS Parse specified ingredient list.
66
66
  -q, --[no-]quiet Only show summary.
67
67
  -p, --parsed Only show lines that were successfully parsed.
68
+ -n, --noresult Only show lines that had no result.
68
69
  -r, --parser PARSER Use specific parser (strict, loose).
69
70
  -e, --[no-]escape Escape newlines
70
71
  -c, --[no-]color Use color
71
- -n, --noresult Only show lines that had no result.
72
72
  -v, --[no-]verbose Show more data (parsed tree).
73
73
  --version Show program version.
74
74
  -h, --help Show this help
@@ -190,5 +190,4 @@ upgrade to `1.1`.
190
190
  [`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
191
191
  real-world ingredient lists found on the Dutch market. Each line contains one ingredient
192
192
  list (newlines are encoded as `\n`, empty lines and those starting with `#` are ignored).
193
- The strict parser currently parses about three quarter, while the loose parser returns
194
- something for all of them.
193
+ The strict parser currently parses 80% of these lists, while the loose parser returns a result for all of them.
@@ -88,10 +88,10 @@ OptionParser.new do |opts|
88
88
 
89
89
  opts.on("-q", "--[no-]quiet", "Only show summary.") {|q| verbosity = q ? 0 : 1 }
90
90
  opts.on("-p", "--parsed", "Only show lines that were successfully parsed.") {|p| print = :parsed }
91
+ opts.on("-n", "--noresult", "Only show lines that had no result.") {|p| print = :noresult }
91
92
  opts.on("-r", "--parser PARSER", "Use specific parser (#{PARSERS.keys.join(", ")}).") {|p| parser_name = p&.downcase&.to_sym }
92
93
  opts.on("-e", "--[no-]escape", "Escape newlines") {|e| escape = !!e }
93
94
  opts.on("-c", "--[no-]color", "Use color") {|e| color = !!e }
94
- opts.on("-n", "--noresult", "Only show lines that had no result.") {|p| print = :noresult }
95
95
  opts.on("-v", "--[no-]verbose", "Show more data (parsed tree).") {|v| verbosity = v ? 2 : 1 }
96
96
  opts.on( "--version", "Show program version.") do
97
97
  puts("food_ingredient_parser v#{FoodIngredientParser::VERSION}")
@@ -2,6 +2,7 @@ require_relative '../cleaner'
2
2
  require_relative 'scanner'
3
3
  require_relative 'transform/amount'
4
4
  require_relative 'transform/handle_missing_name'
5
+ require_relative 'transform/split_e_numbers'
5
6
 
6
7
  module FoodIngredientParser::Loose
7
8
  class Parser
@@ -20,6 +21,7 @@ module FoodIngredientParser::Loose
20
21
  s = FoodIngredientParser::Cleaner.clean(s) if clean
21
22
  n = Scanner.new(s).scan
22
23
  n = Transform::Amount.transform!(n) if n
24
+ n = Transform::SplitENumbers.transform!(n) if n
23
25
  n = Transform::HandleMissingName.transform!(n) if n && normalize
24
26
  n
25
27
  end
@@ -0,0 +1,50 @@
1
+ require_relative '../node'
2
+
3
+ module FoodIngredientParser::Loose
4
+ module Transform
5
class SplitENumbers
  # Transforms a node tree to split e-number combinations.
  #
  # A leaf node whose name consists solely of e-numbers joined by dashes
  # (e.g. "E100-E101") gets one child node per e-number appended to its
  # +contains+ list, and its own name is cleared.
  #
  # @note mark and amount are lost, these are not expected on e-numbers

  # Separator between two e-numbers: a dash with optional whitespace.
  SPLIT_RE = /\s*-\s*/.freeze
  # Matches a name made up of two or more dash-separated e-numbers only.
  MATCH_RE = /\A\s*(e[0-9]{3}[a-z]?)(?:#{SPLIT_RE}(e[0-9]{3}[a-z]?))+\s*\z/i.freeze

  # Convenience entry point, see {#transform!}.
  #
  # @param node root node of the tree to transform
  # @return the same node, modified in place
  def self.transform!(node)
    new(node).transform!
  end

  # @param node root node of the tree to transform
  def initialize(node)
    @node = node
  end

  # Walks the tree and splits e-number combinations in place.
  #
  # @return the root node passed to the constructor
  def transform!
    transform_node!(@node)
    @node
  end

  private

  # Recurses into nodes that already have children; only leaf nodes are
  # candidates for splitting.
  def transform_node!(node)
    if node.contains.any?
      node.contains.each { |n| transform_node!(n) }
    elsif node.name && MATCH_RE.match(node.name.text_value)
      split_name!(node)
    end
  end

  # Appends one child per e-number found in the node's name, then clears the name.
  def split_name!(node)
    text = node.name.text_value
    # All positions below are relative to the start of the name text;
    # new_node translates them to positions in the full input.
    last = text.length - 1
    i = 0
    while (m = text.match(SPLIT_RE, i))
      node.contains << new_node(node, i, m.begin(0) - 1)
      i = m.end(0)
    end
    # Remainder after the final separator. The end must be relative as well,
    # otherwise the name's offset would be added twice and the interval would
    # run into whatever follows the name in the input.
    node.contains << new_node(node, i, last) if i <= last
    node.name = nil
  end

  # Builds a child node (with a name covering the same span) for the part of
  # the parent's name between +begins+ and +ends+ (inclusive, relative to the
  # start of the name).
  def new_node(node, begins, ends)
    offset = node.name.interval.first
    child = Node.new(node.input, (offset + begins)..(offset + ends))
    child.name = Node.new(node.input, child.interval)
    child
  end
end
49
+ end
50
+ end
@@ -10,7 +10,8 @@ module FoodIngredientParser::Strict::Grammar
10
10
  end
11
11
 
12
12
  rule ingredient_coloned_inner_list
13
- contains:( ingredient_coloned_simple_with_amount_and_nest ws* ( '/'+ ws* ingredient_coloned_simple_with_amount_and_nest )* ) <ListNode>
13
+ contains:( ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
14
+ contains:( ingredient_coloned_simple_with_amount_and_nest ( ws* '/'+ ws* ingredient_coloned_simple_with_amount_and_nest )* ) <ListNode>
14
15
  end
15
16
 
16
17
  # @see IngredientSimple#ingredient_simple
@@ -14,5 +14,9 @@ module FoodIngredientParser::Strict::Grammar
14
14
  ing:ingredient_simple <IngredientNode>
15
15
  end
16
16
 
17
+ rule ingredient_simple_e_number
18
+ name:( [Ee] [0-9] [0-9] [0-9] [a-zA-Z]? ) ![a-zA-Z0-9] <IngredientNode>
19
+ end
20
+
17
21
  end
18
22
  end
@@ -13,6 +13,7 @@ module FoodIngredientParser::Strict::Grammar
13
13
  end
14
14
 
15
15
  rule list_coloned_inner_list
16
+ contains:( ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
16
17
  contains:( ingredient ( ws* ',' ws* ingredient )* ) <ListNode>
17
18
  end
18
19
 
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.1.1'
3
- VERSION_DATE = '2018-09-25'
2
+ VERSION = '1.1.2'
3
+ VERSION_DATE = '2018-09-28'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-25 00:00:00.000000000 Z
11
+ date: 2018-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop
@@ -50,6 +50,7 @@ files:
50
50
  - lib/food_ingredient_parser/loose/transform/amount.rb
51
51
  - lib/food_ingredient_parser/loose/transform/amount_from_name.treetop
52
52
  - lib/food_ingredient_parser/loose/transform/handle_missing_name.rb
53
+ - lib/food_ingredient_parser/loose/transform/split_e_numbers.rb
53
54
  - lib/food_ingredient_parser/strict/grammar.rb
54
55
  - lib/food_ingredient_parser/strict/grammar/amount.treetop
55
56
  - lib/food_ingredient_parser/strict/grammar/common.treetop