food_ingredient_parser 1.0.0.pre.7 → 1.0.0.pre.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e37a1957ac49914eef067ca584d1c7ba252847d8
4
- data.tar.gz: 91f97aa655ac16141ecbc0639c95579c52038666
3
+ metadata.gz: b8042180a4a4fbc5233a5630c7e0cf8e4751182b
4
+ data.tar.gz: 26abeaf528a49a6f01a47eb114e35631d9347cc7
5
5
  SHA512:
6
- metadata.gz: 4a80f6b15b71e9e597991dfced1fed8ae0f486434483e8ab37e102b8be51d01273bf88ddc29a8f934065b0921ec68152ce485793a75ab1a751c3ec439566d011
7
- data.tar.gz: 54ef1b2835363df9c72aab7a851cec7a555f04aefbeb41c0c865b142a08165df4172924d40e88d1f5ba1974e9f2203d65f23a6107a258d2185e27e0c054b2bb3
6
+ metadata.gz: 6d7c7972846a88046760de7d1c5857f426891502fdca4f50c0fad179f2a7580dd0b157aed106e9efa62af013e4a20d6d8e3be9c49ec8e4eeee326cb228e26c91
7
+ data.tar.gz: 872aadc53b40e991e156fde3bb89db69bebb52ca8993aadfbc1f3c862b76b0d42ac01b377da70af22cf6b909b24b063a16d59f1aab4ed6ddd4917e1196736c50
@@ -127,7 +127,7 @@ module FoodIngredientParser::Loose
127
127
  def is_notes_start?
128
128
  # @todo use more heuristics: don't assume dot is notes when separator is a dot, and only toplevel?
129
129
  if ( is_mark? && @s[@i+mark_len..-1] =~ /\A\s*=/ ) || # "* = Biologisch"
130
- ( is_mark? && @s[@i-2..@i-1] =~ /\A\s\s/ ) || # " **Biologisch"
130
+ ( is_mark? && @s[@i-1] =~ /\s/ ) || # " **Biologisch"
131
131
  ( @s[@i..-1] =~ NOTE_RE ) # "E=", "Kan sporen van", ...
132
132
  @i -= 1 # we want to include the mark in the note
133
133
  true
@@ -13,7 +13,7 @@ module FoodIngredientParser::Strict::Grammar
13
13
  [[:alnum:]] /
14
14
  fraction /
15
15
  [-/\`'´’+=_{}&] /
16
- [®] /
16
+ [®™] /
17
17
  [¿?] / # weird characters turning up in names (e.g. encoding issues)
18
18
  [₁₂₃₄₅₆₇₈₉] # can occur with vitamins
19
19
  end
@@ -39,20 +39,10 @@ module FoodIngredientParser::Strict::Grammar
39
39
  abbrev / char+
40
40
  end
41
41
 
42
- rule word_nas
43
- # word, but don't include the trailing '-' that may occure before an 'and'
44
- abbrev / ( !andsep char )+
45
- end
46
-
47
42
  rule and
48
43
  ( 'and' / 'en' / 'und' / '&' ) !char
49
44
  end
50
45
 
51
- # we want to match "a and b" but not "a- and bthing", this allows to avoid the latter
52
- rule andsep
53
- '-' ws+ and
54
- end
55
-
56
46
  rule abbrev
57
47
  # These are listed explicitely to avoid incorrect interpretations, and allow missing trailing dots.
58
48
  # To get an idea of what occurs (second one omits trailing dots):
@@ -107,44 +97,9 @@ module FoodIngredientParser::Strict::Grammar
107
97
  'vit.'i /
108
98
  'denat.'i /
109
99
  'N°'i /
110
- '°C'i /
111
- # word combinations that should not be split (not auto-generated)
112
- # @todo this really would benefit from matching known ingredients instead of hardcoding
113
- ( 'oliën'i / 'olien'i / 'olië'i / 'olie'i ) ws+ and ws+ ( 'vetten'i / 'vet'i ) /
114
- 'palm'i ws+ and ws+ 'kokosvet'i /
115
- color ( ws+ and ws+ color )+ /
116
- color2 ( ws+ and ws+ color2 )+ /
117
- 'kruiden'i ws+ and ws+ 'specerijen'i /
118
- 'kruiden'i ws+ and ws+ 'specerij'i /
119
- 'specerijen'i ws+ and ws+ 'kruiden'i /
120
- 'vitamine'i 'n'i? ws+ and ws+ 'mineralen'i /
121
- 'lactose'i ws+ and ws+ 'melk'i ( 'eiwit'i 'en'i? )? /
122
- 'granen'i ws+ and ws+ 'zaden'i /
123
- 'gekookt'i [eE]? ws+ and ws+ 'gemarineerd'i [eE]? /
124
- 'mono'i ws+ and ws+ 'diglyceriden'i /
125
- 'guarpitmeel'i ws+ and ws+ 'natriumalginaat'i /
126
- 'vlees'i ws+ and ws+ 'dierlijke bijproducten'i /
127
- 'vis'i ws+ and ws+ 'visbijproducten'i /
128
- 'glucose'i ws+ and ws+ 'fructosestroop'i /
129
- 'ijzeroxiden'i ws+ and ws+ 'hydroxiden'i /
130
- char+ 'sap'i ws+ and ws+ 'overige vruchtensappen'i /
131
- char* 'sap'i ( ws+ 'uit concentraat'i / ws+ 'uit sapconcentraat'i )? ws+ and ws+ 'vruchten'i? 'puree'i /
132
- ( 'vit.'i / 'vitamine'i / 'vitamin' ) ws+ [a-zA-Z] [0-9]* ws+ and ws+ [a-zA-Z] [0-9]* /
133
- ( 'ijzer'i / 'chroom'i / 'koper'i ) ws* '(' 'I'+ ')' ws* [[:alnum:]]+
100
+ '°C'i
134
101
  )
135
102
  '.'? ![[:alpha:]]
136
103
  end
137
-
138
- rule color
139
- # used for paprika, honey ("yellow and white honey") (nouns)
140
- 'red'i / 'green'i / 'yellow'i / 'white'i / 'black'i /
141
- 'rood'i / 'groen'i / 'geel'i / 'wit'i / 'zwart'i
142
- end
143
-
144
- rule color2
145
- # adjective colors (can not occur together with noun colors in a list)
146
- 'rode'i / 'groene'i / 'gele'i / 'witte'i / 'zwarte'i
147
- end
148
-
149
104
  end
150
105
  end
@@ -10,14 +10,13 @@ module FoodIngredientParser::Strict::Grammar
10
10
  end
11
11
 
12
12
  rule ingredient_coloned_inner_list
13
- contains:( ingredient_coloned_simple_with_amount_and_nest ( ws+ and ws+ ingredient_coloned_simple_with_amount_and_nest )+ ) <ListNode> /
14
13
  contains:( ingredient_coloned_simple_with_amount_and_nest ws* ( '/'+ ws* ingredient_coloned_simple_with_amount_and_nest )* ) <ListNode>
15
14
  end
16
15
 
17
16
  # @see IngredientSimple#ingredient_simple
18
17
  rule ingredient_coloned_simple
19
- name:( ingredient_coloned_word_nas ( andsep? ws+ !amount !and ingredient_coloned_word_nas )* ) ws? mark:mark <IngredientNode> /
20
- name:( ingredient_coloned_word_nas ( andsep? ws+ !amount !and ingredient_coloned_word_nas )* ) <IngredientNode>
18
+ name:( ingredient_coloned_word ( ws+ !amount ingredient_coloned_word )* ) ws? mark:mark <IngredientNode> /
19
+ name:( ingredient_coloned_word ( ws+ !amount ingredient_coloned_word )* ) <IngredientNode>
21
20
  end
22
21
 
23
22
  # @see IngredientSimple#ingredient_simple_with_amount
@@ -33,8 +32,8 @@ module FoodIngredientParser::Strict::Grammar
33
32
  end
34
33
 
35
34
  # @see Common#word
36
- rule ingredient_coloned_word_nas
37
- abbrev / ( !'/' !andsep char )+
35
+ rule ingredient_coloned_word
36
+ abbrev / ( !'/' char )+
38
37
  end
39
38
 
40
39
  end
@@ -4,11 +4,8 @@ module FoodIngredientParser::Strict::Grammar
4
4
  include Amount
5
5
 
6
6
  rule ingredient_simple
7
- name:( word_nas ( andsep? ws+ !amount !and word_nas )* ) ws? mark:mark <IngredientNode> /
8
- name:( word_nas ( andsep? ws+ !amount !and word_nas )* ) <IngredientNode> /
9
- # We've tried to omit 'and' from the ingredient, but if it doesn't work out, do it anyway.
10
- name:( word_nas ( andsep? ws+ !amount word_nas )* ) ws? mark:mark <IngredientNode> /
11
- name:( word_nas ( andsep? ws+ !amount word_nas )* ) <IngredientNode>
7
+ name:( word ( ws+ !amount word )* ) ws? mark:mark <IngredientNode> /
8
+ name:( word ( ws+ !amount word )* ) <IngredientNode>
12
9
  end
13
10
 
14
11
  rule ingredient_simple_with_amount
@@ -5,9 +5,9 @@ module FoodIngredientParser::Strict::Grammar
5
5
  include Ingredient
6
6
 
7
7
  rule list_coloned
8
- contains:( ( list_coloned_ingredient ws* '.' ws* )+ list_coloned_ingredient? ) <ListNode> /
9
- contains:( ( list_coloned_ingredient ws* ';' ws* )+ list_coloned_ingredient? ) <ListNode> /
10
- contains:( list_coloned_ingredient ) <ListNode>
8
+ contains:( ( ws* list_coloned_ingredient ws* '.' )+ list_coloned_ingredient? ) <ListNode> /
9
+ contains:( ( ws* list_coloned_ingredient ws* ';' )+ list_coloned_ingredient? ) <ListNode> /
10
+ contains:( ws* list_coloned_ingredient ) <ListNode>
11
11
  end
12
12
 
13
13
  rule list_coloned_inner_list
@@ -11,7 +11,7 @@ module FoodIngredientParser::Strict::Grammar
11
11
  contains:( list_newlined / list_coloned / list )
12
12
  notes:(
13
13
  root_mark_sentences_in_list? ws*
14
- ( ( '.' ws* newline* / '.'? ws* newline+ ) ws* root_sentences? ws* )?
14
+ ( ( [.;] ws* newline* / [.;]? ws* newline+ ) ws* root_sentences? ws* )?
15
15
  )
16
16
  '"'?
17
17
  <RootNode>
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.0.0.pre.7'
3
- VERSION_DATE = '2018-09-18'
2
+ VERSION = '1.0.0.pre.8'
3
+ VERSION_DATE = '2018-09-19'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0.pre.7
4
+ version: 1.0.0.pre.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-18 00:00:00.000000000 Z
11
+ date: 2018-09-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop