food_ingredient_parser 1.0.0.pre.7 → 1.0.0.pre.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/food_ingredient_parser/loose/scanner.rb +1 -1
- data/lib/food_ingredient_parser/strict/grammar/common.treetop +2 -47
- data/lib/food_ingredient_parser/strict/grammar/ingredient_coloned.treetop +4 -5
- data/lib/food_ingredient_parser/strict/grammar/ingredient_simple.treetop +2 -5
- data/lib/food_ingredient_parser/strict/grammar/list_coloned.treetop +3 -3
- data/lib/food_ingredient_parser/strict/grammar/root.treetop +1 -1
- data/lib/food_ingredient_parser/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b8042180a4a4fbc5233a5630c7e0cf8e4751182b
|
4
|
+
data.tar.gz: 26abeaf528a49a6f01a47eb114e35631d9347cc7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6d7c7972846a88046760de7d1c5857f426891502fdca4f50c0fad179f2a7580dd0b157aed106e9efa62af013e4a20d6d8e3be9c49ec8e4eeee326cb228e26c91
|
7
|
+
data.tar.gz: 872aadc53b40e991e156fde3bb89db69bebb52ca8993aadfbc1f3c862b76b0d42ac01b377da70af22cf6b909b24b063a16d59f1aab4ed6ddd4917e1196736c50
|
@@ -127,7 +127,7 @@ module FoodIngredientParser::Loose
|
|
127
127
|
def is_notes_start?
|
128
128
|
# @todo use more heuristics: don't assume dot is notes when separator is a dot, and only toplevel?
|
129
129
|
if ( is_mark? && @s[@i+mark_len..-1] =~ /\A\s*=/ ) || # "* = Biologisch"
|
130
|
-
( is_mark? && @s[@i-
|
130
|
+
( is_mark? && @s[@i-1] =~ /\s/ ) || # " **Biologisch"
|
131
131
|
( @s[@i..-1] =~ NOTE_RE ) # "E=", "Kan sporen van", ...
|
132
132
|
@i -= 1 # we want to include the mark in the note
|
133
133
|
true
|
@@ -13,7 +13,7 @@ module FoodIngredientParser::Strict::Grammar
|
|
13
13
|
[[:alnum:]] /
|
14
14
|
fraction /
|
15
15
|
[-/\`'´’+=_{}&] /
|
16
|
-
[
|
16
|
+
[®™] /
|
17
17
|
[¿?] / # weird characters turning up in names (e.g. encoding issues)
|
18
18
|
[₁₂₃₄₅₆₇₈₉] # can occur with vitamins
|
19
19
|
end
|
@@ -39,20 +39,10 @@ module FoodIngredientParser::Strict::Grammar
|
|
39
39
|
abbrev / char+
|
40
40
|
end
|
41
41
|
|
42
|
-
rule word_nas
|
43
|
-
# word, but don't include the trailing '-' that may occure before an 'and'
|
44
|
-
abbrev / ( !andsep char )+
|
45
|
-
end
|
46
|
-
|
47
42
|
rule and
|
48
43
|
( 'and' / 'en' / 'und' / '&' ) !char
|
49
44
|
end
|
50
45
|
|
51
|
-
# we want to match "a and b" but not "a- and bthing", this allows to avoid the latter
|
52
|
-
rule andsep
|
53
|
-
'-' ws+ and
|
54
|
-
end
|
55
|
-
|
56
46
|
rule abbrev
|
57
47
|
# These are listed explicitely to avoid incorrect interpretations, and allow missing trailing dots.
|
58
48
|
# To get an idea of what occurs (second one omits trailing dots):
|
@@ -107,44 +97,9 @@ module FoodIngredientParser::Strict::Grammar
|
|
107
97
|
'vit.'i /
|
108
98
|
'denat.'i /
|
109
99
|
'N°'i /
|
110
|
-
'°C'i
|
111
|
-
# word combinations that should not be split (not auto-generated)
|
112
|
-
# @todo this really would benefit from matching known ingredients instead of hardcoding
|
113
|
-
( 'oliën'i / 'olien'i / 'olië'i / 'olie'i ) ws+ and ws+ ( 'vetten'i / 'vet'i ) /
|
114
|
-
'palm'i ws+ and ws+ 'kokosvet'i /
|
115
|
-
color ( ws+ and ws+ color )+ /
|
116
|
-
color2 ( ws+ and ws+ color2 )+ /
|
117
|
-
'kruiden'i ws+ and ws+ 'specerijen'i /
|
118
|
-
'kruiden'i ws+ and ws+ 'specerij'i /
|
119
|
-
'specerijen'i ws+ and ws+ 'kruiden'i /
|
120
|
-
'vitamine'i 'n'i? ws+ and ws+ 'mineralen'i /
|
121
|
-
'lactose'i ws+ and ws+ 'melk'i ( 'eiwit'i 'en'i? )? /
|
122
|
-
'granen'i ws+ and ws+ 'zaden'i /
|
123
|
-
'gekookt'i [eE]? ws+ and ws+ 'gemarineerd'i [eE]? /
|
124
|
-
'mono'i ws+ and ws+ 'diglyceriden'i /
|
125
|
-
'guarpitmeel'i ws+ and ws+ 'natriumalginaat'i /
|
126
|
-
'vlees'i ws+ and ws+ 'dierlijke bijproducten'i /
|
127
|
-
'vis'i ws+ and ws+ 'visbijproducten'i /
|
128
|
-
'glucose'i ws+ and ws+ 'fructosestroop'i /
|
129
|
-
'ijzeroxiden'i ws+ and ws+ 'hydroxiden'i /
|
130
|
-
char+ 'sap'i ws+ and ws+ 'overige vruchtensappen'i /
|
131
|
-
char* 'sap'i ( ws+ 'uit concentraat'i / ws+ 'uit sapconcentraat'i )? ws+ and ws+ 'vruchten'i? 'puree'i /
|
132
|
-
( 'vit.'i / 'vitamine'i / 'vitamin' ) ws+ [a-zA-Z] [0-9]* ws+ and ws+ [a-zA-Z] [0-9]* /
|
133
|
-
( 'ijzer'i / 'chroom'i / 'koper'i ) ws* '(' 'I'+ ')' ws* [[:alnum:]]+
|
100
|
+
'°C'i
|
134
101
|
)
|
135
102
|
'.'? ![[:alpha:]]
|
136
103
|
end
|
137
|
-
|
138
|
-
rule color
|
139
|
-
# used for paprika, honey ("yellow and white honey") (nouns)
|
140
|
-
'red'i / 'green'i / 'yellow'i / 'white'i / 'black'i /
|
141
|
-
'rood'i / 'groen'i / 'geel'i / 'wit'i / 'zwart'i
|
142
|
-
end
|
143
|
-
|
144
|
-
rule color2
|
145
|
-
# adjective colors (can not occur together with noun colors in a list)
|
146
|
-
'rode'i / 'groene'i / 'gele'i / 'witte'i / 'zwarte'i
|
147
|
-
end
|
148
|
-
|
149
104
|
end
|
150
105
|
end
|
@@ -10,14 +10,13 @@ module FoodIngredientParser::Strict::Grammar
|
|
10
10
|
end
|
11
11
|
|
12
12
|
rule ingredient_coloned_inner_list
|
13
|
-
contains:( ingredient_coloned_simple_with_amount_and_nest ( ws+ and ws+ ingredient_coloned_simple_with_amount_and_nest )+ ) <ListNode> /
|
14
13
|
contains:( ingredient_coloned_simple_with_amount_and_nest ws* ( '/'+ ws* ingredient_coloned_simple_with_amount_and_nest )* ) <ListNode>
|
15
14
|
end
|
16
15
|
|
17
16
|
# @see IngredientSimple#ingredient_simple
|
18
17
|
rule ingredient_coloned_simple
|
19
|
-
name:(
|
20
|
-
name:(
|
18
|
+
name:( ingredient_coloned_word ( ws+ !amount ingredient_coloned_word )* ) ws? mark:mark <IngredientNode> /
|
19
|
+
name:( ingredient_coloned_word ( ws+ !amount ingredient_coloned_word )* ) <IngredientNode>
|
21
20
|
end
|
22
21
|
|
23
22
|
# @see IngredientSimple#ingredient_simple_with_amount
|
@@ -33,8 +32,8 @@ module FoodIngredientParser::Strict::Grammar
|
|
33
32
|
end
|
34
33
|
|
35
34
|
# @see Common#word
|
36
|
-
rule
|
37
|
-
abbrev / ( !'/'
|
35
|
+
rule ingredient_coloned_word
|
36
|
+
abbrev / ( !'/' char )+
|
38
37
|
end
|
39
38
|
|
40
39
|
end
|
@@ -4,11 +4,8 @@ module FoodIngredientParser::Strict::Grammar
|
|
4
4
|
include Amount
|
5
5
|
|
6
6
|
rule ingredient_simple
|
7
|
-
name:(
|
8
|
-
name:(
|
9
|
-
# We've tried to omit 'and' from the ingredient, but if it doesn't work out, do it anyway.
|
10
|
-
name:( word_nas ( andsep? ws+ !amount word_nas )* ) ws? mark:mark <IngredientNode> /
|
11
|
-
name:( word_nas ( andsep? ws+ !amount word_nas )* ) <IngredientNode>
|
7
|
+
name:( word ( ws+ !amount word )* ) ws? mark:mark <IngredientNode> /
|
8
|
+
name:( word ( ws+ !amount word )* ) <IngredientNode>
|
12
9
|
end
|
13
10
|
|
14
11
|
rule ingredient_simple_with_amount
|
@@ -5,9 +5,9 @@ module FoodIngredientParser::Strict::Grammar
|
|
5
5
|
include Ingredient
|
6
6
|
|
7
7
|
rule list_coloned
|
8
|
-
contains:( ( list_coloned_ingredient ws* '.'
|
9
|
-
contains:( ( list_coloned_ingredient ws* ';'
|
10
|
-
contains:( list_coloned_ingredient )
|
8
|
+
contains:( ( ws* list_coloned_ingredient ws* '.' )+ list_coloned_ingredient? ) <ListNode> /
|
9
|
+
contains:( ( ws* list_coloned_ingredient ws* ';' )+ list_coloned_ingredient? ) <ListNode> /
|
10
|
+
contains:( ws* list_coloned_ingredient ) <ListNode>
|
11
11
|
end
|
12
12
|
|
13
13
|
rule list_coloned_inner_list
|
@@ -11,7 +11,7 @@ module FoodIngredientParser::Strict::Grammar
|
|
11
11
|
contains:( list_newlined / list_coloned / list )
|
12
12
|
notes:(
|
13
13
|
root_mark_sentences_in_list? ws*
|
14
|
-
( (
|
14
|
+
( ( [.;] ws* newline* / [.;]? ws* newline+ ) ws* root_sentences? ws* )?
|
15
15
|
)
|
16
16
|
'"'?
|
17
17
|
<RootNode>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: food_ingredient_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0.pre.7
|
4
|
+
version: 1.0.0.pre.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wvengen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|