food_ingredient_parser 1.1.6 → 1.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e65fb234a122c893fcb4de453e4cac07851d5000b848fa99d2b356002e187a60
4
- data.tar.gz: c72d35c6fc978a6ddc139e3adf2059c24104221627f04857ba73aad159b29a0f
3
+ metadata.gz: 59825ee90990b2c4f52c9e59fae2e34e5b4558bc63a57fc59946db7f71335351
4
+ data.tar.gz: b8201945554a11fddbac8eb6676c4cbff8c1f5d3523780ec54125e3172fd05ac
5
5
  SHA512:
6
- metadata.gz: 295728820e893f1277bb06cfdad572df1fde9d496df230caf55f41b23d124677131b004a5ca0b2a2e4a25b2273f98feb18840c47c6361ec474082d66d4fde7fb
7
- data.tar.gz: 0ca48100273055b6c8f6e1d89bd1b44976e325cc9633080d6570e52cb6ef5d07dd419013d9b9aff67f95b6e4fac6ae12396c9248123daf4902d3eb97d84d1b28
6
+ metadata.gz: 3b97c863f9da5b26162883a3627809857fa9277e60e76f2c805312ceea34ad40317bcdf6ca2f1c56b2eafc4a5c637bbe0b5ac3c3c24ef8f874dd008bb5cc3bd7
7
+ data.tar.gz: bfcc88aea38c3db84670e84dd1166cbcdf8ce60edff297a2ed7324616ea53a4625ab137423f2f6701981a27a51b398ab652b207abe435328e4fa67974bad3f01
@@ -2,14 +2,14 @@ module FoodIngredientParser
2
2
  module Cleaner
3
3
 
4
4
  def self.clean(s)
5
- s.gsub!("\u00ad", "") # strip soft hyphen
6
- s.gsub!("\u0092", "'") # windows-1252 apostrophe - https://stackoverflow.com/a/15564279/2866660
7
- s.gsub!("", ",") # normalize unicode comma
8
- s.gsub!("aÄs", "aïs") # encoding issue for maïs
9
- s.gsub!("ï", "ï") # encoding issue
10
- s.gsub!("ë", "ë") # encoding issue
11
- s.gsub!(/\A\s*"(.*)"\s*\z/, '\1') # enclosing double quotation marks
12
- s.gsub!(/\A\s*'(.*)'\s*\z/, '\1') # enclosing single quotation marks
5
+ s.gsub!(/(_x005f_|_)x000d_/i, "\n") # newline is sometimes encoded as _x000d_ (or _x005f_x000d_)
6
+ s.gsub!("\u00ad", "") # strip soft hyphen
7
+ s.gsub!("\u0092", "'") # windows-1252 apostrophe - https://stackoverflow.com/a/15564279/2866660
8
+ s.gsub!("", ",") # normalize unicode comma
9
+ s.gsub!("aÄs", "aïs") # encoding issue for maïs
10
+ s.gsub!("ï", "ï") # encoding issue
11
+ s.gsub!("ë", "ë") # encoding issue
12
+ s.gsub!(/\A\s*(["']+)(.*)\1\s*\z/, '\2') # enclosing quotation marks
13
13
  s
14
14
  end
15
15
 
@@ -4,7 +4,7 @@ module FoodIngredientParser::Loose
4
4
  class Scanner
5
5
 
6
6
  SEP_CHARS = "|;,.".freeze
7
- MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡•°▪◊#^˄*~".freeze
7
+ MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡⁺•°▪◊#^˄*~".freeze
8
8
  PREFIX_RE = /\A\s*(ingredients|contains|ingred[iï][eë]nt(en)?(declaratie)?|bevat|dit zit er\s?in|samenstelling|zutaten)\b\s*[:;.]?\s*/i.freeze
9
9
  NOTE_RE = /\A\b(dit product kan\b|deze verpakking kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|allergie[- ]informatie(\s*:|\b)|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b|voor [0-9,.]+ (g\.?|gr\.?|ram|ml).*\bis [0-9,.]+ (g\.?|gr\.?|ram|ml).*\bgebruikt\b)/i.freeze
10
10
  # Keep in sync with +abbrev+ in the +Common+ grammar, plus relevant ones from the +Amount+ grammar.
@@ -21,22 +21,28 @@ module FoodIngredientParser::Strict::Grammar
21
21
  ) ws* )?
22
22
  amount_simple_quantity
23
23
  ( ws+ (
24
- 'of'i / 'or less of'i / 'or more of'i /
25
- 'minimaal'i / 'minimum'i / 'van het uitlekgewicht'i / 'van het geheel'i /
24
+ 'of a'i / 'of'i / 'or less of'i / 'or more of'i /
25
+ 'van een'i / 'minimaal'i / 'minimum'i / 'van het uitlekgewicht'i / 'van het geheel'i /
26
26
  'min.'i / 'min'i / 'max.'i / 'max'i
27
27
  ) )?
28
28
  end
29
29
 
30
30
  rule amount_simple_quantity
31
- amount_simple_number ( ws* amount_simple_unit? ws* dash ws* amount_simple_number )? ws* amount_simple_unit
31
+ amount_simple_number ( ws* amount_simple_unit? ws* dash ws* amount_simple_number )? ( ws* amount_simple_unit )?
32
32
  end
33
33
 
34
34
  rule amount_simple_number
35
- ( [±∓~∼∽≂≃≈≲≤<>≥≳] ws* )? number
35
+ ( amount_simple_comparator ws* )? number
36
+ end
37
+
38
+ rule amount_simple_comparator
39
+ '=' ws* [<>] /
40
+ [<>] ws* ( '=' / 'of gelijk aan'i !char / 'or equal to'i !char ) /
41
+ [±∓~∼∽≂≃≈≲≤<>≥≳] / '+/-' / '-/+'
36
42
  end
37
43
 
38
44
  rule amount_simple_unit
39
- ( percent / ( ( 'procent' / 'percent' / 'gram'i / 'ml'i / 'mg'i / 'gr'i / 'g'i ) !char ) )
45
+ ( percent / ( ( 'procent' / 'percent' / 'gram'i / 'ml'i / 'mg'i / 'gr'i / 'g'i / 'ppm'i ) !char ) )
40
46
  ( ws 'vol'i ( !char / '.' ) )?
41
47
  ( ws* '℮' )?
42
48
  end
@@ -23,9 +23,9 @@ module FoodIngredientParser::Strict::Grammar
23
23
  [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾'? /
24
24
  '⁽' [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾' /
25
25
  [˄^] digit /
26
- [†‡•°▪◊#˄^~˛] /
26
+ [†‡⁺•°▪◊#˄^~˛] /
27
27
  '*'+ /
28
- '(' ws* ( [†‡•°▪◊#˄^~˛] / '*'+ ) ws* ')'
28
+ '(' ws* ( [†‡⁺•°▪◊#˄^~˛] / '*'+ ) ws* ')'
29
29
  end
30
30
 
31
31
  rule digit
@@ -33,7 +33,8 @@ module FoodIngredientParser::Strict::Grammar
33
33
  end
34
34
 
35
35
  rule fraction
36
- [½⅓⅔¼¾⅕⅖⅗⅘⅙⅚⅐⅛⅜⅝⅞⅑⅒]
36
+ [½⅓⅔¼¾⅕⅖⅗⅘⅙⅚⅐⅛⅜⅝⅞⅑⅒] /
37
+ digit+ '/' digit+
37
38
  end
38
39
 
39
40
  rule percent
@@ -61,6 +62,20 @@ module FoodIngredientParser::Strict::Grammar
61
62
  ![[:alnum:]] / ( ws* '(' 'i'i+ ')' ) # e.g. "E450 (iii)"
62
63
  end
63
64
 
65
+ rule chem_systematic_name
66
+ ( chem_systematic_name_num dash ) ( chem_systematic_name_word dash chem_systematic_name_num dash ws? )* chem_systematic_name_word /
67
+ ( chem_systematic_name_word dash chem_systematic_name_num dash ws? )+ chem_systematic_name_word
68
+ end
69
+
70
+ rule chem_systematic_name_word
71
+ [A-Za-z]+ ( dash [A-Za-z]+ dash [A-Za-z]+ )*
72
+ end
73
+
74
+ rule chem_systematic_name_num
75
+ digit+ [RH] /
76
+ digit+ ( ',' digit+ )* '\''?
77
+ end
78
+
64
79
  rule abbrev
65
80
  # These are listed explicitly to avoid incorrect interpretations, and allow missing trailing dots.
66
81
  # To get an idea of what occurs (second one omits trailing dots):
@@ -115,6 +130,7 @@ module FoodIngredientParser::Strict::Grammar
115
130
  'w.o'i /
116
131
  'w.v'i /
117
132
  # not auto-generated additions
133
+ 'nr.'i /
118
134
  'vit'i / # vitamin
119
135
  'denat'i / # denaturated
120
136
  'alc'i / # alcohol
@@ -138,7 +154,8 @@ module FoodIngredientParser::Strict::Grammar
138
154
  'T.' ws* 'aestivum'i (ws+ 'vitt.'i)? /
139
155
  'nucifera' ws+ 'L.'i /
140
156
  'type'i ws+ '"' [0-9]+ '"' /
141
- e_number
157
+ e_number /
158
+ chem_systematic_name
142
159
  ) ![[:alpha:]]
143
160
  end
144
161
  end
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.1.6'
3
- VERSION_DATE = '2020-11-19'
2
+ VERSION = '1.1.7'
3
+ VERSION_DATE = '2020-12-28'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.6
4
+ version: 1.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-19 00:00:00.000000000 Z
11
+ date: 2020-12-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop