food_ingredient_parser 1.1.6 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e65fb234a122c893fcb4de453e4cac07851d5000b848fa99d2b356002e187a60
4
- data.tar.gz: c72d35c6fc978a6ddc139e3adf2059c24104221627f04857ba73aad159b29a0f
3
+ metadata.gz: 59825ee90990b2c4f52c9e59fae2e34e5b4558bc63a57fc59946db7f71335351
4
+ data.tar.gz: b8201945554a11fddbac8eb6676c4cbff8c1f5d3523780ec54125e3172fd05ac
5
5
  SHA512:
6
- metadata.gz: 295728820e893f1277bb06cfdad572df1fde9d496df230caf55f41b23d124677131b004a5ca0b2a2e4a25b2273f98feb18840c47c6361ec474082d66d4fde7fb
7
- data.tar.gz: 0ca48100273055b6c8f6e1d89bd1b44976e325cc9633080d6570e52cb6ef5d07dd419013d9b9aff67f95b6e4fac6ae12396c9248123daf4902d3eb97d84d1b28
6
+ metadata.gz: 3b97c863f9da5b26162883a3627809857fa9277e60e76f2c805312ceea34ad40317bcdf6ca2f1c56b2eafc4a5c637bbe0b5ac3c3c24ef8f874dd008bb5cc3bd7
7
+ data.tar.gz: bfcc88aea38c3db84670e84dd1166cbcdf8ce60edff297a2ed7324616ea53a4625ab137423f2f6701981a27a51b398ab652b207abe435328e4fa67974bad3f01
@@ -2,14 +2,14 @@ module FoodIngredientParser
2
2
  module Cleaner
3
3
 
4
4
  def self.clean(s)
5
- s.gsub!("\u00ad", "") # strip soft hyphen
6
- s.gsub!("\u0092", "'") # windows-1252 apostrophe - https://stackoverflow.com/a/15564279/2866660
7
- s.gsub!("", ",") # normalize unicode comma
8
- s.gsub!("aÄs", "aïs") # encoding issue for maïs
9
- s.gsub!("ï", "ï") # encoding issue
10
- s.gsub!("ë", "ë") # encoding issue
11
- s.gsub!(/\A\s*"(.*)"\s*\z/, '\1') # enclosing double quotation marks
12
- s.gsub!(/\A\s*'(.*)'\s*\z/, '\1') # enclosing single quotation marks
5
+ s.gsub!(/(_x005f_|_)x000d_/i, "\n") # fix occasional mis-encoding of newline
6
+ s.gsub!("\u00ad", "") # strip soft hyphen
7
+ s.gsub!("\u0092", "'") # windows-1252 apostrophe - https://stackoverflow.com/a/15564279/2866660
8
+ s.gsub!("", ",") # normalize unicode comma
9
+ s.gsub!("aÄs", "aïs") # encoding issue for maïs
10
+ s.gsub!("ï", "ï") # encoding issue
11
+ s.gsub!("ë", "ë") # encoding issue
12
+ s.gsub!(/\A\s*(["']+)(.*)\1\s*\z/, '\2') # enclosing quotation marks
13
13
  s
14
14
  end
15
15
 
@@ -4,7 +4,7 @@ module FoodIngredientParser::Loose
4
4
  class Scanner
5
5
 
6
6
  SEP_CHARS = "|;,.".freeze
7
- MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡•°▪◊#^˄*~".freeze
7
+ MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡⁺•°▪◊#^˄*~".freeze
8
8
  PREFIX_RE = /\A\s*(ingredients|contains|ingred[iï][eë]nt(en)?(declaratie)?|bevat|dit zit er\s?in|samenstelling|zutaten)\b\s*[:;.]?\s*/i.freeze
9
9
  NOTE_RE = /\A\b(dit product kan\b|deze verpakking kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|allergie[- ]informatie(\s*:|\b)|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b|voor [0-9,.]+ (g\.?|gr\.?|ram|ml).*\bis [0-9,.]+ (g\.?|gr\.?|ram|ml).*\bgebruikt\b)/i.freeze
10
10
  # Keep in sync with +abbrev+ in the +Common+ grammar, plus relevant ones from the +Amount+ grammar.
@@ -21,22 +21,28 @@ module FoodIngredientParser::Strict::Grammar
21
21
  ) ws* )?
22
22
  amount_simple_quantity
23
23
  ( ws+ (
24
- 'of'i / 'or less of'i / 'or more of'i /
25
- 'minimaal'i / 'minimum'i / 'van het uitlekgewicht'i / 'van het geheel'i /
24
+ 'of a'i / 'of'i / 'or less of'i / 'or more of'i /
25
+ 'van een'i / 'minimaal'i / 'minimum'i / 'van het uitlekgewicht'i / 'van het geheel'i /
26
26
  'min.'i / 'min'i / 'max.'i / 'max'i
27
27
  ) )?
28
28
  end
29
29
 
30
30
  rule amount_simple_quantity
31
- amount_simple_number ( ws* amount_simple_unit? ws* dash ws* amount_simple_number )? ws* amount_simple_unit
31
+ amount_simple_number ( ws* amount_simple_unit? ws* dash ws* amount_simple_number )? ( ws* amount_simple_unit )?
32
32
  end
33
33
 
34
34
  rule amount_simple_number
35
- ( [±∓~∼∽≂≃≈≲≤<>≥≳] ws* )? number
35
+ ( amount_simple_comparator ws* )? number
36
+ end
37
+
38
+ rule amount_simple_comparator
39
+ '=' ws* [<>] /
40
+ [<>] ws* ( '=' / 'of gelijk aan'i !char / 'or equal to'i !char ) /
41
+ [±∓~∼∽≂≃≈≲≤<>≥≳] / '+/-' / '-/+'
36
42
  end
37
43
 
38
44
  rule amount_simple_unit
39
- ( percent / ( ( 'procent' / 'percent' / 'gram'i / 'ml'i / 'mg'i / 'gr'i / 'g'i ) !char ) )
45
+ ( percent / ( ( 'procent' / 'percent' / 'gram'i / 'ml'i / 'mg'i / 'gr'i / 'g'i / 'ppm'i ) !char ) )
40
46
  ( ws 'vol'i ( !char / '.' ) )?
41
47
  ( ws* '℮' )?
42
48
  end
@@ -23,9 +23,9 @@ module FoodIngredientParser::Strict::Grammar
23
23
  [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾'? /
24
24
  '⁽' [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾' /
25
25
  [˄^] digit /
26
- [†‡•°▪◊#˄^~˛] /
26
+ [†‡⁺•°▪◊#˄^~˛] /
27
27
  '*'+ /
28
- '(' ws* ( [†‡•°▪◊#˄^~˛] / '*'+ ) ws* ')'
28
+ '(' ws* ( [†‡⁺•°▪◊#˄^~˛] / '*'+ ) ws* ')'
29
29
  end
30
30
 
31
31
  rule digit
@@ -33,7 +33,8 @@ module FoodIngredientParser::Strict::Grammar
33
33
  end
34
34
 
35
35
  rule fraction
36
- [½⅓⅔¼¾⅕⅖⅗⅘⅙⅚⅐⅛⅜⅝⅞⅑⅒]
36
+ [½⅓⅔¼¾⅕⅖⅗⅘⅙⅚⅐⅛⅜⅝⅞⅑⅒] /
37
+ digit+ '/' digit+
37
38
  end
38
39
 
39
40
  rule percent
@@ -61,6 +62,20 @@ module FoodIngredientParser::Strict::Grammar
61
62
  ![[:alnum:]] / ( ws* '(' 'i'i+ ')' ) # e.g. "E450 (iii)"
62
63
  end
63
64
 
65
+ rule chem_systematic_name
66
+ ( chem_systematic_name_num dash ) ( chem_systematic_name_word dash chem_systematic_name_num dash ws? )* chem_systematic_name_word /
67
+ ( chem_systematic_name_word dash chem_systematic_name_num dash ws? )+ chem_systematic_name_word
68
+ end
69
+
70
+ rule chem_systematic_name_word
71
+ [A-Za-z]+ ( dash [A-Za-z]+ dash [A-Za-z]+ )*
72
+ end
73
+
74
+ rule chem_systematic_name_num
75
+ digit+ [RH] /
76
+ digit+ ( ',' digit+ )* '\''?
77
+ end
78
+
64
79
  rule abbrev
65
80
  # These are listed explicitly to avoid incorrect interpretations, and allow missing trailing dots.
66
81
  # To get an idea of what occurs (second one omits trailing dots):
@@ -115,6 +130,7 @@ module FoodIngredientParser::Strict::Grammar
115
130
  'w.o'i /
116
131
  'w.v'i /
117
132
  # not auto-generated additions
133
+ 'nr.'i /
118
134
  'vit'i / # vitamin
119
135
  'denat'i / # denaturated
120
136
  'alc'i / # alcohol
@@ -138,7 +154,8 @@ module FoodIngredientParser::Strict::Grammar
138
154
  'T.' ws* 'aestivum'i (ws+ 'vitt.'i)? /
139
155
  'nucifera' ws+ 'L.'i /
140
156
  'type'i ws+ '"' [0-9]+ '"' /
141
- e_number
157
+ e_number /
158
+ chem_systematic_name
142
159
  ) ![[:alpha:]]
143
160
  end
144
161
  end
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.1.6'
3
- VERSION_DATE = '2020-11-19'
2
+ VERSION = '1.1.7'
3
+ VERSION_DATE = '2020-12-28'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.6
4
+ version: 1.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-19 00:00:00.000000000 Z
11
+ date: 2020-12-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop