food_ingredient_parser 1.1.5 → 1.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 8c80d036dbee183ed2fd1a8cc4e513e54318d142
4
- data.tar.gz: 6cfcd29eacb9e99a9be9a66a90446f47f714ae4b
2
+ SHA256:
3
+ metadata.gz: e65fb234a122c893fcb4de453e4cac07851d5000b848fa99d2b356002e187a60
4
+ data.tar.gz: c72d35c6fc978a6ddc139e3adf2059c24104221627f04857ba73aad159b29a0f
5
5
  SHA512:
6
- metadata.gz: 75b1f91e5db6bcfcc24ad8eabe16b541663e5e344604c31b884ab1a5633c95dc045cdee2d384f5434f5143b778659cecd69917b9adb16e25341978380e486bcc
7
- data.tar.gz: 7254cca971a558bda2ae6e996cc4d121fc5138f09d67c847bb44eec87421aeea39bf18a771137eb3b0b2bb734bed058a1d2347f815b9edc1b23e0b069d83a381
6
+ metadata.gz: 295728820e893f1277bb06cfdad572df1fde9d496df230caf55f41b23d124677131b004a5ca0b2a2e4a25b2273f98feb18840c47c6361ec474082d66d4fde7fb
7
+ data.tar.gz: 0ca48100273055b6c8f6e1d89bd1b44976e325cc9633080d6570e52cb6ef5d07dd419013d9b9aff67f95b6e4fac6ae12396c9248123daf4902d3eb97d84d1b28
@@ -41,8 +41,10 @@ def parse_single(s, parsed=nil, parser:, verbosity: 1, print: nil, escape: false
41
41
  if parsed
42
42
  puts(parsed.inspect) if verbosity > 1
43
43
  pp(parsed.to_h, color: color) if verbosity > 0
44
+ return true
44
45
  else
45
46
  puts "(no result: #{parser.parser.failure_reason})" if verbosity > 0
47
+ return false
46
48
  end
47
49
  end
48
50
 
@@ -63,6 +65,7 @@ def parse_file(path, parser:, verbosity: 1, print: nil, escape: false, color: fa
63
65
  pct_parsed = 100.0 * count_parsed / (count_parsed + count_noresult)
64
66
  pct_noresult = 100.0 * count_noresult / (count_parsed + count_noresult)
65
67
  puts "parsed #{colorize(color && "1;32", count_parsed)} (#{pct_parsed.round(1)}%), no result #{colorize(color && "1;31", count_noresult)} (#{pct_noresult.round(1)}%)"
68
+ return count_noresult
66
69
  end
67
70
 
68
71
  verbosity = 1
@@ -108,8 +111,10 @@ if strings.any? || files.any?
108
111
  STDERR.puts("Please specify one of the known parsers: #{PARSERS.keys.join(", ")}.")
109
112
  exit(1)
110
113
  end
111
- strings.each {|s| parse_single(s, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) }
112
- files.each {|f| parse_file(f, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) }
114
+ success = true
115
+ strings.each {|s| success &= parse_single(s, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) }
116
+ files.each {|f| success &= parse_file(f, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) == 0 }
117
+ success or exit(1)
113
118
  else
114
119
  STDERR.puts("Please specify one or more --file or --string arguments (see --help).")
115
120
  end
@@ -4,6 +4,7 @@ module FoodIngredientParser
4
4
  def self.clean(s)
5
5
  s.gsub!("\u00ad", "") # strip soft hyphen
6
6
  s.gsub!("\u0092", "'") # windows-1252 apostrophe - https://stackoverflow.com/a/15564279/2866660
7
+ s.gsub!("‚", ",") # normalize unicode comma
7
8
  s.gsub!("aÄs", "aïs") # encoding issue for maïs
8
9
  s.gsub!("ï", "ï") # encoding issue
9
10
  s.gsub!("ë", "ë") # encoding issue
@@ -4,7 +4,7 @@ module FoodIngredientParser::Loose
4
4
  class Scanner
5
5
 
6
6
  SEP_CHARS = "|;,.".freeze
7
- MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡•°▪◊#^*".freeze
7
+ MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡•°▪◊#^˄*~".freeze
8
8
  PREFIX_RE = /\A\s*(ingredients|contains|ingred[iï][eë]nt(en)?(declaratie)?|bevat|dit zit er\s?in|samenstelling|zutaten)\b\s*[:;.]?\s*/i.freeze
9
9
  NOTE_RE = /\A\b(dit product kan\b|deze verpakking kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|allergie[- ]informatie(\s*:|\b)|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b|voor [0-9,.]+ (g\.?|gr\.?|ram|ml).*\bis [0-9,.]+ (g\.?|gr\.?|ram|ml).*\bgebruikt\b)/i.freeze
10
10
  # Keep in sync with +abbrev+ in the +Common+ grammar, plus relevant ones from the +Amount+ grammar.
@@ -17,6 +17,7 @@ module FoodIngredientParser::Loose
17
17
  L\.\s+rhamnosus\b | L\.\s+acidophilus\b | L\.\s+casei\b | B\.\s+lactis | A\.\s+oryzae |
18
18
  S\.\s+thermophilus\b | L\.\sbulgaricus\b |
19
19
  T\.\s*aestivum\b(\s+vitt\.)? |
20
+ nucifera\s+L\. |
20
21
  type\s+"\d+" |
21
22
  E-e?\d{3}[a-z]?\s*\(i+\) |
22
23
  www\.[-_\/:%.A-Za-z0-9]+
@@ -12,15 +12,20 @@ module FoodIngredientParser::Strict::Grammar
12
12
  rule char
13
13
  !mark [[:alnum:]] /
14
14
  fraction /
15
- [-/\`'´‘’+=_{}&] /
16
- [®™] /
17
- [¿?] / # weird characters turning up in names (e.g. encoding issues)
15
+ [-/\`'"´‘’+=_{}&] /
16
+ [®©™♣] /
17
+ [¿?¯] / # weird characters turning up in names (e.g. encoding issues)
18
18
  [₁₂₃₄₅₆₇₈₉] # can occur with vitamins
19
19
  end
20
20
 
21
21
  rule mark
22
22
  # mark referencing a footnote
23
- [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾'? / '⁽' [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾' / [†‡•°▪◊#^] / '*'+ / '(' ws* ( [†‡•°▪◊#^] / '*'+ ) ws* ')'
23
+ [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾'? /
24
+ '⁽' [¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº] '⁾' /
25
+ [˄^] digit /
26
+ [†‡•°▪◊#˄^~˛] /
27
+ '*'+ /
28
+ '(' ws* ( [†‡•°▪◊#˄^~˛] / '*'+ ) ws* ')'
24
29
  end
25
30
 
26
31
  rule digit
@@ -131,6 +136,7 @@ module FoodIngredientParser::Strict::Grammar
131
136
  'L.' ws+ 'rhamnosus'i / 'L.' ws+ 'acidophilus'i / 'L.' ws+ 'casei' / 'B.'i ws+ 'lactis'i / 'A.'i ws+ 'oryzae'i /
132
137
  'S.' ws+ 'thermophilus'i / 'L.' ws+ 'bulgaricus'i /
133
138
  'T.' ws* 'aestivum'i (ws+ 'vitt.'i)? /
139
+ 'nucifera' ws+ 'L.'i /
134
140
  'type'i ws+ '"' [0-9]+ '"' /
135
141
  e_number
136
142
  ) ![[:alpha:]]
@@ -40,7 +40,7 @@ module FoodIngredientParser::Strict::Grammar
40
40
  end
41
41
 
42
42
  rule root_mark_sentences_in_list
43
- ( ( ws* [,.;] / ws ) ws* root_mark_sentence_in_list )+
43
+ ( ( ws* [,.;] / ws )+ root_mark_sentence_in_list )+
44
44
  end
45
45
 
46
46
  rule root_mark_sentence_in_list
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.1.5'
3
- VERSION_DATE = '2019-11-14'
2
+ VERSION = '1.1.6'
3
+ VERSION_DATE = '2020-11-19'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.5
4
+ version: 1.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-14 00:00:00.000000000 Z
11
+ date: 2020-11-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop
@@ -87,8 +87,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
87
87
  - !ruby/object:Gem::Version
88
88
  version: '0'
89
89
  requirements: []
90
- rubyforge_project:
91
- rubygems_version: 2.6.13
90
+ rubygems_version: 3.0.3
92
91
  signing_key:
93
92
  specification_version: 4
94
93
  summary: Parser for ingredient lists found on food products.