food_ingredient_parser 1.0.0.pre.9 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f08e6fcc9422b83503d37b41111f3bd540c11909
4
- data.tar.gz: 16af14258ae67fa9b03b2b2196da9631d3dc6a9d
3
+ metadata.gz: 2c529b63bd3a9f6139ed10663b2cef70ff3d1dc6
4
+ data.tar.gz: a8effec91559e15920794c61b08a486e572032cf
5
5
  SHA512:
6
- metadata.gz: b1bfd6c713f0117cc8c13f4110624d270cb471096577e2dc47ea43f6f70664c88c9a2bec444bac407f9424b3c147245e16d04b5b0e9bf8442273c8d90c27955c
7
- data.tar.gz: 873de15303ea9bebb4ab9d3504851a4b364e1aef849f6c530235476a4d8e65867fe5426dd7b9274ecd99befb2de83746b8854a6ba08a24cb8caac7525973340b
6
+ metadata.gz: 35f27c7d83effc16962a65ac4c8c09fb5694373dbd3d2745c434c37ddcf3fc466264c0f10cbe5054876517839a81bf75ab0b1b9876098c1b04b5312138e06ea1
7
+ data.tar.gz: 28e517777928262b45836d899ff919f725e09ab6e116fd9f58545262b7ae7151821ae683e6d33146d3b5e20f5c02ad7217c4985267460645b0ba80e1ccc19751
data/README.md CHANGED
@@ -174,6 +174,17 @@ Even though the strict parser would not give a result, the loose parser returns:
174
174
  }
175
175
  ```
176
176
 
177
+ ## Compatibility
178
+
179
+ From the 1.0.0 release, the main interface will be stable. This comprises the two parsers' `parse`
180
+ methods (incl. documented options), their `nil` result when parsing fails, and the parsed output's
181
+ `to_h` and `to_html` methods (where available). Please note that parsed node trees may be subject to
182
+ change, even within a major release. Within a minor release, node trees are expected to remain stable.
183
+
184
+ So if you only use the stable interface (`parse`, `to_h` and `to_html`), you can lock your version
185
+ to e.g. `~> 1.0`. If you depend on more, lock your version against e.g. `~> 1.0.0` and test when you
186
+ upgrade to `1.1`.
187
+
177
188
  ## Test data
178
189
 
179
190
  [`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
@@ -6,14 +6,14 @@ module FoodIngredientParser::Loose
6
6
  SEP_CHARS = "|;,.".freeze
7
7
  MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡•°#^*".freeze
8
8
  PREFIX_RE = /\A\s*(ingredients|contains|ingred[iï][eë]nt(en)?(declaratie)?|bevat|dit zit er\s?in|samenstelling|zutaten)\b\s*[:;.]?\s*/i.freeze
9
- NOTE_RE = /\A\b(dit product kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b)/i.freeze
9
+ NOTE_RE = /\A\b(dit product kan\b|deze verpakking kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b)/i.freeze
10
10
  # Keep in sync with +abbrev+ in the +Common+ grammar, plus relevant ones from the +Amount+ grammar.
11
- ABBREV_RE = Regexp.union(%w[
11
+ ABBREV_RE = Regexp.union(/\A(N°|°C|(ijzer|chroom|koper)\s*\(I+\)\s*[[:alnum:]]+)\b/i, *%w[
12
12
  a.o.p b.g.a b.o.b c.a c.i d.e d.m.v d.o.c d.o.p d.s e.a e.g e.u f.i.l f.o.s i.a
13
13
  i.d i.e i.g.m.e i.g.p i.m.v i.o i.v.m l.s.l n.a n.b n.o n.v.t o.a o.b.v p.d.o
14
- p.g.i q.s s.l s.s t.o.v u.h.t v.g v.s w.a w.o w.v vit denat N° °C
14
+ p.g.i q.s s.l s.s t.o.v u.h.t v.g v.s w.a w.o w.v vit denat
15
15
  min max ca
16
- ].map {|s| /\A#{Regexp.escape(s)}\b\.?/})
16
+ ].map {|s| /\A#{Regexp.escape(s)}\b\.?/i}).freeze
17
17
 
18
18
  def initialize(s, index: 0)
19
19
  @s = s # input string
@@ -68,7 +68,8 @@ module FoodIngredientParser::Loose
68
68
  add_child
69
69
  elsif ":".include?(c) # another open nesting
70
70
  if @s[@i+1..-1] =~ /\A\s*(\(|\[)/
71
- # ignore if before an open bracket, then it's a regular nesting
71
+ # ignore colon before an open bracket, then it's a regular nesting
72
+ name_until_here
72
73
  else
73
74
  open_parent(auto_close: true)
74
75
  @iterator = :colon
@@ -129,16 +130,16 @@ module FoodIngredientParser::Loose
129
130
  chars.include?(c) && @s[@i-1..@i+1] !~ /\A\d.\d\z/
130
131
  end
131
132
 
132
- def is_mark?
133
- mark_len > 0 && @s[@i..@i+1] !~ /\A°[CF]/
133
+ def is_mark?(i = @i)
134
+ mark_len(i) > 0 && @s[i..i+1] !~ /\A°[CF]/
134
135
  end
135
136
 
136
- def mark_len
137
- i = @i
138
- while @s[i] && MARK_CHARS.include?(@s[i])
139
- i += 1
137
+ def mark_len(i = @i)
138
+ j = i
139
+ while @s[j] && MARK_CHARS.include?(@s[j])
140
+ j += 1
140
141
  end
141
- i - @i
142
+ j - i
142
143
  end
143
144
 
144
145
  def abbrev_len
@@ -162,8 +163,8 @@ module FoodIngredientParser::Loose
162
163
  end
163
164
 
164
165
  def add_child
166
+ name_until_here
165
167
  cur.ends(@i-1)
166
- cur.name ||= Node.new(@s, cur.interval)
167
168
  parent.send(@dest) << cur
168
169
  @cur = nil
169
170
  end
@@ -192,7 +193,11 @@ module FoodIngredientParser::Loose
192
193
  end
193
194
 
194
195
  def name_until_here
195
- cur.name ||= Node.new(@s, cur.interval.first .. @i-1)
196
+ cur.name ||= begin
197
+ i, j = cur.interval.first, @i - 1
198
+ i += mark_len(i) # skip any mark in front
199
+ Node.new(@s, i .. j) if j > i
200
+ end
196
201
  end
197
202
 
198
203
  def dot_is_not_sep?
@@ -44,7 +44,7 @@ module FoodIngredientParser::Strict::Grammar
44
44
  end
45
45
 
46
46
  rule word
47
- abbrev / char+
47
+ abbrev / word_complex / char+
48
48
  end
49
49
 
50
50
  rule and
@@ -104,13 +104,20 @@ module FoodIngredientParser::Strict::Grammar
104
104
  'w.a'i /
105
105
  'w.o'i /
106
106
  'w.v'i /
107
- # special words and abbreviations (not auto-generated)
108
- 'vit.'i /
109
- 'denat.'i /
110
- 'N°'i /
111
- '°C'i
107
+ # not auto-generated additions
108
+ 'vit'i /
109
+ 'denat'i
112
110
  )
113
111
  '.'? ![[:alpha:]]
114
112
  end
113
+
114
+ rule word_complex
115
+ # Complex words that contain characters that would otherwise be considered non-words.
116
+ (
117
+ 'N°'i /
118
+ '°C'i /
119
+ ( 'ijzer'i / 'chroom'i / 'koper'i ) ws* '(' 'I'i+ ')' ws* [[:alnum:]]+
120
+ ) ![[:alpha:]]
121
+ end
115
122
  end
116
123
  end
@@ -33,7 +33,7 @@ module FoodIngredientParser::Strict::Grammar
33
33
 
34
34
  # @see Common#word
35
35
  rule ingredient_coloned_word
36
- abbrev / ( !'/' char )+
36
+ abbrev / word_complex / ( !'/' char )+
37
37
  end
38
38
 
39
39
  end
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.0.0.pre.9'
3
- VERSION_DATE = '2018-09-19'
2
+ VERSION = '1.0.0'
3
+ VERSION_DATE = '2018-09-21'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0.pre.9
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-19 00:00:00.000000000 Z
11
+ date: 2018-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop
@@ -81,9 +81,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
81
81
  version: '0'
82
82
  required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  requirements:
84
- - - ">"
84
+ - - ">="
85
85
  - !ruby/object:Gem::Version
86
- version: 1.3.1
86
+ version: '0'
87
87
  requirements: []
88
88
  rubyforge_project:
89
89
  rubygems_version: 2.6.13