food_ingredient_parser 1.0.0.pre.9 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f08e6fcc9422b83503d37b41111f3bd540c11909
4
- data.tar.gz: 16af14258ae67fa9b03b2b2196da9631d3dc6a9d
3
+ metadata.gz: 2c529b63bd3a9f6139ed10663b2cef70ff3d1dc6
4
+ data.tar.gz: a8effec91559e15920794c61b08a486e572032cf
5
5
  SHA512:
6
- metadata.gz: b1bfd6c713f0117cc8c13f4110624d270cb471096577e2dc47ea43f6f70664c88c9a2bec444bac407f9424b3c147245e16d04b5b0e9bf8442273c8d90c27955c
7
- data.tar.gz: 873de15303ea9bebb4ab9d3504851a4b364e1aef849f6c530235476a4d8e65867fe5426dd7b9274ecd99befb2de83746b8854a6ba08a24cb8caac7525973340b
6
+ metadata.gz: 35f27c7d83effc16962a65ac4c8c09fb5694373dbd3d2745c434c37ddcf3fc466264c0f10cbe5054876517839a81bf75ab0b1b9876098c1b04b5312138e06ea1
7
+ data.tar.gz: 28e517777928262b45836d899ff919f725e09ab6e116fd9f58545262b7ae7151821ae683e6d33146d3b5e20f5c02ad7217c4985267460645b0ba80e1ccc19751
data/README.md CHANGED
@@ -174,6 +174,17 @@ Even though the strict parser would not give a result, the loose parser returns:
174
174
  }
175
175
  ```
176
176
 
177
+ ## Compatibility
178
+
179
+ From the 1.0.0 release, the main interface will be stable. This comprises the two parsers' `parse`
180
+ methods (incl. documented options), their `nil` result when parsing failed, and the parsed output's
181
+ `to_h` and `to_html` methods (where available). Please note that parsed node trees may be subject to
182
+ change, even within a major release. Within a minor release, node trees are expected to remain stable.
183
+
184
+ So if you only use the stable interface (`parse`, `to_h` and `to_html`), you can lock your version
185
+ to e.g. `~> 1.0`. If you depend on more, lock your version against e.g. `~> 1.0.0` and test when you
186
+ upgrade to `1.1`.
187
+
177
188
  ## Test data
178
189
 
179
190
  [`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
@@ -6,14 +6,14 @@ module FoodIngredientParser::Loose
6
6
  SEP_CHARS = "|;,.".freeze
7
7
  MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡•°#^*".freeze
8
8
  PREFIX_RE = /\A\s*(ingredients|contains|ingred[iï][eë]nt(en)?(declaratie)?|bevat|dit zit er\s?in|samenstelling|zutaten)\b\s*[:;.]?\s*/i.freeze
9
- NOTE_RE = /\A\b(dit product kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b)/i.freeze
9
+ NOTE_RE = /\A\b(dit product kan\b|deze verpakking kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b)/i.freeze
10
10
  # Keep in sync with +abbrev+ in the +Common+ grammar, plus relevant ones from the +Amount+ grammar.
11
- ABBREV_RE = Regexp.union(%w[
11
+ ABBREV_RE = Regexp.union(/\A(N°|°C|(ijzer|chroom|koper)\s*\(I+\)\s*[[:alnum:]]+)\b/i, *%w[
12
12
  a.o.p b.g.a b.o.b c.a c.i d.e d.m.v d.o.c d.o.p d.s e.a e.g e.u f.i.l f.o.s i.a
13
13
  i.d i.e i.g.m.e i.g.p i.m.v i.o i.v.m l.s.l n.a n.b n.o n.v.t o.a o.b.v p.d.o
14
- p.g.i q.s s.l s.s t.o.v u.h.t v.g v.s w.a w.o w.v vit denat N° °C
14
+ p.g.i q.s s.l s.s t.o.v u.h.t v.g v.s w.a w.o w.v vit denat
15
15
  min max ca
16
- ].map {|s| /\A#{Regexp.escape(s)}\b\.?/})
16
+ ].map {|s| /\A#{Regexp.escape(s)}\b\.?/i}).freeze
17
17
 
18
18
  def initialize(s, index: 0)
19
19
  @s = s # input string
@@ -68,7 +68,8 @@ module FoodIngredientParser::Loose
68
68
  add_child
69
69
  elsif ":".include?(c) # another open nesting
70
70
  if @s[@i+1..-1] =~ /\A\s*(\(|\[)/
71
- # ignore if before an open bracket, then it's a regular nesting
71
+ # ignore colon before an open bracket, then it's a regular nesting
72
+ name_until_here
72
73
  else
73
74
  open_parent(auto_close: true)
74
75
  @iterator = :colon
@@ -129,16 +130,16 @@ module FoodIngredientParser::Loose
129
130
  chars.include?(c) && @s[@i-1..@i+1] !~ /\A\d.\d\z/
130
131
  end
131
132
 
132
- def is_mark?
133
- mark_len > 0 && @s[@i..@i+1] !~ /\A°[CF]/
133
+ def is_mark?(i = @i)
134
+ mark_len(i) > 0 && @s[i..i+1] !~ /\A°[CF]/
134
135
  end
135
136
 
136
- def mark_len
137
- i = @i
138
- while @s[i] && MARK_CHARS.include?(@s[i])
139
- i += 1
137
+ def mark_len(i = @i)
138
+ j = i
139
+ while @s[j] && MARK_CHARS.include?(@s[j])
140
+ j += 1
140
141
  end
141
- i - @i
142
+ j - i
142
143
  end
143
144
 
144
145
  def abbrev_len
@@ -162,8 +163,8 @@ module FoodIngredientParser::Loose
162
163
  end
163
164
 
164
165
  def add_child
166
+ name_until_here
165
167
  cur.ends(@i-1)
166
- cur.name ||= Node.new(@s, cur.interval)
167
168
  parent.send(@dest) << cur
168
169
  @cur = nil
169
170
  end
@@ -192,7 +193,11 @@ module FoodIngredientParser::Loose
192
193
  end
193
194
 
194
195
  def name_until_here
195
- cur.name ||= Node.new(@s, cur.interval.first .. @i-1)
196
+ cur.name ||= begin
197
+ i, j = cur.interval.first, @i - 1
198
+ i += mark_len(i) # skip any mark in front
199
+ Node.new(@s, i .. j) if j > i
200
+ end
196
201
  end
197
202
 
198
203
  def dot_is_not_sep?
@@ -44,7 +44,7 @@ module FoodIngredientParser::Strict::Grammar
44
44
  end
45
45
 
46
46
  rule word
47
- abbrev / char+
47
+ abbrev / word_complex / char+
48
48
  end
49
49
 
50
50
  rule and
@@ -104,13 +104,20 @@ module FoodIngredientParser::Strict::Grammar
104
104
  'w.a'i /
105
105
  'w.o'i /
106
106
  'w.v'i /
107
- # special words and abbreviations (not auto-generated)
108
- 'vit.'i /
109
- 'denat.'i /
110
- 'N°'i /
111
- '°C'i
107
+ # not auto-generated additions
108
+ 'vit'i /
109
+ 'denat'i
112
110
  )
113
111
  '.'? ![[:alpha:]]
114
112
  end
113
+
114
+ rule word_complex
115
+ # Complex words that contain characters that would otherwise be considered non-words.
116
+ (
117
+ 'N°'i /
118
+ '°C'i /
119
+ ( 'ijzer'i / 'chroom'i / 'koper'i ) ws* '(' 'I'i+ ')' ws* [[:alnum:]]+
120
+ ) ![[:alpha:]]
121
+ end
115
122
  end
116
123
  end
@@ -33,7 +33,7 @@ module FoodIngredientParser::Strict::Grammar
33
33
 
34
34
  # @see Common#word
35
35
  rule ingredient_coloned_word
36
- abbrev / ( !'/' char )+
36
+ abbrev / word_complex / ( !'/' char )+
37
37
  end
38
38
 
39
39
  end
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.0.0.pre.9'
3
- VERSION_DATE = '2018-09-19'
2
+ VERSION = '1.0.0'
3
+ VERSION_DATE = '2018-09-21'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0.pre.9
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-19 00:00:00.000000000 Z
11
+ date: 2018-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop
@@ -81,9 +81,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
81
81
  version: '0'
82
82
  required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  requirements:
84
- - - ">"
84
+ - - ">="
85
85
  - !ruby/object:Gem::Version
86
- version: 1.3.1
86
+ version: '0'
87
87
  requirements: []
88
88
  rubyforge_project:
89
89
  rubygems_version: 2.6.13