openfoodfacts 0.6.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/README.md +24 -3
  3. data/Rakefile +7 -5
  4. data/lib/openfoodfacts/additive.rb +9 -13
  5. data/lib/openfoodfacts/allergen.rb +4 -6
  6. data/lib/openfoodfacts/brand.rb +5 -7
  7. data/lib/openfoodfacts/category.rb +4 -6
  8. data/lib/openfoodfacts/city.rb +4 -6
  9. data/lib/openfoodfacts/contributor.rb +4 -6
  10. data/lib/openfoodfacts/country.rb +4 -6
  11. data/lib/openfoodfacts/entry_date.rb +5 -7
  12. data/lib/openfoodfacts/faq.rb +11 -16
  13. data/lib/openfoodfacts/ingredient.rb +31 -0
  14. data/lib/openfoodfacts/ingredient_that_may_be_from_palm_oil.rb +4 -6
  15. data/lib/openfoodfacts/label.rb +4 -6
  16. data/lib/openfoodfacts/language.rb +5 -7
  17. data/lib/openfoodfacts/last_edit_date.rb +5 -7
  18. data/lib/openfoodfacts/locale.rb +12 -14
  19. data/lib/openfoodfacts/manufacturing_place.rb +4 -6
  20. data/lib/openfoodfacts/mission.rb +20 -21
  21. data/lib/openfoodfacts/number_of_ingredients.rb +5 -7
  22. data/lib/openfoodfacts/nutrition_grade.rb +5 -7
  23. data/lib/openfoodfacts/origin.rb +4 -6
  24. data/lib/openfoodfacts/packager_code.rb +4 -6
  25. data/lib/openfoodfacts/packaging.rb +4 -6
  26. data/lib/openfoodfacts/period_after_opening.rb +31 -0
  27. data/lib/openfoodfacts/press.rb +16 -13
  28. data/lib/openfoodfacts/product.rb +132 -67
  29. data/lib/openfoodfacts/product_state.rb +5 -7
  30. data/lib/openfoodfacts/purchase_place.rb +4 -6
  31. data/lib/openfoodfacts/store.rb +4 -6
  32. data/lib/openfoodfacts/trace.rb +4 -6
  33. data/lib/openfoodfacts/user.rb +16 -15
  34. data/lib/openfoodfacts/version.rb +1 -1
  35. data/lib/openfoodfacts.rb +11 -2
  36. metadata +17 -18
  37. data/test/minitest_helper.rb +0 -18
  38. data/test/test_openfoodfacts.rb +0 -329
@@ -1,89 +1,139 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'cgi'
2
4
  require 'hashie'
3
5
  require 'net/http'
4
6
  require 'nokogiri'
5
- require 'open-uri'
6
7
 
7
8
  module Openfoodfacts
8
9
  class Product < Hashie::Mash
9
-
10
+ # disable_warnings
10
11
  # TODO: Add more locales
11
12
  LOCALE_WEBURL_PREFIXES = {
12
13
  'fr' => 'produit',
13
14
  'uk' => 'product',
14
15
  'us' => 'product',
15
16
  'world' => 'product'
16
- }
17
+ }.freeze
17
18
 
18
19
  class << self
19
-
20
20
  # Get product
21
21
  #
22
22
  def get(code, locale: DEFAULT_LOCALE)
23
- if code
24
- product_url = url(code, locale: locale)
25
- json = URI.open(product_url).read
26
- hash = JSON.parse(json)
23
+ return unless code
27
24
 
28
- new(hash["product"]) if !hash["status"].nil? && hash["status"] == 1
29
- end
25
+ product_url = url(code, locale: locale)
26
+ json = Openfoodfacts.http_get(product_url).read
27
+ hash = JSON.parse(json)
28
+
29
+ new(hash['product']) if !hash['status'].nil? && hash['status'] == 1
30
30
  end
31
- alias_method :find, :get
31
+ alias find get
32
32
 
33
33
  # Return product API URL
34
34
  #
35
35
  def url(code, locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
36
- if code
37
- path = "api/v0/produit/#{code}.json"
38
- "https://#{locale}.#{domain}/#{path}"
39
- end
36
+ return unless code
37
+
38
+ prefix = LOCALE_WEBURL_PREFIXES[locale]
39
+ path = "api/v2/#{prefix}/#{code}.json"
40
+ "https://#{locale}.#{domain}/#{path}"
40
41
  end
41
42
 
42
43
  # Search products
43
44
  #
44
- def search(terms, locale: DEFAULT_LOCALE, page: 1, page_size: 20, sort_by: 'unique_scans_n', domain: DEFAULT_DOMAIN)
45
+ def search(terms, locale: DEFAULT_LOCALE, page: 1, page_size: 20, sort_by: 'unique_scans_n',
46
+ domain: DEFAULT_DOMAIN)
45
47
  terms = CGI.escape(terms)
46
- path = "cgi/search.pl?search_terms=#{terms}&jqm=1&page=#{page}&page_size=#{page_size}&sort_by=#{sort_by}"
48
+ path = "cgi/search.pl?search_terms=#{terms}&json=1&page=#{page}&page_size=#{page_size}&sort_by=#{sort_by}"
47
49
  url = "https://#{locale}.#{domain}/#{path}"
48
- json = URI.open(url).read
50
+ json = Openfoodfacts.http_get(url).read
49
51
  hash = JSON.parse(json)
50
- html = hash["jqm"]
51
-
52
- from_jquery_mobile_list(html)
52
+ products = []
53
+ hash['products'].each do |data|
54
+ products << new(data)
55
+ end
56
+ products
53
57
  end
54
- alias_method :where, :search
58
+ alias where search
55
59
 
56
60
  def from_html_list(html, list_css_selector, code_from_link_regex, locale: 'world')
57
61
  dom = Nokogiri::HTML.fragment(html)
58
- dom.css(list_css_selector).map do |product|
62
+ dom.css(list_css_selector).filter_map do |product|
59
63
  attributes = {}
60
64
 
61
- if link = product.css('a').first
62
- attributes["product_name"] = link.inner_text.strip
63
-
64
- if code = link.attr('href')[code_from_link_regex, 1]
65
- attributes["_id"] = code
66
- attributes["code"] = code
65
+ # Look for product links with multiple patterns
66
+ link = product.css('a[href*="/product/"], a[href*="/produit/"]').first
67
+ link ||= product.css('a').first
68
+
69
+ next unless link
70
+
71
+ attributes['product_name'] = link.inner_text.strip
72
+ href = link.attr('href')
73
+
74
+ # Try multiple regex patterns for extracting product codes
75
+ regexes = [
76
+ code_from_link_regex, # Original pattern
77
+ %r{/product/(\d+)}i, # /product/123456
78
+ %r{/produit/(\d+)}i, # /produit/123456 (French)
79
+ %r{/(\d{8,})}, # Any 8+ digit number
80
+ %r{product[/=](\d+)}i, # product=123456 or product/123456
81
+ %r{code[/=](\d+)}i # code=123456 or code/123456
82
+ ]
83
+
84
+ code = nil
85
+ regexes.each do |regex|
86
+ match = href[regex, 1]
87
+ if match && match.length >= 8 # Product codes are typically 8+ digits
88
+ code = match
89
+ break
67
90
  end
68
91
  end
69
92
 
70
- if image = product.css('img').first and image_url = image.attr('src')
71
- attributes["image_small_url"] = image_url
72
- attributes["lc"] = Locale.locale_from_link(image_url)
93
+ next unless code
94
+
95
+ attributes['_id'] = code
96
+ attributes['code'] = code
97
+
98
+ # Skip products without valid codes
99
+
100
+ if (image = product.css('img').first) && (image_url = image.attr('src'))
101
+ attributes['image_small_url'] = image_url
102
+ attributes['lc'] = Locale.locale_from_link(image_url)
73
103
  end
74
- attributes["lc"] ||= locale
104
+ attributes['lc'] ||= locale
75
105
 
76
106
  new(attributes)
77
107
  end
78
-
79
- end
80
-
81
- def from_jquery_mobile_list(jqm_html)
82
- from_html_list(jqm_html, 'ul#search_results_list li:not(#loadmore)', /code=(\d+)\Z/i)
83
108
  end
84
109
 
85
110
  def from_website_list(html, locale: 'world')
86
- from_html_list(html, 'ul.products li', /\/(\d+)\/?/i, locale: 'world')
111
+ # Try multiple CSS selectors to handle different page structures
112
+ selectors = [
113
+ 'ul.products li', # Original selector
114
+ '.search_results article', # Modern article-based structure
115
+ '.search-results .result', # Alternative modern structure
116
+ 'article', # Simple article tags
117
+ '.product-item', # Product item classes
118
+ '.product', # Simple product classes
119
+ 'li[data-product-code]' # Data attribute based
120
+ ]
121
+
122
+ dom = Nokogiri::HTML.fragment(html)
123
+
124
+ selectors.each do |selector|
125
+ elements = dom.css(selector)
126
+ next if elements.empty?
127
+
128
+ # Check if elements contain product links
129
+ first_element = elements.first
130
+ if first_element && (first_element.css('a[href*="/product/"]').any? || first_element.css('a[href*="/produit/"]').any?)
131
+ return from_html_list(html, selector, %r{/(\d+)/?}i, locale: locale)
132
+ end
133
+ end
134
+
135
+ # Fallback: return empty array if no products found
136
+ []
87
137
  end
88
138
 
89
139
  # page -1 to fetch all pages
@@ -96,22 +146,39 @@ module Openfoodfacts
96
146
  products = []
97
147
 
98
148
  page = 1
99
- begin
149
+ loop do
100
150
  products_on_page = from_website_page(page_url, page: page)
101
151
  products += products_on_page
102
152
  page += 1
103
- end while products_on_page.any?
153
+ break unless products_on_page.any?
154
+ end
104
155
 
105
156
  products
106
157
  end
107
158
  else
108
- html = URI.open("#{page_url}/#{page}").read
159
+ # Try different URL formats for pagination
160
+ urls_to_try = [
161
+ "#{page_url}/#{page}", # Original format: /page/1
162
+ "#{page_url}?page=#{page}", # Query parameter: ?page=1
163
+ "#{page_url}#{page_url.include?('?') ? '&' : '?'}page=#{page}" # Proper query parameter handling
164
+ ]
165
+
166
+ html = nil
167
+ urls_to_try.each do |url|
168
+ html = Openfoodfacts.http_get(url).read
169
+ break if html&.length&.positive?
170
+ rescue StandardError
171
+ # Continue to next URL format
172
+ next
173
+ end
174
+
175
+ html ||= '' # Fallback to empty string if all URLs fail
109
176
  from_website_list(html, locale: Locale.locale_from_link(page_url))
110
177
  end
111
178
  end
112
179
 
113
180
  def tags_from_page(_klass, page_url, &custom_tag_parsing)
114
- html = URI.open(page_url).read
181
+ html = Openfoodfacts.http_get(page_url).read
115
182
  dom = Nokogiri::HTML.fragment(html)
116
183
 
117
184
  dom.css('table#tagstable tbody tr').map do |tag|
@@ -125,34 +192,33 @@ module Openfoodfacts
125
192
  if (name.nil? || name == '') && img_alt
126
193
  img_alt_text = img_alt.to_s.strip
127
194
  name = if img_alt_text.include?(':')
128
- img_alt_text.split(':').last.strip
129
- else
130
- img_alt_text[/\s+([^\s]+)$/, 1]
131
- end
195
+ img_alt_text.split(':').last.strip
196
+ else
197
+ img_alt_text[/\s+([^\s]+)$/, 1]
198
+ end
132
199
  end
133
200
 
134
201
  _klass.new({
135
- "name" => name,
136
- "url" => URI.join(page_url, link.attr('href')).to_s,
137
- "products_count" => tag.css('td')[1].text.to_i
138
- })
202
+ 'name' => name,
203
+ 'url' => URI.join(page_url, link.attr('href')).to_s,
204
+ 'products_count' => tag.css('td')[1].text.to_i
205
+ })
139
206
  end
140
207
  end
141
208
  end
142
-
143
209
  end
144
210
 
145
211
  # Fetch product
146
212
  #
147
213
  def fetch
148
- if (self.code)
149
- product = self.class.get(self.code)
150
- self.merge!(product)
214
+ if code
215
+ product = self.class.get(code)
216
+ merge!(product) if product
151
217
  end
152
218
 
153
219
  self
154
220
  end
155
- alias_method :reload, :fetch
221
+ alias reload fetch
156
222
 
157
223
  # Update product
158
224
  # Only the product_name, brands and quantity fields seem to be updatable through the app / API.
@@ -160,38 +226,37 @@ module Openfoodfacts
160
226
  # Fields tested and found not updatable: countries, ingredients_text, purchase_places, purchase_places_tag, purchase_places_tags
161
227
  #
162
228
  def update(user: nil, domain: DEFAULT_DOMAIN)
163
- if self.code && self.lc
164
- subdomain = self.lc == 'world' ? 'world' : "world-#{self.lc}"
229
+ if code && lc
230
+ subdomain = lc == 'world' ? 'world' : "world-#{lc}"
165
231
  path = 'cgi/product_jqm.pl'
166
232
  uri = URI("https://#{subdomain}.#{domain}/#{path}")
167
- params = self.to_hash
168
- params.merge!("user_id" => user.user_id, "password" => user.password) if user
233
+ params = to_hash
234
+ params.merge!('user_id' => user.user_id, 'password' => user.password) if user
169
235
  response = Net::HTTP.post_form(uri, params)
170
236
 
171
237
  data = JSON.parse(response.body)
172
- data["status"] == 1
238
+ data['status'] == 1
173
239
  else
174
240
  false
175
241
  end
176
242
  end
177
- alias_method :save, :update
243
+ alias save update
178
244
 
179
245
  # Return Product API URL
180
246
  #
181
247
  def url(locale: DEFAULT_LOCALE)
182
- self.class.url(self.code, locale: locale)
248
+ self.class.url(code, locale: locale)
183
249
  end
184
250
 
185
251
  # Return Product web URL according to locale
186
252
  #
187
253
  def weburl(locale: nil, domain: DEFAULT_DOMAIN)
188
- locale ||= self.lc || DEFAULT_LOCALE
254
+ locale ||= lc || DEFAULT_LOCALE
189
255
 
190
- if self.code && prefix = LOCALE_WEBURL_PREFIXES[locale]
191
- path = "#{prefix}/#{self.code}"
256
+ if code && (prefix = LOCALE_WEBURL_PREFIXES[locale])
257
+ path = "#{prefix}/#{code}"
192
258
  "https://#{locale}.#{domain}/#{path}"
193
259
  end
194
260
  end
195
-
196
261
  end
197
262
  end
@@ -1,26 +1,25 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'hashie'
2
4
 
3
5
  module Openfoodfacts
4
6
  class ProductState < Hashie::Mash
5
-
6
7
  # TODO: Add more locales
7
8
  LOCALE_PATHS = {
8
9
  'fr' => 'etats',
9
10
  'uk' => 'states',
10
11
  'us' => 'states',
11
12
  'world' => 'states'
12
- }
13
+ }.freeze
13
14
 
14
15
  class << self
15
-
16
16
  # Get product states
17
17
  #
18
18
  def all(locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
19
- if path = LOCALE_PATHS[locale]
20
- Product.tags_from_page(self, "https://#{locale}.#{domain}/#{path}")
19
+ if (path = LOCALE_PATHS[locale])
20
+ Product.tags_from_page(self, "https://#{locale}.#{domain}/facets/#{path}")
21
21
  end
22
22
  end
23
-
24
23
  end
25
24
 
26
25
  # Get products with state
@@ -28,6 +27,5 @@ module Openfoodfacts
28
27
  def products(page: -1)
29
28
  Product.from_website_page(url, page: page, products_count: products_count) if url
30
29
  end
31
-
32
30
  end
33
31
  end
@@ -1,26 +1,25 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'hashie'
2
4
 
3
5
  module Openfoodfacts
4
6
  class PurchasePlace < Hashie::Mash
5
-
6
7
  # TODO: Add more locales
7
8
  LOCALE_PATHS = {
8
9
  'fr' => 'lieux-de-vente',
9
10
  'uk' => 'purchase-places',
10
11
  'us' => 'purchase-places',
11
12
  'world' => 'purchase-places'
12
- }
13
+ }.freeze
13
14
 
14
15
  class << self
15
-
16
16
  # Get purchase places
17
17
  #
18
18
  def all(locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
19
- if path = LOCALE_PATHS[locale]
19
+ if (path = LOCALE_PATHS[locale])
20
20
  Product.tags_from_page(self, "https://#{locale}.#{domain}/#{path}")
21
21
  end
22
22
  end
23
-
24
23
  end
25
24
 
26
25
  # Get products with purchase place
@@ -28,6 +27,5 @@ module Openfoodfacts
28
27
  def products(page: -1)
29
28
  Product.from_website_page(url, page: page, products_count: products_count) if url
30
29
  end
31
-
32
30
  end
33
31
  end
@@ -1,26 +1,25 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'hashie'
2
4
 
3
5
  module Openfoodfacts
4
6
  class Store < Hashie::Mash
5
-
6
7
  # TODO: Add more locales
7
8
  LOCALE_PATHS = {
8
9
  'fr' => 'magasins',
9
10
  'uk' => 'stores',
10
11
  'us' => 'stores',
11
12
  'world' => 'stores'
12
- }
13
+ }.freeze
13
14
 
14
15
  class << self
15
-
16
16
  # Get stores
17
17
  #
18
18
  def all(locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
19
- if path = LOCALE_PATHS[locale]
19
+ if (path = LOCALE_PATHS[locale])
20
20
  Product.tags_from_page(self, "https://#{locale}.#{domain}/#{path}")
21
21
  end
22
22
  end
23
-
24
23
  end
25
24
 
26
25
  # Get products from store
@@ -28,6 +27,5 @@ module Openfoodfacts
28
27
  def products(page: -1)
29
28
  Product.from_website_page(url, page: page, products_count: products_count) if url
30
29
  end
31
-
32
30
  end
33
31
  end
@@ -1,26 +1,25 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'hashie'
2
4
 
3
5
  module Openfoodfacts
4
6
  class Trace < Hashie::Mash
5
-
6
7
  # TODO: Add more locales
7
8
  LOCALE_PATHS = {
8
9
  'fr' => 'traces',
9
10
  'uk' => 'traces',
10
11
  'us' => 'traces',
11
12
  'world' => 'traces'
12
- }
13
+ }.freeze
13
14
 
14
15
  class << self
15
-
16
16
  # Get traces
17
17
  #
18
18
  def all(locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
19
- if path = LOCALE_PATHS[locale]
19
+ if (path = LOCALE_PATHS[locale])
20
20
  Product.tags_from_page(self, "https://#{locale}.#{domain}/#{path}")
21
21
  end
22
22
  end
23
-
24
23
  end
25
24
 
26
25
  # Get products with trace
@@ -28,6 +27,5 @@ module Openfoodfacts
28
27
  def products(page: -1)
29
28
  Product.from_website_page(url, page: page, products_count: products_count) if url
30
29
  end
31
-
32
30
  end
33
31
  end
@@ -1,40 +1,41 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'net/http'
2
4
 
3
5
  module Openfoodfacts
4
6
  class User < Hashie::Mash
5
-
6
7
  class << self
7
-
8
8
  # Login
9
9
  #
10
10
  def login(user_id, password, locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
11
11
  path = 'cgi/session.pl'
12
12
  uri = URI("https://#{locale}.#{domain}/#{path}")
13
13
  params = {
14
- "jqm" => "1",
15
- "user_id" => user_id,
16
- "password" => password
14
+ 'jqm' => '1',
15
+ 'user_id' => user_id,
16
+ 'password' => password
17
17
  }
18
18
 
19
19
  response = Net::HTTP.post_form(uri, params)
20
+ return nil if response.code != '200'
21
+
20
22
  data = JSON.parse(response.body)
21
23
 
22
- if data['user_id']
23
- data.merge!(password: password)
24
- new(data)
25
- end
26
- end
24
+ return unless data['user_id']
27
25
 
26
+ data.merge!(password: password)
27
+ new(data)
28
+ end
28
29
  end
29
30
 
30
31
  # Login
31
32
  #
32
33
  def login(locale: DEFAULT_LOCALE)
33
- if user = self.class.login(self.user_id, self.password, locale: locale)
34
- self.name = user.name
35
- self
36
- end
37
- end
34
+ user = self.class.login(user_id, password, locale: locale)
35
+ return unless user
38
36
 
37
+ self.name = user.name
38
+ self
39
+ end
39
40
  end
40
41
  end
@@ -1,3 +1,3 @@
1
1
  module Openfoodfacts
2
- VERSION = "0.6.2"
2
+ VERSION = "0.10.0"
3
3
  end
data/lib/openfoodfacts.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'openfoodfacts/additive'
2
4
  require_relative 'openfoodfacts/brand'
3
5
  require_relative 'openfoodfacts/category'
@@ -6,6 +8,7 @@ require_relative 'openfoodfacts/contributor'
6
8
  require_relative 'openfoodfacts/country'
7
9
  require_relative 'openfoodfacts/entry_date'
8
10
  require_relative 'openfoodfacts/faq'
11
+ require_relative 'openfoodfacts/ingredient'
9
12
  require_relative 'openfoodfacts/ingredient_that_may_be_from_palm_oil'
10
13
  require_relative 'openfoodfacts/label'
11
14
  require_relative 'openfoodfacts/language'
@@ -18,6 +21,7 @@ require_relative 'openfoodfacts/nutrition_grade'
18
21
  require_relative 'openfoodfacts/origin'
19
22
  require_relative 'openfoodfacts/packager_code'
20
23
  require_relative 'openfoodfacts/packaging'
24
+ require_relative 'openfoodfacts/period_after_opening'
21
25
  require_relative 'openfoodfacts/press'
22
26
  require_relative 'openfoodfacts/product'
23
27
  require_relative 'openfoodfacts/product_state'
@@ -32,11 +36,17 @@ require 'nokogiri'
32
36
  require 'open-uri'
33
37
 
34
38
  module Openfoodfacts
35
-
36
39
  DEFAULT_LOCALE = Locale::GLOBAL
37
40
  DEFAULT_DOMAIN = 'openfoodfacts.org'
38
41
 
39
42
  class << self
43
+ # Centralized HTTP client method with User-Agent header
44
+ #
45
+ def http_get(url)
46
+ user_agent = ENV.fetch('OPENFOODFACTS_USER_AGENT', nil)
47
+ headers = user_agent ? { 'User-Agent' => user_agent } : {}
48
+ URI.parse(url).open(headers)
49
+ end
40
50
 
41
51
  # Return locale from link
42
52
  #
@@ -61,6 +71,5 @@ module Openfoodfacts
61
71
  def product_url(barcode, locale: DEFAULT_LOCALE)
62
72
  Product.url(barcode, locale: locale)
63
73
  end
64
-
65
74
  end
66
75
  end