openfoodfacts 0.6.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,89 +1,139 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'cgi'
2
4
  require 'hashie'
3
5
  require 'net/http'
4
6
  require 'nokogiri'
5
- require 'open-uri'
6
7
 
7
8
  module Openfoodfacts
8
9
  class Product < Hashie::Mash
9
-
10
+ # disable_warnings
10
11
  # TODO: Add more locales
11
12
  LOCALE_WEBURL_PREFIXES = {
12
13
  'fr' => 'produit',
13
14
  'uk' => 'product',
14
15
  'us' => 'product',
15
16
  'world' => 'product'
16
- }
17
+ }.freeze
17
18
 
18
19
  class << self
19
-
20
20
  # Get product
21
21
  #
22
22
  def get(code, locale: DEFAULT_LOCALE)
23
- if code
24
- product_url = url(code, locale: locale)
25
- json = URI.open(product_url).read
26
- hash = JSON.parse(json)
23
+ return unless code
27
24
 
28
- new(hash["product"]) if !hash["status"].nil? && hash["status"] == 1
29
- end
25
+ product_url = url(code, locale: locale)
26
+ json = Openfoodfacts.http_get(product_url).read
27
+ hash = JSON.parse(json)
28
+
29
+ new(hash['product']) if !hash['status'].nil? && hash['status'] == 1
30
30
  end
31
- alias_method :find, :get
31
+ alias find get
32
32
 
33
33
  # Return product API URL
34
34
  #
35
35
  def url(code, locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
36
- if code
37
- path = "api/v0/produit/#{code}.json"
38
- "https://#{locale}.#{domain}/#{path}"
39
- end
36
+ return unless code
37
+
38
+ prefix = LOCALE_WEBURL_PREFIXES[locale]
39
+ path = "api/v2/#{prefix}/#{code}.json"
40
+ "https://#{locale}.#{domain}/#{path}"
40
41
  end
41
42
 
42
43
  # Search products
43
44
  #
44
- def search(terms, locale: DEFAULT_LOCALE, page: 1, page_size: 20, sort_by: 'unique_scans_n', domain: DEFAULT_DOMAIN)
45
+ def search(terms, locale: DEFAULT_LOCALE, page: 1, page_size: 20, sort_by: 'unique_scans_n',
46
+ domain: DEFAULT_DOMAIN)
45
47
  terms = CGI.escape(terms)
46
- path = "cgi/search.pl?search_terms=#{terms}&jqm=1&page=#{page}&page_size=#{page_size}&sort_by=#{sort_by}"
48
+ path = "cgi/search.pl?search_terms=#{terms}&json=1&page=#{page}&page_size=#{page_size}&sort_by=#{sort_by}"
47
49
  url = "https://#{locale}.#{domain}/#{path}"
48
- json = URI.open(url).read
50
+ json = Openfoodfacts.http_get(url).read
49
51
  hash = JSON.parse(json)
50
- html = hash["jqm"]
51
-
52
- from_jquery_mobile_list(html)
52
+ products = []
53
+ hash['products'].each do |data|
54
+ products << new(data)
55
+ end
56
+ products
53
57
  end
54
- alias_method :where, :search
58
+ alias where search
55
59
 
56
60
  def from_html_list(html, list_css_selector, code_from_link_regex, locale: 'world')
57
61
  dom = Nokogiri::HTML.fragment(html)
58
- dom.css(list_css_selector).map do |product|
62
+ dom.css(list_css_selector).filter_map do |product|
59
63
  attributes = {}
60
64
 
61
- if link = product.css('a').first
62
- attributes["product_name"] = link.inner_text.strip
65
+ # Look for product links with multiple patterns
66
+ link = product.css('a[href*="/product/"], a[href*="/produit/"]').first
67
+ link ||= product.css('a').first
68
+
69
+ next unless link
70
+
71
+ attributes['product_name'] = link.inner_text.strip
72
+ href = link.attr('href')
73
+
74
+ # Try multiple regex patterns for extracting product codes
75
+ regexes = [
76
+ code_from_link_regex, # Original pattern
77
+ %r{/product/(\d+)}i, # /product/123456
78
+ %r{/produit/(\d+)}i, # /produit/123456 (French)
79
+ %r{/(\d{8,})}, # Any 8+ digit number
80
+ %r{product[/=](\d+)}i, # product=123456 or product/123456
81
+ %r{code[/=](\d+)}i # code=123456 or code/123456
82
+ ]
63
83
 
64
- if code = link.attr('href')[code_from_link_regex, 1]
65
- attributes["_id"] = code
66
- attributes["code"] = code
84
+ code = nil
85
+ regexes.each do |regex|
86
+ match = href[regex, 1]
87
+ if match && match.length >= 8 # Product codes are typically 8+ digits
88
+ code = match
89
+ break
67
90
  end
68
91
  end
69
92
 
70
- if image = product.css('img').first and image_url = image.attr('src')
71
- attributes["image_small_url"] = image_url
72
- attributes["lc"] = Locale.locale_from_link(image_url)
93
+ next unless code
94
+
95
+ attributes['_id'] = code
96
+ attributes['code'] = code
97
+
98
+ # Skip products without valid codes
99
+
100
+ if (image = product.css('img').first) && (image_url = image.attr('src'))
101
+ attributes['image_small_url'] = image_url
102
+ attributes['lc'] = Locale.locale_from_link(image_url)
73
103
  end
74
- attributes["lc"] ||= locale
104
+ attributes['lc'] ||= locale
75
105
 
76
106
  new(attributes)
77
107
  end
78
-
79
- end
80
-
81
- def from_jquery_mobile_list(jqm_html)
82
- from_html_list(jqm_html, 'ul#search_results_list li:not(#loadmore)', /code=(\d+)\Z/i)
83
108
  end
84
109
 
85
110
  def from_website_list(html, locale: 'world')
86
- from_html_list(html, 'ul.products li', /\/(\d+)\/?/i, locale: 'world')
111
+ # Try multiple CSS selectors to handle different page structures
112
+ selectors = [
113
+ 'ul.products li', # Original selector
114
+ '.search_results article', # Modern article-based structure
115
+ '.search-results .result', # Alternative modern structure
116
+ 'article', # Simple article tags
117
+ '.product-item', # Product item classes
118
+ '.product', # Simple product classes
119
+ 'li[data-product-code]' # Data attribute based
120
+ ]
121
+
122
+ dom = Nokogiri::HTML.fragment(html)
123
+
124
+ selectors.each do |selector|
125
+ elements = dom.css(selector)
126
+ next if elements.empty?
127
+
128
+ # Check if elements contain product links
129
+ first_element = elements.first
130
+ if first_element && (first_element.css('a[href*="/product/"]').any? || first_element.css('a[href*="/produit/"]').any?)
131
+ return from_html_list(html, selector, %r{/(\d+)/?}i, locale: locale)
132
+ end
133
+ end
134
+
135
+ # Fallback: return empty array if no products found
136
+ []
87
137
  end
88
138
 
89
139
  # page -1 to fetch all pages
@@ -96,22 +146,39 @@ module Openfoodfacts
96
146
  products = []
97
147
 
98
148
  page = 1
99
- begin
149
+ loop do
100
150
  products_on_page = from_website_page(page_url, page: page)
101
151
  products += products_on_page
102
152
  page += 1
103
- end while products_on_page.any?
153
+ break unless products_on_page.any?
154
+ end
104
155
 
105
156
  products
106
157
  end
107
158
  else
108
- html = URI.open("#{page_url}/#{page}").read
159
+ # Try different URL formats for pagination
160
+ urls_to_try = [
161
+ "#{page_url}/#{page}", # Original format: /page/1
162
+ "#{page_url}?page=#{page}", # Query parameter: ?page=1
163
+ "#{page_url}#{page_url.include?('?') ? '&' : '?'}page=#{page}" # Proper query parameter handling
164
+ ]
165
+
166
+ html = nil
167
+ urls_to_try.each do |url|
168
+ html = Openfoodfacts.http_get(url).read
169
+ break if html&.length&.positive?
170
+ rescue StandardError
171
+ # Continue to next URL format
172
+ next
173
+ end
174
+
175
+ html ||= '' # Fallback to empty string if all URLs fail
109
176
  from_website_list(html, locale: Locale.locale_from_link(page_url))
110
177
  end
111
178
  end
112
179
 
113
180
  def tags_from_page(_klass, page_url, &custom_tag_parsing)
114
- html = URI.open(page_url).read
181
+ html = Openfoodfacts.http_get(page_url).read
115
182
  dom = Nokogiri::HTML.fragment(html)
116
183
 
117
184
  dom.css('table#tagstable tbody tr').map do |tag|
@@ -120,28 +187,38 @@ module Openfoodfacts
120
187
  else
121
188
  link = tag.css('a').first
122
189
 
190
+ name = link.text.strip
191
+ img_alt = link.css('img').attr('alt')
192
+ if (name.nil? || name == '') && img_alt
193
+ img_alt_text = img_alt.to_s.strip
194
+ name = if img_alt_text.include?(':')
195
+ img_alt_text.split(':').last.strip
196
+ else
197
+ img_alt_text[/\s+([^\s]+)$/, 1]
198
+ end
199
+ end
200
+
123
201
  _klass.new({
124
- "name" => link.text.strip,
125
- "url" => URI.join(page_url, link.attr('href')).to_s,
126
- "products_count" => tag.css('td')[1].text.to_i
127
- })
202
+ 'name' => name,
203
+ 'url' => URI.join(page_url, link.attr('href')).to_s,
204
+ 'products_count' => tag.css('td')[1].text.to_i
205
+ })
128
206
  end
129
207
  end
130
208
  end
131
-
132
209
  end
133
210
 
134
211
  # Fetch product
135
212
  #
136
213
  def fetch
137
- if (self.code)
138
- product = self.class.get(self.code)
139
- self.merge!(product)
214
+ if code
215
+ product = self.class.get(code)
216
+ merge!(product) if product
140
217
  end
141
218
 
142
219
  self
143
220
  end
144
- alias_method :reload, :fetch
221
+ alias reload fetch
145
222
 
146
223
  # Update product
147
224
  # Only product_name, brands and quantity fields seems to be updatable throught app / API.
@@ -149,38 +226,37 @@ module Openfoodfacts
149
226
  # Tested not updatable fields: countries, ingredients_text, purchase_places, purchase_places_tag, purchase_places_tags
150
227
  #
151
228
  def update(user: nil, domain: DEFAULT_DOMAIN)
152
- if self.code && self.lc
153
- subdomain = self.lc == 'world' ? 'world' : "world-#{self.lc}"
229
+ if code && lc
230
+ subdomain = lc == 'world' ? 'world' : "world-#{lc}"
154
231
  path = 'cgi/product_jqm.pl'
155
232
  uri = URI("https://#{subdomain}.#{domain}/#{path}")
156
- params = self.to_hash
157
- params.merge!("user_id" => user.user_id, "password" => user.password) if user
233
+ params = to_hash
234
+ params.merge!('user_id' => user.user_id, 'password' => user.password) if user
158
235
  response = Net::HTTP.post_form(uri, params)
159
236
 
160
237
  data = JSON.parse(response.body)
161
- data["status"] == 1
238
+ data['status'] == 1
162
239
  else
163
240
  false
164
241
  end
165
242
  end
166
- alias_method :save, :update
243
+ alias save update
167
244
 
168
245
  # Return Product API URL
169
246
  #
170
247
  def url(locale: DEFAULT_LOCALE)
171
- self.class.url(self.code, locale: locale)
248
+ self.class.url(code, locale: locale)
172
249
  end
173
250
 
174
251
  # Return Product web URL according to locale
175
252
  #
176
253
  def weburl(locale: nil, domain: DEFAULT_DOMAIN)
177
- locale ||= self.lc || DEFAULT_LOCALE
254
+ locale ||= lc || DEFAULT_LOCALE
178
255
 
179
- if self.code && prefix = LOCALE_WEBURL_PREFIXES[locale]
180
- path = "#{prefix}/#{self.code}"
256
+ if code && (prefix = LOCALE_WEBURL_PREFIXES[locale])
257
+ path = "#{prefix}/#{code}"
181
258
  "https://#{locale}.#{domain}/#{path}"
182
259
  end
183
260
  end
184
-
185
261
  end
186
262
  end
@@ -1,26 +1,25 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'hashie'
2
4
 
3
5
  module Openfoodfacts
4
6
  class ProductState < Hashie::Mash
5
-
6
7
  # TODO: Add more locales
7
8
  LOCALE_PATHS = {
8
9
  'fr' => 'etats',
9
10
  'uk' => 'states',
10
11
  'us' => 'states',
11
12
  'world' => 'states'
12
- }
13
+ }.freeze
13
14
 
14
15
  class << self
15
-
16
16
  # Get product states
17
17
  #
18
18
  def all(locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
19
- if path = LOCALE_PATHS[locale]
20
- Product.tags_from_page(self, "https://#{locale}.#{domain}/#{path}")
19
+ if (path = LOCALE_PATHS[locale])
20
+ Product.tags_from_page(self, "https://#{locale}.#{domain}/facets/#{path}")
21
21
  end
22
22
  end
23
-
24
23
  end
25
24
 
26
25
  # Get products with state
@@ -28,6 +27,5 @@ module Openfoodfacts
28
27
  def products(page: -1)
29
28
  Product.from_website_page(url, page: page, products_count: products_count) if url
30
29
  end
31
-
32
30
  end
33
31
  end
@@ -1,26 +1,25 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'hashie'
2
4
 
3
5
  module Openfoodfacts
4
6
  class PurchasePlace < Hashie::Mash
5
-
6
7
  # TODO: Add more locales
7
8
  LOCALE_PATHS = {
8
9
  'fr' => 'lieux-de-vente',
9
10
  'uk' => 'purchase-places',
10
11
  'us' => 'purchase-places',
11
12
  'world' => 'purchase-places'
12
- }
13
+ }.freeze
13
14
 
14
15
  class << self
15
-
16
16
  # Get purchase places
17
17
  #
18
18
  def all(locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
19
- if path = LOCALE_PATHS[locale]
19
+ if (path = LOCALE_PATHS[locale])
20
20
  Product.tags_from_page(self, "https://#{locale}.#{domain}/#{path}")
21
21
  end
22
22
  end
23
-
24
23
  end
25
24
 
26
25
  # Get products with purchase place
@@ -28,6 +27,5 @@ module Openfoodfacts
28
27
  def products(page: -1)
29
28
  Product.from_website_page(url, page: page, products_count: products_count) if url
30
29
  end
31
-
32
30
  end
33
31
  end
@@ -1,26 +1,25 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'hashie'
2
4
 
3
5
  module Openfoodfacts
4
6
  class Store < Hashie::Mash
5
-
6
7
  # TODO: Add more locales
7
8
  LOCALE_PATHS = {
8
9
  'fr' => 'magasins',
9
10
  'uk' => 'stores',
10
11
  'us' => 'stores',
11
12
  'world' => 'stores'
12
- }
13
+ }.freeze
13
14
 
14
15
  class << self
15
-
16
16
  # Get stores
17
17
  #
18
18
  def all(locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
19
- if path = LOCALE_PATHS[locale]
19
+ if (path = LOCALE_PATHS[locale])
20
20
  Product.tags_from_page(self, "https://#{locale}.#{domain}/#{path}")
21
21
  end
22
22
  end
23
-
24
23
  end
25
24
 
26
25
  # Get products from store
@@ -28,6 +27,5 @@ module Openfoodfacts
28
27
  def products(page: -1)
29
28
  Product.from_website_page(url, page: page, products_count: products_count) if url
30
29
  end
31
-
32
30
  end
33
31
  end
@@ -1,26 +1,25 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'hashie'
2
4
 
3
5
  module Openfoodfacts
4
6
  class Trace < Hashie::Mash
5
-
6
7
  # TODO: Add more locales
7
8
  LOCALE_PATHS = {
8
9
  'fr' => 'traces',
9
10
  'uk' => 'traces',
10
11
  'us' => 'traces',
11
12
  'world' => 'traces'
12
- }
13
+ }.freeze
13
14
 
14
15
  class << self
15
-
16
16
  # Get traces
17
17
  #
18
18
  def all(locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
19
- if path = LOCALE_PATHS[locale]
19
+ if (path = LOCALE_PATHS[locale])
20
20
  Product.tags_from_page(self, "https://#{locale}.#{domain}/#{path}")
21
21
  end
22
22
  end
23
-
24
23
  end
25
24
 
26
25
  # Get products with trace
@@ -28,6 +27,5 @@ module Openfoodfacts
28
27
  def products(page: -1)
29
28
  Product.from_website_page(url, page: page, products_count: products_count) if url
30
29
  end
31
-
32
30
  end
33
31
  end
@@ -1,40 +1,41 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'net/http'
2
4
 
3
5
  module Openfoodfacts
4
6
  class User < Hashie::Mash
5
-
6
7
  class << self
7
-
8
8
  # Login
9
9
  #
10
10
  def login(user_id, password, locale: DEFAULT_LOCALE, domain: DEFAULT_DOMAIN)
11
11
  path = 'cgi/session.pl'
12
12
  uri = URI("https://#{locale}.#{domain}/#{path}")
13
13
  params = {
14
- "jqm" => "1",
15
- "user_id" => user_id,
16
- "password" => password
14
+ 'jqm' => '1',
15
+ 'user_id' => user_id,
16
+ 'password' => password
17
17
  }
18
18
 
19
19
  response = Net::HTTP.post_form(uri, params)
20
+ return nil if response.code != '200'
21
+
20
22
  data = JSON.parse(response.body)
21
23
 
22
- if data['user_id']
23
- data.merge!(password: password)
24
- new(data)
25
- end
26
- end
24
+ return unless data['user_id']
27
25
 
26
+ data.merge!(password: password)
27
+ new(data)
28
+ end
28
29
  end
29
30
 
30
31
  # Login
31
32
  #
32
33
  def login(locale: DEFAULT_LOCALE)
33
- if user = self.class.login(self.user_id, self.password, locale: locale)
34
- self.name = user.name
35
- self
36
- end
37
- end
34
+ user = self.class.login(user_id, password, locale: locale)
35
+ return unless user
38
36
 
37
+ self.name = user.name
38
+ self
39
+ end
39
40
  end
40
41
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Openfoodfacts
2
- VERSION = "0.6.0"
4
+ VERSION = '0.9.0'
3
5
  end
data/lib/openfoodfacts.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'openfoodfacts/additive'
2
4
  require_relative 'openfoodfacts/brand'
3
5
  require_relative 'openfoodfacts/category'
@@ -32,11 +34,17 @@ require 'nokogiri'
32
34
  require 'open-uri'
33
35
 
34
36
  module Openfoodfacts
35
-
36
37
  DEFAULT_LOCALE = Locale::GLOBAL
37
38
  DEFAULT_DOMAIN = 'openfoodfacts.org'
38
39
 
39
40
  class << self
41
+ # Centralized HTTP client method with User-Agent header
42
+ #
43
+ def http_get(url)
44
+ user_agent = ENV.fetch('OPENFOODFACTS_USER_AGENT', nil)
45
+ headers = user_agent ? { 'User-Agent' => user_agent } : {}
46
+ URI.parse(url).open(headers)
47
+ end
40
48
 
41
49
  # Return locale from link
42
50
  #
@@ -61,6 +69,5 @@ module Openfoodfacts
61
69
  def product_url(barcode, locale: DEFAULT_LOCALE)
62
70
  Product.url(barcode, locale: locale)
63
71
  end
64
-
65
72
  end
66
73
  end