onebox 2.2.9 → 2.2.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 541807b758c73492e2868271a63b8f05ef4d94aad316be86fb5c42c3c3f0e2ce
4
- data.tar.gz: 10f460d3d6a1b09a6087a38ad0692c9f652e027beb65b22845536480055b4a48
3
+ metadata.gz: f942eef9390a961ec26aa8d4301bf83eedabe577d067639ab6e0036ca20e02b9
4
+ data.tar.gz: b05c99bd27f025edc02475e38cc28b900bfa2bf64e1c5f9cd4be74cd5ab927a4
5
5
  SHA512:
6
- metadata.gz: d13068e9dbb437388ee689581bc64bb08bf4c5ae898755680c3c978a00fc4a4ee13004f0f2a3071f40d226d15e84f799a949d0d07572c6537ddd92950416d56b
7
- data.tar.gz: 1efacb58ff5d24daff63cdd855a32ae80826f3585eee0305ba032269ee4431f1541848582bd5587aacffdcd6cb9c5bb35c650706c6028ed5e115e17daf362621
6
+ metadata.gz: '081ccc4c533884804d094d40b71175409077002595b10b9c3fd56d8f6c11bcacdc99502fdd78f082adb1e59d83090182550436579ec8bc2361714bb659f7a693'
7
+ data.tar.gz: d3dfcb8f455e7d9189415cfdf9e00f7f1d938b8ce41d8d307ff2ff4bbb6fcf09449a4a881b0bfec34f8b275556691601f2e6d070351ea2b51d5626e5a561b2fd
@@ -14,17 +14,20 @@ module Onebox
14
14
  matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
15
15
 
16
16
  def url
17
- # Have we cached the HTML body of the requested URL?
18
- # If so, try to grab the canonical URL from that document,
17
+ # If possible, fetch the cached HTML body immediately so we can
18
+ # try to grab the canonical URL from that document,
19
19
  # rather than guess at the best URL structure to use
20
- if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
21
- if @body_cacher.cached_response_body_exists?(uri.to_s)
22
- @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
23
- canonical_link = @raw.at('//link[@rel="canonical"]/@href')
24
- return canonical_link.to_s if canonical_link
20
+ if body_cacher&.respond_to?('cache_response_body?')
21
+ if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
22
+ @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
25
23
  end
26
24
  end
27
25
 
26
+ if @raw
27
+ canonical_link = @raw.at('//link[@rel="canonical"]/@href')
28
+ return canonical_link.to_s if canonical_link
29
+ end
30
+
28
31
  if match && match[:id]
29
32
  return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
30
33
  end
@@ -45,7 +48,7 @@ module Onebox
45
48
  private
46
49
 
47
50
  def match
48
- @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[^\/]+)(?:\/|$)/mi)
51
+ @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
49
52
  end
50
53
 
51
54
  def image
@@ -60,6 +63,10 @@ module Onebox
60
63
  end
61
64
 
62
65
  if (landing_image = raw.css("#landingImage")) && landing_image.any?
66
+ attributes = landing_image.first.attributes
67
+
68
+ return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
69
+
63
70
  landing_image.first["src"].to_s
64
71
  end
65
72
 
@@ -110,7 +117,7 @@ module Onebox
110
117
  end
111
118
 
112
119
  result = {
113
- link: link,
120
+ link: url,
114
121
  title: title,
115
122
  by_info: authors,
116
123
  image: og.image || image,
@@ -141,7 +148,7 @@ module Onebox
141
148
  end
142
149
 
143
150
  result = {
144
- link: link,
151
+ link: url,
145
152
  title: title,
146
153
  by_info: authors,
147
154
  image: og.image || image,
@@ -157,7 +164,7 @@ module Onebox
157
164
  else
158
165
  title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
159
166
  result = {
160
- link: link,
167
+ link: url,
161
168
  title: title,
162
169
  image: og.image || image,
163
170
  price: price
@@ -167,7 +174,10 @@ module Onebox
167
174
  result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
168
175
 
169
176
  summary = raw.at("#productDescription")
170
- result[:description] = og.description || (summary && summary.inner_text) || CGI.unescapeHTML(Onebox::Helpers.truncate(raw.css("meta[name=description]").first["content"], 250))
177
+
178
+ description = og.description || summary&.inner_text
179
+ description ||= raw.css("meta[name=description]").first&.[]("content")
180
+ result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
171
181
  end
172
182
 
173
183
  result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
@@ -11,10 +11,13 @@ module Onebox
11
11
  end
12
12
 
13
13
  def raw
14
- body_cacher = self.options[:body_cacher] if self.options
15
14
  @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
16
15
  end
17
16
 
17
+ def body_cacher
18
+ self.options&.[](:body_cacher)
19
+ end
20
+
18
21
  def html?
19
22
  raw.respond_to(:css)
20
23
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.2.9"
4
+ VERSION = "2.2.10"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.9
4
+ version: 2.2.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-03-31 00:00:00.000000000 Z
13
+ date: 2021-04-02 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable