onebox 2.2.9 → 2.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 541807b758c73492e2868271a63b8f05ef4d94aad316be86fb5c42c3c3f0e2ce
4
- data.tar.gz: 10f460d3d6a1b09a6087a38ad0692c9f652e027beb65b22845536480055b4a48
3
+ metadata.gz: f942eef9390a961ec26aa8d4301bf83eedabe577d067639ab6e0036ca20e02b9
4
+ data.tar.gz: b05c99bd27f025edc02475e38cc28b900bfa2bf64e1c5f9cd4be74cd5ab927a4
5
5
  SHA512:
6
- metadata.gz: d13068e9dbb437388ee689581bc64bb08bf4c5ae898755680c3c978a00fc4a4ee13004f0f2a3071f40d226d15e84f799a949d0d07572c6537ddd92950416d56b
7
- data.tar.gz: 1efacb58ff5d24daff63cdd855a32ae80826f3585eee0305ba032269ee4431f1541848582bd5587aacffdcd6cb9c5bb35c650706c6028ed5e115e17daf362621
6
+ metadata.gz: '081ccc4c533884804d094d40b71175409077002595b10b9c3fd56d8f6c11bcacdc99502fdd78f082adb1e59d83090182550436579ec8bc2361714bb659f7a693'
7
+ data.tar.gz: d3dfcb8f455e7d9189415cfdf9e00f7f1d938b8ce41d8d307ff2ff4bbb6fcf09449a4a881b0bfec34f8b275556691601f2e6d070351ea2b51d5626e5a561b2fd
@@ -14,17 +14,20 @@ module Onebox
14
14
  matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
15
15
 
16
16
  def url
17
- # Have we cached the HTML body of the requested URL?
18
- # If so, try to grab the canonical URL from that document,
17
+ # If possible, fetch the cached HTML body immediately so we can
18
+ # try to grab the canonical URL from that document,
19
19
  # rather than guess at the best URL structure to use
20
- if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
21
- if @body_cacher.cached_response_body_exists?(uri.to_s)
22
- @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
23
- canonical_link = @raw.at('//link[@rel="canonical"]/@href')
24
- return canonical_link.to_s if canonical_link
20
+ if body_cacher&.respond_to?('cache_response_body?')
21
+ if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
22
+ @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
25
23
  end
26
24
  end
27
25
 
26
+ if @raw
27
+ canonical_link = @raw.at('//link[@rel="canonical"]/@href')
28
+ return canonical_link.to_s if canonical_link
29
+ end
30
+
28
31
  if match && match[:id]
29
32
  return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
30
33
  end
@@ -45,7 +48,7 @@ module Onebox
45
48
  private
46
49
 
47
50
  def match
48
- @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[^\/]+)(?:\/|$)/mi)
51
+ @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
49
52
  end
50
53
 
51
54
  def image
@@ -60,6 +63,10 @@ module Onebox
60
63
  end
61
64
 
62
65
  if (landing_image = raw.css("#landingImage")) && landing_image.any?
66
+ attributes = landing_image.first.attributes
67
+
68
+ return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
69
+
63
70
  landing_image.first["src"].to_s
64
71
  end
65
72
 
@@ -110,7 +117,7 @@ module Onebox
110
117
  end
111
118
 
112
119
  result = {
113
- link: link,
120
+ link: url,
114
121
  title: title,
115
122
  by_info: authors,
116
123
  image: og.image || image,
@@ -141,7 +148,7 @@ module Onebox
141
148
  end
142
149
 
143
150
  result = {
144
- link: link,
151
+ link: url,
145
152
  title: title,
146
153
  by_info: authors,
147
154
  image: og.image || image,
@@ -157,7 +164,7 @@ module Onebox
157
164
  else
158
165
  title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
159
166
  result = {
160
- link: link,
167
+ link: url,
161
168
  title: title,
162
169
  image: og.image || image,
163
170
  price: price
@@ -167,7 +174,10 @@ module Onebox
167
174
  result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
168
175
 
169
176
  summary = raw.at("#productDescription")
170
- result[:description] = og.description || (summary && summary.inner_text) || CGI.unescapeHTML(Onebox::Helpers.truncate(raw.css("meta[name=description]").first["content"], 250))
177
+
178
+ description = og.description || summary&.inner_text
179
+ description ||= raw.css("meta[name=description]").first&.[]("content")
180
+ result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
171
181
  end
172
182
 
173
183
  result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
@@ -11,10 +11,13 @@ module Onebox
11
11
  end
12
12
 
13
13
  def raw
14
- body_cacher = self.options[:body_cacher] if self.options
15
14
  @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
16
15
  end
17
16
 
17
+ def body_cacher
18
+ self.options&.[](:body_cacher)
19
+ end
20
+
18
21
  def html?
19
22
  raw.respond_to(:css)
20
23
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.2.9"
4
+ VERSION = "2.2.10"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.9
4
+ version: 2.2.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-03-31 00:00:00.000000000 Z
13
+ date: 2021-04-02 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable