onebox 2.2.9 → 2.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/onebox/engine/amazon_onebox.rb +22 -12
- data/lib/onebox/engine/html.rb +4 -1
- data/lib/onebox/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f942eef9390a961ec26aa8d4301bf83eedabe577d067639ab6e0036ca20e02b9
|
4
|
+
data.tar.gz: b05c99bd27f025edc02475e38cc28b900bfa2bf64e1c5f9cd4be74cd5ab927a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '081ccc4c533884804d094d40b71175409077002595b10b9c3fd56d8f6c11bcacdc99502fdd78f082adb1e59d83090182550436579ec8bc2361714bb659f7a693'
|
7
|
+
data.tar.gz: d3dfcb8f455e7d9189415cfdf9e00f7f1d938b8ce41d8d307ff2ff4bbb6fcf09449a4a881b0bfec34f8b275556691601f2e6d070351ea2b51d5626e5a561b2fd
|
@@ -14,17 +14,20 @@ module Onebox
|
|
14
14
|
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
|
15
15
|
|
16
16
|
def url
|
17
|
-
#
|
18
|
-
#
|
17
|
+
# If possible, fetch the cached HTML body immediately so we can
|
18
|
+
# try to grab the canonical URL from that document,
|
19
19
|
# rather than guess at the best URL structure to use
|
20
|
-
if
|
21
|
-
if
|
22
|
-
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params,
|
23
|
-
canonical_link = @raw.at('//link[@rel="canonical"]/@href')
|
24
|
-
return canonical_link.to_s if canonical_link
|
20
|
+
if body_cacher&.respond_to?('cache_response_body?')
|
21
|
+
if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
|
22
|
+
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
|
25
23
|
end
|
26
24
|
end
|
27
25
|
|
26
|
+
if @raw
|
27
|
+
canonical_link = @raw.at('//link[@rel="canonical"]/@href')
|
28
|
+
return canonical_link.to_s if canonical_link
|
29
|
+
end
|
30
|
+
|
28
31
|
if match && match[:id]
|
29
32
|
return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
|
30
33
|
end
|
@@ -45,7 +48,7 @@ module Onebox
|
|
45
48
|
private
|
46
49
|
|
47
50
|
def match
|
48
|
-
@match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[
|
51
|
+
@match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
|
49
52
|
end
|
50
53
|
|
51
54
|
def image
|
@@ -60,6 +63,10 @@ module Onebox
|
|
60
63
|
end
|
61
64
|
|
62
65
|
if (landing_image = raw.css("#landingImage")) && landing_image.any?
|
66
|
+
attributes = landing_image.first.attributes
|
67
|
+
|
68
|
+
return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
|
69
|
+
|
63
70
|
landing_image.first["src"].to_s
|
64
71
|
end
|
65
72
|
|
@@ -110,7 +117,7 @@ module Onebox
|
|
110
117
|
end
|
111
118
|
|
112
119
|
result = {
|
113
|
-
link:
|
120
|
+
link: url,
|
114
121
|
title: title,
|
115
122
|
by_info: authors,
|
116
123
|
image: og.image || image,
|
@@ -141,7 +148,7 @@ module Onebox
|
|
141
148
|
end
|
142
149
|
|
143
150
|
result = {
|
144
|
-
link:
|
151
|
+
link: url,
|
145
152
|
title: title,
|
146
153
|
by_info: authors,
|
147
154
|
image: og.image || image,
|
@@ -157,7 +164,7 @@ module Onebox
|
|
157
164
|
else
|
158
165
|
title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
|
159
166
|
result = {
|
160
|
-
link:
|
167
|
+
link: url,
|
161
168
|
title: title,
|
162
169
|
image: og.image || image,
|
163
170
|
price: price
|
@@ -167,7 +174,10 @@ module Onebox
|
|
167
174
|
result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
|
168
175
|
|
169
176
|
summary = raw.at("#productDescription")
|
170
|
-
|
177
|
+
|
178
|
+
description = og.description || summary&.inner_text
|
179
|
+
description ||= raw.css("meta[name=description]").first&.[]("content")
|
180
|
+
result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
|
171
181
|
end
|
172
182
|
|
173
183
|
result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
|
data/lib/onebox/engine/html.rb
CHANGED
@@ -11,10 +11,13 @@ module Onebox
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def raw
|
14
|
-
body_cacher = self.options[:body_cacher] if self.options
|
15
14
|
@raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
|
16
15
|
end
|
17
16
|
|
17
|
+
def body_cacher
|
18
|
+
self.options&.[](:body_cacher)
|
19
|
+
end
|
20
|
+
|
18
21
|
def html?
|
19
22
|
raw.respond_to(:css)
|
20
23
|
end
|
data/lib/onebox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.9
|
4
|
+
version: 2.2.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joanna Zeta
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2021-
|
13
|
+
date: 2021-04-02 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: addressable
|