onebox 2.2.9 → 2.2.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/onebox/engine/amazon_onebox.rb +22 -12
- data/lib/onebox/engine/html.rb +4 -1
- data/lib/onebox/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f942eef9390a961ec26aa8d4301bf83eedabe577d067639ab6e0036ca20e02b9
|
4
|
+
data.tar.gz: b05c99bd27f025edc02475e38cc28b900bfa2bf64e1c5f9cd4be74cd5ab927a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '081ccc4c533884804d094d40b71175409077002595b10b9c3fd56d8f6c11bcacdc99502fdd78f082adb1e59d83090182550436579ec8bc2361714bb659f7a693'
|
7
|
+
data.tar.gz: d3dfcb8f455e7d9189415cfdf9e00f7f1d938b8ce41d8d307ff2ff4bbb6fcf09449a4a881b0bfec34f8b275556691601f2e6d070351ea2b51d5626e5a561b2fd
|
@@ -14,17 +14,20 @@ module Onebox
|
|
14
14
|
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
|
15
15
|
|
16
16
|
def url
|
17
|
-
#
|
18
|
-
#
|
17
|
+
# If possible, fetch the cached HTML body immediately so we can
|
18
|
+
# try to grab the canonical URL from that document,
|
19
19
|
# rather than guess at the best URL structure to use
|
20
|
-
if
|
21
|
-
if
|
22
|
-
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params,
|
23
|
-
canonical_link = @raw.at('//link[@rel="canonical"]/@href')
|
24
|
-
return canonical_link.to_s if canonical_link
|
20
|
+
if body_cacher&.respond_to?('cache_response_body?')
|
21
|
+
if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
|
22
|
+
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
|
25
23
|
end
|
26
24
|
end
|
27
25
|
|
26
|
+
if @raw
|
27
|
+
canonical_link = @raw.at('//link[@rel="canonical"]/@href')
|
28
|
+
return canonical_link.to_s if canonical_link
|
29
|
+
end
|
30
|
+
|
28
31
|
if match && match[:id]
|
29
32
|
return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
|
30
33
|
end
|
@@ -45,7 +48,7 @@ module Onebox
|
|
45
48
|
private
|
46
49
|
|
47
50
|
def match
|
48
|
-
@match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[
|
51
|
+
@match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
|
49
52
|
end
|
50
53
|
|
51
54
|
def image
|
@@ -60,6 +63,10 @@ module Onebox
|
|
60
63
|
end
|
61
64
|
|
62
65
|
if (landing_image = raw.css("#landingImage")) && landing_image.any?
|
66
|
+
attributes = landing_image.first.attributes
|
67
|
+
|
68
|
+
return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
|
69
|
+
|
63
70
|
landing_image.first["src"].to_s
|
64
71
|
end
|
65
72
|
|
@@ -110,7 +117,7 @@ module Onebox
|
|
110
117
|
end
|
111
118
|
|
112
119
|
result = {
|
113
|
-
link:
|
120
|
+
link: url,
|
114
121
|
title: title,
|
115
122
|
by_info: authors,
|
116
123
|
image: og.image || image,
|
@@ -141,7 +148,7 @@ module Onebox
|
|
141
148
|
end
|
142
149
|
|
143
150
|
result = {
|
144
|
-
link:
|
151
|
+
link: url,
|
145
152
|
title: title,
|
146
153
|
by_info: authors,
|
147
154
|
image: og.image || image,
|
@@ -157,7 +164,7 @@ module Onebox
|
|
157
164
|
else
|
158
165
|
title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
|
159
166
|
result = {
|
160
|
-
link:
|
167
|
+
link: url,
|
161
168
|
title: title,
|
162
169
|
image: og.image || image,
|
163
170
|
price: price
|
@@ -167,7 +174,10 @@ module Onebox
|
|
167
174
|
result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
|
168
175
|
|
169
176
|
summary = raw.at("#productDescription")
|
170
|
-
|
177
|
+
|
178
|
+
description = og.description || summary&.inner_text
|
179
|
+
description ||= raw.css("meta[name=description]").first&.[]("content")
|
180
|
+
result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
|
171
181
|
end
|
172
182
|
|
173
183
|
result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
|
data/lib/onebox/engine/html.rb
CHANGED
@@ -11,10 +11,13 @@ module Onebox
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def raw
|
14
|
-
body_cacher = self.options[:body_cacher] if self.options
|
15
14
|
@raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
|
16
15
|
end
|
17
16
|
|
17
|
+
def body_cacher
|
18
|
+
self.options&.[](:body_cacher)
|
19
|
+
end
|
20
|
+
|
18
21
|
def html?
|
19
22
|
raw.respond_to(:css)
|
20
23
|
end
|
data/lib/onebox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.9
|
4
|
+
version: 2.2.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joanna Zeta
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2021-
|
13
|
+
date: 2021-04-02 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: addressable
|