onebox 2.2.8 → 2.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/onebox/engine/amazon_onebox.rb +16 -6
- data/lib/onebox/engine/gfycat_onebox.rb +1 -1
- data/lib/onebox/engine/google_docs_onebox.rb +1 -1
- data/lib/onebox/engine/html.rb +2 -1
- data/lib/onebox/engine/pastebin_onebox.rb +1 -1
- data/lib/onebox/engine/twitter_status_onebox.rb +1 -1
- data/lib/onebox/helpers.rb +22 -11
- data/lib/onebox/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 541807b758c73492e2868271a63b8f05ef4d94aad316be86fb5c42c3c3f0e2ce
|
4
|
+
data.tar.gz: 10f460d3d6a1b09a6087a38ad0692c9f652e027beb65b22845536480055b4a48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d13068e9dbb437388ee689581bc64bb08bf4c5ae898755680c3c978a00fc4a4ee13004f0f2a3071f40d226d15e84f799a949d0d07572c6537ddd92950416d56b
|
7
|
+
data.tar.gz: 1efacb58ff5d24daff63cdd855a32ae80826f3585eee0305ba032269ee4431f1541848582bd5587aacffdcd6cb9c5bb35c650706c6028ed5e115e17daf362621
|
@@ -11,11 +11,22 @@ module Onebox
|
|
11
11
|
include HTML
|
12
12
|
|
13
13
|
always_https
|
14
|
-
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
|
14
|
+
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
|
15
15
|
|
16
16
|
def url
|
17
|
+
# Have we cached the HTML body of the requested URL?
|
18
|
+
# If so, try to grab the canonical URL from that document,
|
19
|
+
# rather than guess at the best URL structure to use
|
20
|
+
if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
|
21
|
+
if @body_cacher.cached_response_body_exists?(uri.to_s)
|
22
|
+
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
|
23
|
+
canonical_link = @raw.at('//link[@rel="canonical"]/@href')
|
24
|
+
return canonical_link.to_s if canonical_link
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
17
28
|
if match && match[:id]
|
18
|
-
return "https://www.amazon.#{tld}/
|
29
|
+
return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
|
19
30
|
end
|
20
31
|
|
21
32
|
@url
|
@@ -26,10 +37,9 @@ module Onebox
|
|
26
37
|
end
|
27
38
|
|
28
39
|
def http_params
|
29
|
-
|
30
|
-
'User-Agent' =>
|
31
|
-
|
32
|
-
}
|
40
|
+
if @options && @options[:user_agent]
|
41
|
+
{ 'User-Agent' => @options[:user_agent] }
|
42
|
+
end
|
33
43
|
end
|
34
44
|
|
35
45
|
private
|
@@ -47,7 +47,7 @@ module Onebox
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def get_og_data
|
50
|
-
response = Onebox::Helpers.fetch_response(url, 10) rescue nil
|
50
|
+
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
51
51
|
html = Nokogiri::HTML(response)
|
52
52
|
og_data = {}
|
53
53
|
html.css('meta').each do |m|
|
data/lib/onebox/engine/html.rb
CHANGED
@@ -31,7 +31,7 @@ module Onebox
|
|
31
31
|
|
32
32
|
def lines
|
33
33
|
return @lines if @lines
|
34
|
-
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
|
34
|
+
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
|
35
35
|
@lines = response.split("\n")
|
36
36
|
end
|
37
37
|
|
@@ -17,7 +17,7 @@ module Onebox
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def get_twitter_data
|
20
|
-
response = Onebox::Helpers.fetch_response(url,
|
20
|
+
response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
|
21
21
|
html = Nokogiri::HTML(response)
|
22
22
|
twitter_data = {}
|
23
23
|
html.css('meta').each do |m|
|
data/lib/onebox/helpers.rb
CHANGED
@@ -24,8 +24,8 @@ module Onebox
|
|
24
24
|
html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
|
25
25
|
end
|
26
26
|
|
27
|
-
def self.fetch_html_doc(url, headers = nil)
|
28
|
-
response = (fetch_response(url,
|
27
|
+
def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
|
28
|
+
response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
|
29
29
|
doc = Nokogiri::HTML(response)
|
30
30
|
uri = Addressable::URI.parse(url)
|
31
31
|
|
@@ -37,7 +37,7 @@ module Onebox
|
|
37
37
|
canonical_link = doc.at('//link[@rel="canonical"]/@href')
|
38
38
|
canonical_uri = Addressable::URI.parse(canonical_link)
|
39
39
|
if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
|
40
|
-
response = (fetch_response(canonical_uri.to_s,
|
40
|
+
response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
|
41
41
|
doc = Nokogiri::HTML(response) if response
|
42
42
|
end
|
43
43
|
end
|
@@ -45,16 +45,23 @@ module Onebox
|
|
45
45
|
doc
|
46
46
|
end
|
47
47
|
|
48
|
-
def self.fetch_response(location,
|
48
|
+
def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
|
49
|
+
redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
|
49
50
|
|
50
|
-
|
51
|
-
limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
|
52
|
-
|
53
|
-
raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
|
51
|
+
raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
|
54
52
|
|
55
53
|
uri = Addressable::URI.parse(location)
|
56
54
|
uri = Addressable::URI.join(domain, uri) if !uri.host
|
57
55
|
|
56
|
+
use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
|
57
|
+
if use_body_cacher
|
58
|
+
response_body = body_cacher.fetch_cached_response_body(uri.to_s)
|
59
|
+
|
60
|
+
if response_body.present?
|
61
|
+
return response_body
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
58
65
|
result = StringIO.new
|
59
66
|
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
|
60
67
|
http.open_timeout = Onebox.options.connect_timeout
|
@@ -86,9 +93,9 @@ module Onebox
|
|
86
93
|
response.error! unless [301, 302].include?(code)
|
87
94
|
return fetch_response(
|
88
95
|
response['location'],
|
89
|
-
|
90
|
-
"#{uri.scheme}://#{uri.host}",
|
91
|
-
redir_header
|
96
|
+
redirect_limit: redirect_limit - 1,
|
97
|
+
domain: "#{uri.scheme}://#{uri.host}",
|
98
|
+
headers: redir_header
|
92
99
|
)
|
93
100
|
end
|
94
101
|
|
@@ -98,6 +105,10 @@ module Onebox
|
|
98
105
|
raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
|
99
106
|
end
|
100
107
|
|
108
|
+
if use_body_cacher && body_cacher.cache_response_body?(uri)
|
109
|
+
body_cacher.cache_response_body(uri.to_s, result.string)
|
110
|
+
end
|
111
|
+
|
101
112
|
return result.string
|
102
113
|
end
|
103
114
|
end
|
data/lib/onebox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.8
|
4
|
+
version: 2.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joanna Zeta
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2021-03-
|
13
|
+
date: 2021-03-31 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: addressable
|