onebox 2.2.8 → 2.2.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/onebox/engine/amazon_onebox.rb +16 -6
- data/lib/onebox/engine/gfycat_onebox.rb +1 -1
- data/lib/onebox/engine/google_docs_onebox.rb +1 -1
- data/lib/onebox/engine/html.rb +2 -1
- data/lib/onebox/engine/pastebin_onebox.rb +1 -1
- data/lib/onebox/engine/twitter_status_onebox.rb +1 -1
- data/lib/onebox/helpers.rb +22 -11
- data/lib/onebox/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 541807b758c73492e2868271a63b8f05ef4d94aad316be86fb5c42c3c3f0e2ce
|
4
|
+
data.tar.gz: 10f460d3d6a1b09a6087a38ad0692c9f652e027beb65b22845536480055b4a48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d13068e9dbb437388ee689581bc64bb08bf4c5ae898755680c3c978a00fc4a4ee13004f0f2a3071f40d226d15e84f799a949d0d07572c6537ddd92950416d56b
|
7
|
+
data.tar.gz: 1efacb58ff5d24daff63cdd855a32ae80826f3585eee0305ba032269ee4431f1541848582bd5587aacffdcd6cb9c5bb35c650706c6028ed5e115e17daf362621
|
data/lib/onebox/engine/amazon_onebox.rb
CHANGED
@@ -11,11 +11,22 @@ module Onebox
|
|
11
11
|
include HTML
|
12
12
|
|
13
13
|
always_https
|
14
|
-
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
|
14
|
+
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
|
15
15
|
|
16
16
|
def url
|
17
|
+
# Have we cached the HTML body of the requested URL?
|
18
|
+
# If so, try to grab the canonical URL from that document,
|
19
|
+
# rather than guess at the best URL structure to use
|
20
|
+
if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
|
21
|
+
if @body_cacher.cached_response_body_exists?(uri.to_s)
|
22
|
+
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
|
23
|
+
canonical_link = @raw.at('//link[@rel="canonical"]/@href')
|
24
|
+
return canonical_link.to_s if canonical_link
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
17
28
|
if match && match[:id]
|
18
|
-
return "https://www.amazon.#{tld}/
|
29
|
+
return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
|
19
30
|
end
|
20
31
|
|
21
32
|
@url
|
@@ -26,10 +37,9 @@ module Onebox
|
|
26
37
|
end
|
27
38
|
|
28
39
|
def http_params
|
29
|
-
|
30
|
-
'User-Agent' =>
|
31
|
-
|
32
|
-
}
|
40
|
+
if @options && @options[:user_agent]
|
41
|
+
{ 'User-Agent' => @options[:user_agent] }
|
42
|
+
end
|
33
43
|
end
|
34
44
|
|
35
45
|
private
|
@@ -47,7 +47,7 @@ module Onebox
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def get_og_data
|
50
|
-
response = Onebox::Helpers.fetch_response(url, 10) rescue nil
|
50
|
+
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
51
51
|
html = Nokogiri::HTML(response)
|
52
52
|
og_data = {}
|
53
53
|
html.css('meta').each do |m|
|
data/lib/onebox/engine/html.rb
CHANGED
@@ -31,7 +31,7 @@ module Onebox
|
|
31
31
|
|
32
32
|
def lines
|
33
33
|
return @lines if @lines
|
34
|
-
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
|
34
|
+
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
|
35
35
|
@lines = response.split("\n")
|
36
36
|
end
|
37
37
|
|
@@ -17,7 +17,7 @@ module Onebox
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def get_twitter_data
|
20
|
-
response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
|
20
|
+
response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
|
21
21
|
html = Nokogiri::HTML(response)
|
22
22
|
twitter_data = {}
|
23
23
|
html.css('meta').each do |m|
|
data/lib/onebox/helpers.rb
CHANGED
@@ -24,8 +24,8 @@ module Onebox
|
|
24
24
|
html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
|
25
25
|
end
|
26
26
|
|
27
|
-
def self.fetch_html_doc(url, headers = nil)
|
28
|
-
response = (fetch_response(url, nil, nil, headers) rescue nil)
|
27
|
+
def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
|
28
|
+
response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
|
29
29
|
doc = Nokogiri::HTML(response)
|
30
30
|
uri = Addressable::URI.parse(url)
|
31
31
|
|
@@ -37,7 +37,7 @@ module Onebox
|
|
37
37
|
canonical_link = doc.at('//link[@rel="canonical"]/@href')
|
38
38
|
canonical_uri = Addressable::URI.parse(canonical_link)
|
39
39
|
if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
|
40
|
-
response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
|
40
|
+
response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
|
41
41
|
doc = Nokogiri::HTML(response) if response
|
42
42
|
end
|
43
43
|
end
|
@@ -45,16 +45,23 @@ module Onebox
|
|
45
45
|
doc
|
46
46
|
end
|
47
47
|
|
48
|
-
def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
|
48
|
+
def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
|
49
|
+
redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
|
49
50
|
|
50
|
-
limit ||= 5
51
|
-
limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
|
52
|
-
|
53
|
-
raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
|
51
|
+
raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
|
54
52
|
|
55
53
|
uri = Addressable::URI.parse(location)
|
56
54
|
uri = Addressable::URI.join(domain, uri) if !uri.host
|
57
55
|
|
56
|
+
use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
|
57
|
+
if use_body_cacher
|
58
|
+
response_body = body_cacher.fetch_cached_response_body(uri.to_s)
|
59
|
+
|
60
|
+
if response_body.present?
|
61
|
+
return response_body
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
58
65
|
result = StringIO.new
|
59
66
|
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
|
60
67
|
http.open_timeout = Onebox.options.connect_timeout
|
@@ -86,9 +93,9 @@ module Onebox
|
|
86
93
|
response.error! unless [301, 302].include?(code)
|
87
94
|
return fetch_response(
|
88
95
|
response['location'],
|
89
|
-
limit - 1,
90
|
-
"#{uri.scheme}://#{uri.host}",
|
91
|
-
redir_header
|
96
|
+
redirect_limit: redirect_limit - 1,
|
97
|
+
domain: "#{uri.scheme}://#{uri.host}",
|
98
|
+
headers: redir_header
|
92
99
|
)
|
93
100
|
end
|
94
101
|
|
@@ -98,6 +105,10 @@ module Onebox
|
|
98
105
|
raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
|
99
106
|
end
|
100
107
|
|
108
|
+
if use_body_cacher && body_cacher.cache_response_body?(uri)
|
109
|
+
body_cacher.cache_response_body(uri.to_s, result.string)
|
110
|
+
end
|
111
|
+
|
101
112
|
return result.string
|
102
113
|
end
|
103
114
|
end
|
data/lib/onebox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.8
|
4
|
+
version: 2.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joanna Zeta
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2021-03-
|
13
|
+
date: 2021-03-31 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: addressable
|