onebox 2.2.8 → 2.2.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5899faff4b0c2e96d766892a810e866e28a188dbd7b9e8b2c7bdd6bc5a7d7490
4
- data.tar.gz: 7d739d48349f32dc3f0675d85b29446d6f758f276b845543cc9f9d359c9eb664
3
+ metadata.gz: 541807b758c73492e2868271a63b8f05ef4d94aad316be86fb5c42c3c3f0e2ce
4
+ data.tar.gz: 10f460d3d6a1b09a6087a38ad0692c9f652e027beb65b22845536480055b4a48
5
5
  SHA512:
6
- metadata.gz: 790964df42c83ea05ff052727a7ad4b567f6064be1a25bf974eeac661a6f7daa3696c64686f21bb83df30ad280caffc36b2039e3c0b799a3171713d61c88eff2
7
- data.tar.gz: de30b5e5e6110fd72c0aa792aebffa7571c50f04604efedac648e754a527f4945a784328eb43ce0352e9b06eeac908ea23ff0dcdf3ce4d84ecb5e8c3dd64dfd6
6
+ metadata.gz: d13068e9dbb437388ee689581bc64bb08bf4c5ae898755680c3c978a00fc4a4ee13004f0f2a3071f40d226d15e84f799a949d0d07572c6537ddd92950416d56b
7
+ data.tar.gz: 1efacb58ff5d24daff63cdd855a32ae80826f3585eee0305ba032269ee4431f1541848582bd5587aacffdcd6cb9c5bb35c650706c6028ed5e115e17daf362621
@@ -11,11 +11,22 @@ module Onebox
11
11
  include HTML
12
12
 
13
13
  always_https
14
- matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
14
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
15
15
 
16
16
  def url
17
+ # Have we cached the HTML body of the requested URL?
18
+ # If so, try to grab the canonical URL from that document,
19
+ # rather than guess at the best URL structure to use
20
+ if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
21
+ if @body_cacher.cached_response_body_exists?(uri.to_s)
22
+ @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
23
+ canonical_link = @raw.at('//link[@rel="canonical"]/@href')
24
+ return canonical_link.to_s if canonical_link
25
+ end
26
+ end
27
+
17
28
  if match && match[:id]
18
- return "https://www.amazon.#{tld}/gp/aw/d/#{Onebox::Helpers.uri_encode(match[:id])}"
29
+ return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
19
30
  end
20
31
 
21
32
  @url
@@ -26,10 +37,9 @@ module Onebox
26
37
  end
27
38
 
28
39
  def http_params
29
- {
30
- 'User-Agent' =>
31
- 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
32
- }
40
+ if @options && @options[:user_agent]
41
+ { 'User-Agent' => @options[:user_agent] }
42
+ end
33
43
  end
34
44
 
35
45
  private
@@ -63,7 +63,7 @@ module Onebox
63
63
 
64
64
  def nokogiri_page
65
65
  @nokogiri_page ||= begin
66
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
66
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
67
67
  Nokogiri::HTML(response)
68
68
  end
69
69
  end
@@ -47,7 +47,7 @@ module Onebox
47
47
  end
48
48
 
49
49
  def get_og_data
50
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
50
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
51
51
  html = Nokogiri::HTML(response)
52
52
  og_data = {}
53
53
  html.css('meta').each do |m|
@@ -11,7 +11,8 @@ module Onebox
11
11
  end
12
12
 
13
13
  def raw
14
- @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
14
+ body_cacher = self.options[:body_cacher] if self.options
15
+ @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
15
16
  end
16
17
 
17
18
  def html?
@@ -31,7 +31,7 @@ module Onebox
31
31
 
32
32
  def lines
33
33
  return @lines if @lines
34
- response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
34
+ response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
35
35
  @lines = response.split("\n")
36
36
  end
37
37
 
@@ -17,7 +17,7 @@ module Onebox
17
17
  private
18
18
 
19
19
  def get_twitter_data
20
- response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
20
+ response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
21
21
  html = Nokogiri::HTML(response)
22
22
  twitter_data = {}
23
23
  html.css('meta').each do |m|
@@ -24,8 +24,8 @@ module Onebox
24
24
  html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
25
25
  end
26
26
 
27
- def self.fetch_html_doc(url, headers = nil)
28
- response = (fetch_response(url, nil, nil, headers) rescue nil)
27
+ def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
28
+ response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
29
29
  doc = Nokogiri::HTML(response)
30
30
  uri = Addressable::URI.parse(url)
31
31
 
@@ -37,7 +37,7 @@ module Onebox
37
37
  canonical_link = doc.at('//link[@rel="canonical"]/@href')
38
38
  canonical_uri = Addressable::URI.parse(canonical_link)
39
39
  if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
40
- response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
40
+ response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
41
41
  doc = Nokogiri::HTML(response) if response
42
42
  end
43
43
  end
@@ -45,16 +45,23 @@ module Onebox
45
45
  doc
46
46
  end
47
47
 
48
- def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
48
+ def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
49
+ redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
49
50
 
50
- limit ||= 5
51
- limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
52
-
53
- raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
51
+ raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
54
52
 
55
53
  uri = Addressable::URI.parse(location)
56
54
  uri = Addressable::URI.join(domain, uri) if !uri.host
57
55
 
56
+ use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
57
+ if use_body_cacher
58
+ response_body = body_cacher.fetch_cached_response_body(uri.to_s)
59
+
60
+ if response_body.present?
61
+ return response_body
62
+ end
63
+ end
64
+
58
65
  result = StringIO.new
59
66
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
60
67
  http.open_timeout = Onebox.options.connect_timeout
@@ -86,9 +93,9 @@ module Onebox
86
93
  response.error! unless [301, 302].include?(code)
87
94
  return fetch_response(
88
95
  response['location'],
89
- limit - 1,
90
- "#{uri.scheme}://#{uri.host}",
91
- redir_header
96
+ redirect_limit: redirect_limit - 1,
97
+ domain: "#{uri.scheme}://#{uri.host}",
98
+ headers: redir_header
92
99
  )
93
100
  end
94
101
 
@@ -98,6 +105,10 @@ module Onebox
98
105
  raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
99
106
  end
100
107
 
108
+ if use_body_cacher && body_cacher.cache_response_body?(uri)
109
+ body_cacher.cache_response_body(uri.to_s, result.string)
110
+ end
111
+
101
112
  return result.string
102
113
  end
103
114
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.2.8"
4
+ VERSION = "2.2.9"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.8
4
+ version: 2.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-03-09 00:00:00.000000000 Z
13
+ date: 2021-03-31 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable