onebox 2.2.8 → 2.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5899faff4b0c2e96d766892a810e866e28a188dbd7b9e8b2c7bdd6bc5a7d7490
4
- data.tar.gz: 7d739d48349f32dc3f0675d85b29446d6f758f276b845543cc9f9d359c9eb664
3
+ metadata.gz: 541807b758c73492e2868271a63b8f05ef4d94aad316be86fb5c42c3c3f0e2ce
4
+ data.tar.gz: 10f460d3d6a1b09a6087a38ad0692c9f652e027beb65b22845536480055b4a48
5
5
  SHA512:
6
- metadata.gz: 790964df42c83ea05ff052727a7ad4b567f6064be1a25bf974eeac661a6f7daa3696c64686f21bb83df30ad280caffc36b2039e3c0b799a3171713d61c88eff2
7
- data.tar.gz: de30b5e5e6110fd72c0aa792aebffa7571c50f04604efedac648e754a527f4945a784328eb43ce0352e9b06eeac908ea23ff0dcdf3ce4d84ecb5e8c3dd64dfd6
6
+ metadata.gz: d13068e9dbb437388ee689581bc64bb08bf4c5ae898755680c3c978a00fc4a4ee13004f0f2a3071f40d226d15e84f799a949d0d07572c6537ddd92950416d56b
7
+ data.tar.gz: 1efacb58ff5d24daff63cdd855a32ae80826f3585eee0305ba032269ee4431f1541848582bd5587aacffdcd6cb9c5bb35c650706c6028ed5e115e17daf362621
@@ -11,11 +11,22 @@ module Onebox
11
11
  include HTML
12
12
 
13
13
  always_https
14
- matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
14
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
15
15
 
16
16
  def url
17
+ # Have we cached the HTML body of the requested URL?
18
+ # If so, try to grab the canonical URL from that document,
19
+ # rather than guess at the best URL structure to use
20
+ if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
21
+ if @body_cacher.cached_response_body_exists?(uri.to_s)
22
+ @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
23
+ canonical_link = @raw.at('//link[@rel="canonical"]/@href')
24
+ return canonical_link.to_s if canonical_link
25
+ end
26
+ end
27
+
17
28
  if match && match[:id]
18
- return "https://www.amazon.#{tld}/gp/aw/d/#{Onebox::Helpers.uri_encode(match[:id])}"
29
+ return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
19
30
  end
20
31
 
21
32
  @url
@@ -26,10 +37,9 @@ module Onebox
26
37
  end
27
38
 
28
39
  def http_params
29
- {
30
- 'User-Agent' =>
31
- 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
32
- }
40
+ if @options && @options[:user_agent]
41
+ { 'User-Agent' => @options[:user_agent] }
42
+ end
33
43
  end
34
44
 
35
45
  private
@@ -63,7 +63,7 @@ module Onebox
63
63
 
64
64
  def nokogiri_page
65
65
  @nokogiri_page ||= begin
66
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
66
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
67
67
  Nokogiri::HTML(response)
68
68
  end
69
69
  end
@@ -47,7 +47,7 @@ module Onebox
47
47
  end
48
48
 
49
49
  def get_og_data
50
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
50
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
51
51
  html = Nokogiri::HTML(response)
52
52
  og_data = {}
53
53
  html.css('meta').each do |m|
@@ -11,7 +11,8 @@ module Onebox
11
11
  end
12
12
 
13
13
  def raw
14
- @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
14
+ body_cacher = self.options[:body_cacher] if self.options
15
+ @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
15
16
  end
16
17
 
17
18
  def html?
@@ -31,7 +31,7 @@ module Onebox
31
31
 
32
32
  def lines
33
33
  return @lines if @lines
34
- response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
34
+ response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
35
35
  @lines = response.split("\n")
36
36
  end
37
37
 
@@ -17,7 +17,7 @@ module Onebox
17
17
  private
18
18
 
19
19
  def get_twitter_data
20
- response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
20
+ response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
21
21
  html = Nokogiri::HTML(response)
22
22
  twitter_data = {}
23
23
  html.css('meta').each do |m|
@@ -24,8 +24,8 @@ module Onebox
24
24
  html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
25
25
  end
26
26
 
27
- def self.fetch_html_doc(url, headers = nil)
28
- response = (fetch_response(url, nil, nil, headers) rescue nil)
27
+ def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
28
+ response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
29
29
  doc = Nokogiri::HTML(response)
30
30
  uri = Addressable::URI.parse(url)
31
31
 
@@ -37,7 +37,7 @@ module Onebox
37
37
  canonical_link = doc.at('//link[@rel="canonical"]/@href')
38
38
  canonical_uri = Addressable::URI.parse(canonical_link)
39
39
  if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
40
- response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
40
+ response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
41
41
  doc = Nokogiri::HTML(response) if response
42
42
  end
43
43
  end
@@ -45,16 +45,23 @@ module Onebox
45
45
  doc
46
46
  end
47
47
 
48
- def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
48
+ def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
49
+ redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
49
50
 
50
- limit ||= 5
51
- limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
52
-
53
- raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
51
+ raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
54
52
 
55
53
  uri = Addressable::URI.parse(location)
56
54
  uri = Addressable::URI.join(domain, uri) if !uri.host
57
55
 
56
+ use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
57
+ if use_body_cacher
58
+ response_body = body_cacher.fetch_cached_response_body(uri.to_s)
59
+
60
+ if response_body.present?
61
+ return response_body
62
+ end
63
+ end
64
+
58
65
  result = StringIO.new
59
66
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
60
67
  http.open_timeout = Onebox.options.connect_timeout
@@ -86,9 +93,9 @@ module Onebox
86
93
  response.error! unless [301, 302].include?(code)
87
94
  return fetch_response(
88
95
  response['location'],
89
- limit - 1,
90
- "#{uri.scheme}://#{uri.host}",
91
- redir_header
96
+ redirect_limit: redirect_limit - 1,
97
+ domain: "#{uri.scheme}://#{uri.host}",
98
+ headers: redir_header
92
99
  )
93
100
  end
94
101
 
@@ -98,6 +105,10 @@ module Onebox
98
105
  raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
99
106
  end
100
107
 
108
+ if use_body_cacher && body_cacher.cache_response_body?(uri)
109
+ body_cacher.cache_response_body(uri.to_s, result.string)
110
+ end
111
+
101
112
  return result.string
102
113
  end
103
114
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.2.8"
4
+ VERSION = "2.2.9"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.8
4
+ version: 2.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-03-09 00:00:00.000000000 Z
13
+ date: 2021-03-31 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable