onebox 2.2.4 → 2.2.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 759d82290c29b9e1e3c97045ca14689dfc0f2f87221c1ce829b12a866abfd8a2
4
- data.tar.gz: 5209c14887939549d9737409bd5dff67c2434c5a5f99596d2af93ef56aeaae6b
3
+ metadata.gz: f942eef9390a961ec26aa8d4301bf83eedabe577d067639ab6e0036ca20e02b9
4
+ data.tar.gz: b05c99bd27f025edc02475e38cc28b900bfa2bf64e1c5f9cd4be74cd5ab927a4
5
5
  SHA512:
6
- metadata.gz: 719a0e4e72b2719f8ab0fd71474bfd07ef485c4dcba2c13809d5fbafa86f9b831ebe1aa7a73237c9cde28d7babc472234fa9ae701d9b18aae25621a641c883a0
7
- data.tar.gz: f8c0c6c5883c48ea2811940f00c2419438e199c1e9a70761453c9aafdf176404633dc79133fc87d4014ee0e0c74a037ee8ee19b2f9b46cd85ef07a491f64cfe6
6
+ metadata.gz: '081ccc4c533884804d094d40b71175409077002595b10b9c3fd56d8f6c11bcacdc99502fdd78f082adb1e59d83090182550436579ec8bc2361714bb659f7a693'
7
+ data.tar.gz: d3dfcb8f455e7d9189415cfdf9e00f7f1d938b8ce41d8d307ff2ff4bbb6fcf09449a4a881b0bfec34f8b275556691601f2e6d070351ea2b51d5626e5a561b2fd
@@ -44,6 +44,6 @@ jobs:
44
44
  - uses: actions/checkout@v2
45
45
 
46
46
  - name: Release Gem
47
- uses: CvX/publish-rubygems-action@master
47
+ uses: discourse/publish-rubygems-action@main
48
48
  env:
49
49
  RUBYGEMS_API_KEY: ${{secrets.RUBYGEMS_API_KEY}}
@@ -227,8 +227,10 @@ module Onebox
227
227
  d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
228
228
  d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
229
229
  d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
230
+ d[:image] = nil if Onebox::Helpers.blank?(d[:image])
230
231
 
231
232
  d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
233
+ d[:video] = nil if Onebox::Helpers.blank?(d[:video])
232
234
 
233
235
  d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
234
236
  if !Onebox::Helpers.blank?(d[:published_time])
@@ -11,11 +11,25 @@ module Onebox
11
11
  include HTML
12
12
 
13
13
  always_https
14
- matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
14
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
15
15
 
16
16
  def url
17
+ # If possible, fetch the cached HTML body immediately so we can
18
+ # try to grab the canonical URL from that document,
19
+ # rather than guess at the best URL structure to use
20
+ if body_cacher&.respond_to?('cache_response_body?')
21
+ if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
22
+ @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
23
+ end
24
+ end
25
+
26
+ if @raw
27
+ canonical_link = @raw.at('//link[@rel="canonical"]/@href')
28
+ return canonical_link.to_s if canonical_link
29
+ end
30
+
17
31
  if match && match[:id]
18
- return "https://www.amazon.#{tld}/gp/aw/d/#{Onebox::Helpers.uri_encode(match[:id])}"
32
+ return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
19
33
  end
20
34
 
21
35
  @url
@@ -26,16 +40,15 @@ module Onebox
26
40
  end
27
41
 
28
42
  def http_params
29
- {
30
- 'User-Agent' =>
31
- 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3'
32
- }
43
+ if @options && @options[:user_agent]
44
+ { 'User-Agent' => @options[:user_agent] }
45
+ end
33
46
  end
34
47
 
35
48
  private
36
49
 
37
50
  def match
38
- @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[^\/]+)(?:\/|$)/mi)
51
+ @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
39
52
  end
40
53
 
41
54
  def image
@@ -50,6 +63,10 @@ module Onebox
50
63
  end
51
64
 
52
65
  if (landing_image = raw.css("#landingImage")) && landing_image.any?
66
+ attributes = landing_image.first.attributes
67
+
68
+ return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
69
+
53
70
  landing_image.first["src"].to_s
54
71
  end
55
72
 
@@ -100,7 +117,7 @@ module Onebox
100
117
  end
101
118
 
102
119
  result = {
103
- link: link,
120
+ link: url,
104
121
  title: title,
105
122
  by_info: authors,
106
123
  image: og.image || image,
@@ -131,7 +148,7 @@ module Onebox
131
148
  end
132
149
 
133
150
  result = {
134
- link: link,
151
+ link: url,
135
152
  title: title,
136
153
  by_info: authors,
137
154
  image: og.image || image,
@@ -147,7 +164,7 @@ module Onebox
147
164
  else
148
165
  title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
149
166
  result = {
150
- link: link,
167
+ link: url,
151
168
  title: title,
152
169
  image: og.image || image,
153
170
  price: price
@@ -157,7 +174,10 @@ module Onebox
157
174
  result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
158
175
 
159
176
  summary = raw.at("#productDescription")
160
- result[:description] = og.description || (summary && summary.inner_text) || CGI.unescapeHTML(Onebox::Helpers.truncate(raw.css("meta[name=description]").first["content"], 250))
177
+
178
+ description = og.description || summary&.inner_text
179
+ description ||= raw.css("meta[name=description]").first&.[]("content")
180
+ result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
161
181
  end
162
182
 
163
183
  result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
@@ -15,7 +15,7 @@ module Onebox
15
15
  escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
16
16
 
17
17
  <<-HTML
18
- <audio controls>
18
+ <audio controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
19
19
  <source src="#{escaped_url}">
20
20
  <a href="#{escaped_url}">#{@url}</a>
21
21
  </audio>
@@ -63,7 +63,7 @@ module Onebox
63
63
 
64
64
  def nokogiri_page
65
65
  @nokogiri_page ||= begin
66
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
66
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
67
67
  Nokogiri::HTML(response)
68
68
  end
69
69
  end
@@ -47,7 +47,7 @@ module Onebox
47
47
  end
48
48
 
49
49
  def get_og_data
50
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
50
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
51
51
  html = Nokogiri::HTML(response)
52
52
  og_data = {}
53
53
  html.css('meta').each do |m|
@@ -11,7 +11,11 @@ module Onebox
11
11
  end
12
12
 
13
13
  def raw
14
- @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
14
+ @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
15
+ end
16
+
17
+ def body_cacher
18
+ self.options&.[](:body_cacher)
15
19
  end
16
20
 
17
21
  def html?
@@ -31,7 +31,7 @@ module Onebox
31
31
 
32
32
  def lines
33
33
  return @lines if @lines
34
- response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
34
+ response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
35
35
  @lines = response.split("\n")
36
36
  end
37
37
 
@@ -17,7 +17,7 @@ module Onebox
17
17
  private
18
18
 
19
19
  def get_twitter_data
20
- response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
20
+ response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
21
21
  html = Nokogiri::HTML(response)
22
22
  twitter_data = {}
23
23
  html.css('meta').each do |m|
@@ -20,7 +20,7 @@ module Onebox
20
20
  escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
21
21
  <<-HTML
22
22
  <div class="onebox video-onebox">
23
- <video width='100%' height='100%' controls>
23
+ <video width='100%' height='100%' controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
24
24
  <source src='#{escaped_url}'>
25
25
  <a href='#{escaped_url}'>#{@url}</a>
26
26
  </video>
@@ -24,8 +24,8 @@ module Onebox
24
24
  html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
25
25
  end
26
26
 
27
- def self.fetch_html_doc(url, headers = nil)
28
- response = (fetch_response(url, nil, nil, headers) rescue nil)
27
+ def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
28
+ response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
29
29
  doc = Nokogiri::HTML(response)
30
30
  uri = Addressable::URI.parse(url)
31
31
 
@@ -37,7 +37,7 @@ module Onebox
37
37
  canonical_link = doc.at('//link[@rel="canonical"]/@href')
38
38
  canonical_uri = Addressable::URI.parse(canonical_link)
39
39
  if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
40
- response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
40
+ response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
41
41
  doc = Nokogiri::HTML(response) if response
42
42
  end
43
43
  end
@@ -45,16 +45,23 @@ module Onebox
45
45
  doc
46
46
  end
47
47
 
48
- def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
48
+ def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
49
+ redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
49
50
 
50
- limit ||= 5
51
- limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
52
-
53
- raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
51
+ raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
54
52
 
55
53
  uri = Addressable::URI.parse(location)
56
54
  uri = Addressable::URI.join(domain, uri) if !uri.host
57
55
 
56
+ use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
57
+ if use_body_cacher
58
+ response_body = body_cacher.fetch_cached_response_body(uri.to_s)
59
+
60
+ if response_body.present?
61
+ return response_body
62
+ end
63
+ end
64
+
58
65
  result = StringIO.new
59
66
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
60
67
  http.open_timeout = Onebox.options.connect_timeout
@@ -86,9 +93,9 @@ module Onebox
86
93
  response.error! unless [301, 302].include?(code)
87
94
  return fetch_response(
88
95
  response['location'],
89
- limit - 1,
90
- "#{uri.scheme}://#{uri.host}",
91
- redir_header
96
+ redirect_limit: redirect_limit - 1,
97
+ domain: "#{uri.scheme}://#{uri.host}",
98
+ headers: redir_header
92
99
  )
93
100
  end
94
101
 
@@ -98,6 +105,10 @@ module Onebox
98
105
  raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
99
106
  end
100
107
 
108
+ if use_body_cacher && body_cacher.cache_response_body?(uri)
109
+ body_cacher.cache_response_body(uri.to_s, result.string)
110
+ end
111
+
101
112
  return result.string
102
113
  end
103
114
  end
@@ -178,6 +189,10 @@ module Onebox
178
189
  url.gsub!("'", "&apos;")
179
190
  url.gsub!('"', "&quot;")
180
191
  url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, "")
192
+
193
+ parsed = Addressable::URI.parse(url)
194
+ return "" unless parsed.host
195
+
181
196
  url
182
197
  end
183
198
 
@@ -32,7 +32,8 @@ module Onebox
32
32
  if method_name.end_with?(*integer_suffixes)
33
33
  value.to_i
34
34
  elsif method_name.end_with?(*url_suffixes)
35
- ::Onebox::Helpers.normalize_url_for_output(value)
35
+ result = Onebox::Helpers.normalize_url_for_output(value)
36
+ result unless Onebox::Helpers::blank?(result)
36
37
  else
37
38
  value
38
39
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.2.4"
4
+ VERSION = "2.2.10"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.4
4
+ version: 2.2.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-02-24 00:00:00.000000000 Z
13
+ date: 2021-04-02 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable