onebox 2.2.2 → 2.2.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 817b81480e38b1a133342d89f83991cd0e94217581ddb74c6275c6631c7285bf
4
- data.tar.gz: 545e95966e368c2b36a9792dd99934ec609635e19d95bb02ef1dd1fbcd51c5f6
3
+ metadata.gz: 541807b758c73492e2868271a63b8f05ef4d94aad316be86fb5c42c3c3f0e2ce
4
+ data.tar.gz: 10f460d3d6a1b09a6087a38ad0692c9f652e027beb65b22845536480055b4a48
5
5
  SHA512:
6
- metadata.gz: fc0ea7daff3babaa77cebf9e5e00161877a86106d15b1a2d4e1b1e56f1e4ba9ebfd6e3666a991d420e4ea71180dc8fcaf2bafc0ac659934399706462c1e45d69
7
- data.tar.gz: f509adf0a3ad1d557700a690bfb9b922ec9c181b68cf6ab61134394ee70c8ee4487d64faa863262a28cd795ed14e39bc774521572551c719faa39a1acc208a3e
6
+ metadata.gz: d13068e9dbb437388ee689581bc64bb08bf4c5ae898755680c3c978a00fc4a4ee13004f0f2a3071f40d226d15e84f799a949d0d07572c6537ddd92950416d56b
7
+ data.tar.gz: 1efacb58ff5d24daff63cdd855a32ae80826f3585eee0305ba032269ee4431f1541848582bd5587aacffdcd6cb9c5bb35c650706c6028ed5e115e17daf362621
@@ -15,24 +15,19 @@ jobs:
15
15
  strategy:
16
16
  matrix:
17
17
  ruby:
18
- - 2.4
19
18
  - 2.5
20
19
  - 2.6
20
+ - 2.7
21
+ - 3.0
21
22
 
22
23
  steps:
23
- - uses: actions/checkout@v1
24
+ - uses: actions/checkout@v2
24
25
 
25
26
  - name: Setup ruby
26
- uses: actions/setup-ruby@v1
27
+ uses: ruby/setup-ruby@v1
27
28
  with:
28
29
  ruby-version: ${{ matrix.ruby }}
29
- architecture: 'x64'
30
-
31
- - name: Setup bundler
32
- run: gem install bundler
33
-
34
- - name: Setup gems
35
- run: bundle install
30
+ bundler-cache: true
36
31
 
37
32
  - name: Rubocop
38
33
  run: bundle exec rubocop
@@ -49,6 +44,6 @@ jobs:
49
44
  - uses: actions/checkout@v2
50
45
 
51
46
  - name: Release Gem
52
- uses: CvX/publish-rubygems-action@master
47
+ uses: discourse/publish-rubygems-action@main
53
48
  env:
54
49
  RUBYGEMS_API_KEY: ${{secrets.RUBYGEMS_API_KEY}}
@@ -227,8 +227,10 @@ module Onebox
227
227
  d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
228
228
  d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
229
229
  d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
230
+ d[:image] = nil if Onebox::Helpers.blank?(d[:image])
230
231
 
231
232
  d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
233
+ d[:video] = nil if Onebox::Helpers.blank?(d[:video])
232
234
 
233
235
  d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
234
236
  if !Onebox::Helpers.blank?(d[:published_time])
@@ -11,11 +11,22 @@ module Onebox
11
11
  include HTML
12
12
 
13
13
  always_https
14
- matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
14
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
15
15
 
16
16
  def url
17
+ # Have we cached the HTML body of the requested URL?
18
+ # If so, try to grab the canonical URL from that document,
19
+ # rather than guess at the best URL structure to use
20
+ if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
21
+ if @body_cacher.cached_response_body_exists?(uri.to_s)
22
+ @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
23
+ canonical_link = @raw.at('//link[@rel="canonical"]/@href')
24
+ return canonical_link.to_s if canonical_link
25
+ end
26
+ end
27
+
17
28
  if match && match[:id]
18
- return "https://www.amazon.#{tld}/gp/aw/d/#{Onebox::Helpers.uri_encode(match[:id])}"
29
+ return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
19
30
  end
20
31
 
21
32
  @url
@@ -26,10 +37,9 @@ module Onebox
26
37
  end
27
38
 
28
39
  def http_params
29
- {
30
- 'User-Agent' =>
31
- 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3'
32
- }
40
+ if @options && @options[:user_agent]
41
+ { 'User-Agent' => @options[:user_agent] }
42
+ end
33
43
  end
34
44
 
35
45
  private
@@ -15,7 +15,7 @@ module Onebox
15
15
  escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
16
16
 
17
17
  <<-HTML
18
- <audio controls>
18
+ <audio controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
19
19
  <source src="#{escaped_url}">
20
20
  <a href="#{escaped_url}">#{@url}</a>
21
21
  </audio>
@@ -63,7 +63,7 @@ module Onebox
63
63
 
64
64
  def nokogiri_page
65
65
  @nokogiri_page ||= begin
66
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
66
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
67
67
  Nokogiri::HTML(response)
68
68
  end
69
69
  end
@@ -22,8 +22,7 @@ module Onebox
22
22
  end
23
23
 
24
24
  def data
25
-
26
- @raw ||= ::MultiJson.load(open(url, "Accept" => "application/vnd.github.v3.text+json", read_timeout: timeout)) #custom Accept header so we can get body as text.
25
+ @raw ||= ::MultiJson.load(URI.open(url, "Accept" => "application/vnd.github.v3.text+json", read_timeout: timeout)) #custom Accept header so we can get body as text.
27
26
  body_text = @raw["body_text"]
28
27
 
29
28
  content_words = body_text.gsub("\n\n", "\n").gsub("\n", "<br>").split(" ") #one pass of removing double newline, then we change \n to <br> and later on we revert it back to \n this is a workaround to avoid losing newlines after we join it back.
@@ -47,7 +47,7 @@ module Onebox
47
47
  end
48
48
 
49
49
  def get_og_data
50
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
50
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
51
51
  html = Nokogiri::HTML(response)
52
52
  og_data = {}
53
53
  html.css('meta').each do |m|
@@ -11,7 +11,8 @@ module Onebox
11
11
  end
12
12
 
13
13
  def raw
14
- @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
14
+ body_cacher = self.options[:body_cacher] if self.options
15
+ @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
15
16
  end
16
17
 
17
18
  def html?
@@ -6,7 +6,7 @@ module Onebox
6
6
  private
7
7
 
8
8
  def raw
9
- @raw ||= ::MultiJson.load(open(url, read_timeout: timeout))
9
+ @raw ||= ::MultiJson.load(URI.open(url, read_timeout: timeout))
10
10
  end
11
11
  end
12
12
  end
@@ -31,7 +31,7 @@ module Onebox
31
31
 
32
32
  def lines
33
33
  return @lines if @lines
34
- response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
34
+ response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
35
35
  @lines = response.split("\n")
36
36
  end
37
37
 
@@ -11,7 +11,7 @@ module Onebox
11
11
  private
12
12
 
13
13
  def get_xml
14
- doc = Nokogiri::XML(open(URI.join(@url, "?report=xml&format=text")))
14
+ doc = Nokogiri::XML(URI.open(URI.join(@url, "?report=xml&format=text")))
15
15
  pre = doc.xpath("//pre")
16
16
  Nokogiri::XML("<root>" + pre.text + "</root>")
17
17
  end
@@ -17,7 +17,7 @@ module Onebox
17
17
  private
18
18
 
19
19
  def get_twitter_data
20
- response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
20
+ response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
21
21
  html = Nokogiri::HTML(response)
22
22
  twitter_data = {}
23
23
  html.css('meta').each do |m|
@@ -20,7 +20,7 @@ module Onebox
20
20
  escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
21
21
  <<-HTML
22
22
  <div class="onebox video-onebox">
23
- <video width='100%' height='100%' controls>
23
+ <video width='100%' height='100%' controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
24
24
  <source src='#{escaped_url}'>
25
25
  <a href='#{escaped_url}'>#{@url}</a>
26
26
  </video>
@@ -24,8 +24,8 @@ module Onebox
24
24
  html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
25
25
  end
26
26
 
27
- def self.fetch_html_doc(url, headers = nil)
28
- response = (fetch_response(url, nil, nil, headers) rescue nil)
27
+ def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
28
+ response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
29
29
  doc = Nokogiri::HTML(response)
30
30
  uri = Addressable::URI.parse(url)
31
31
 
@@ -37,7 +37,7 @@ module Onebox
37
37
  canonical_link = doc.at('//link[@rel="canonical"]/@href')
38
38
  canonical_uri = Addressable::URI.parse(canonical_link)
39
39
  if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
40
- response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
40
+ response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
41
41
  doc = Nokogiri::HTML(response) if response
42
42
  end
43
43
  end
@@ -45,16 +45,23 @@ module Onebox
45
45
  doc
46
46
  end
47
47
 
48
- def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
48
+ def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
49
+ redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
49
50
 
50
- limit ||= 5
51
- limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
52
-
53
- raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
51
+ raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
54
52
 
55
53
  uri = Addressable::URI.parse(location)
56
54
  uri = Addressable::URI.join(domain, uri) if !uri.host
57
55
 
56
+ use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
57
+ if use_body_cacher
58
+ response_body = body_cacher.fetch_cached_response_body(uri.to_s)
59
+
60
+ if response_body.present?
61
+ return response_body
62
+ end
63
+ end
64
+
58
65
  result = StringIO.new
59
66
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
60
67
  http.open_timeout = Onebox.options.connect_timeout
@@ -86,9 +93,9 @@ module Onebox
86
93
  response.error! unless [301, 302].include?(code)
87
94
  return fetch_response(
88
95
  response['location'],
89
- limit - 1,
90
- "#{uri.scheme}://#{uri.host}",
91
- redir_header
96
+ redirect_limit: redirect_limit - 1,
97
+ domain: "#{uri.scheme}://#{uri.host}",
98
+ headers: redir_header
92
99
  )
93
100
  end
94
101
 
@@ -98,6 +105,10 @@ module Onebox
98
105
  raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
99
106
  end
100
107
 
108
+ if use_body_cacher && body_cacher.cache_response_body?(uri)
109
+ body_cacher.cache_response_body(uri.to_s, result.string)
110
+ end
111
+
101
112
  return result.string
102
113
  end
103
114
  end
@@ -178,6 +189,10 @@ module Onebox
178
189
  url.gsub!("'", "&apos;")
179
190
  url.gsub!('"', "&quot;")
180
191
  url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, "")
192
+
193
+ parsed = Addressable::URI.parse(url)
194
+ return "" unless parsed.host
195
+
181
196
  url
182
197
  end
183
198
 
@@ -167,7 +167,7 @@ module Onebox
167
167
  @raw = "https://render.githubusercontent.com/view/solid?url=" + self.raw_template(m)
168
168
 
169
169
  else
170
- contents = open(self.raw_template(m), read_timeout: timeout).read
170
+ contents = URI.open(self.raw_template(m), read_timeout: timeout).read
171
171
 
172
172
  contents_lines = contents.lines #get contents lines
173
173
  contents_lines_size = contents_lines.size #get number of lines
@@ -32,7 +32,8 @@ module Onebox
32
32
  if method_name.end_with?(*integer_suffixes)
33
33
  value.to_i
34
34
  elsif method_name.end_with?(*url_suffixes)
35
- ::Onebox::Helpers.normalize_url_for_output(value)
35
+ result = Onebox::Helpers.normalize_url_for_output(value)
36
+ result unless Onebox::Helpers::blank?(result)
36
37
  else
37
38
  value
38
39
  end
@@ -10,7 +10,7 @@ class Sanitize
10
10
 
11
11
  attributes: {
12
12
  'a' => RELAXED[:attributes]['a'] + %w(target),
13
- 'audio' => %w[controls],
13
+ 'audio' => %w[controls controlslist],
14
14
  'embed' => %w[height src type width],
15
15
  'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href data-unsanitized-src],
16
16
  'source' => %w[src type],
@@ -35,7 +35,7 @@ module Onebox
35
35
  private
36
36
 
37
37
  def check
38
- res = open(@url, read_timeout: (@options.timeout || Onebox.options.timeout))
38
+ res = URI.open(@url, read_timeout: (@options.timeout || Onebox.options.timeout))
39
39
  @status = res.status.first.to_i
40
40
  rescue OpenURI::HTTPError => e
41
41
  @status = e.io.status.first.to_i
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.2.2"
4
+ VERSION = "2.2.9"
5
5
  end
data/onebox.gemspec CHANGED
@@ -41,5 +41,5 @@ Gem::Specification.new do |spec|
41
41
  spec.add_development_dependency 'haml', '~> 5.1'
42
42
  spec.add_development_dependency 'listen', '~> 2.10.0'
43
43
 
44
- spec.required_ruby_version = '>=2.4.0'
44
+ spec.required_ruby_version = '>=2.5.0'
45
45
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.2
4
+ version: 2.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-01-06 00:00:00.000000000 Z
13
+ date: 2021-03-31 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable
@@ -413,7 +413,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
413
413
  requirements:
414
414
  - - ">="
415
415
  - !ruby/object:Gem::Version
416
- version: 2.4.0
416
+ version: 2.5.0
417
417
  required_rubygems_version: !ruby/object:Gem::Requirement
418
418
  requirements:
419
419
  - - ">="