onebox 2.2.2 → 2.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 817b81480e38b1a133342d89f83991cd0e94217581ddb74c6275c6631c7285bf
4
- data.tar.gz: 545e95966e368c2b36a9792dd99934ec609635e19d95bb02ef1dd1fbcd51c5f6
3
+ metadata.gz: 541807b758c73492e2868271a63b8f05ef4d94aad316be86fb5c42c3c3f0e2ce
4
+ data.tar.gz: 10f460d3d6a1b09a6087a38ad0692c9f652e027beb65b22845536480055b4a48
5
5
  SHA512:
6
- metadata.gz: fc0ea7daff3babaa77cebf9e5e00161877a86106d15b1a2d4e1b1e56f1e4ba9ebfd6e3666a991d420e4ea71180dc8fcaf2bafc0ac659934399706462c1e45d69
7
- data.tar.gz: f509adf0a3ad1d557700a690bfb9b922ec9c181b68cf6ab61134394ee70c8ee4487d64faa863262a28cd795ed14e39bc774521572551c719faa39a1acc208a3e
6
+ metadata.gz: d13068e9dbb437388ee689581bc64bb08bf4c5ae898755680c3c978a00fc4a4ee13004f0f2a3071f40d226d15e84f799a949d0d07572c6537ddd92950416d56b
7
+ data.tar.gz: 1efacb58ff5d24daff63cdd855a32ae80826f3585eee0305ba032269ee4431f1541848582bd5587aacffdcd6cb9c5bb35c650706c6028ed5e115e17daf362621
@@ -15,24 +15,19 @@ jobs:
15
15
  strategy:
16
16
  matrix:
17
17
  ruby:
18
- - 2.4
19
18
  - 2.5
20
19
  - 2.6
20
+ - 2.7
21
+ - 3.0
21
22
 
22
23
  steps:
23
- - uses: actions/checkout@v1
24
+ - uses: actions/checkout@v2
24
25
 
25
26
  - name: Setup ruby
26
- uses: actions/setup-ruby@v1
27
+ uses: ruby/setup-ruby@v1
27
28
  with:
28
29
  ruby-version: ${{ matrix.ruby }}
29
- architecture: 'x64'
30
-
31
- - name: Setup bundler
32
- run: gem install bundler
33
-
34
- - name: Setup gems
35
- run: bundle install
30
+ bundler-cache: true
36
31
 
37
32
  - name: Rubocop
38
33
  run: bundle exec rubocop
@@ -49,6 +44,6 @@ jobs:
49
44
  - uses: actions/checkout@v2
50
45
 
51
46
  - name: Release Gem
52
- uses: CvX/publish-rubygems-action@master
47
+ uses: discourse/publish-rubygems-action@main
53
48
  env:
54
49
  RUBYGEMS_API_KEY: ${{secrets.RUBYGEMS_API_KEY}}
@@ -227,8 +227,10 @@ module Onebox
227
227
  d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
228
228
  d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
229
229
  d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
230
+ d[:image] = nil if Onebox::Helpers.blank?(d[:image])
230
231
 
231
232
  d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
233
+ d[:video] = nil if Onebox::Helpers.blank?(d[:video])
232
234
 
233
235
  d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
234
236
  if !Onebox::Helpers.blank?(d[:published_time])
@@ -11,11 +11,22 @@ module Onebox
11
11
  include HTML
12
12
 
13
13
  always_https
14
- matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
14
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
15
15
 
16
16
  def url
17
+ # Have we cached the HTML body of the requested URL?
18
+ # If so, try to grab the canonical URL from that document,
19
+ # rather than guess at the best URL structure to use
20
+ if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
21
+ if @body_cacher.cached_response_body_exists?(uri.to_s)
22
+ @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
23
+ canonical_link = @raw.at('//link[@rel="canonical"]/@href')
24
+ return canonical_link.to_s if canonical_link
25
+ end
26
+ end
27
+
17
28
  if match && match[:id]
18
- return "https://www.amazon.#{tld}/gp/aw/d/#{Onebox::Helpers.uri_encode(match[:id])}"
29
+ return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
19
30
  end
20
31
 
21
32
  @url
@@ -26,10 +37,9 @@ module Onebox
26
37
  end
27
38
 
28
39
  def http_params
29
- {
30
- 'User-Agent' =>
31
- 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3'
32
- }
40
+ if @options && @options[:user_agent]
41
+ { 'User-Agent' => @options[:user_agent] }
42
+ end
33
43
  end
34
44
 
35
45
  private
@@ -15,7 +15,7 @@ module Onebox
15
15
  escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
16
16
 
17
17
  <<-HTML
18
- <audio controls>
18
+ <audio controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
19
19
  <source src="#{escaped_url}">
20
20
  <a href="#{escaped_url}">#{@url}</a>
21
21
  </audio>
@@ -63,7 +63,7 @@ module Onebox
63
63
 
64
64
  def nokogiri_page
65
65
  @nokogiri_page ||= begin
66
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
66
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
67
67
  Nokogiri::HTML(response)
68
68
  end
69
69
  end
@@ -22,8 +22,7 @@ module Onebox
22
22
  end
23
23
 
24
24
  def data
25
-
26
- @raw ||= ::MultiJson.load(open(url, "Accept" => "application/vnd.github.v3.text+json", read_timeout: timeout)) #custom Accept header so we can get body as text.
25
+ @raw ||= ::MultiJson.load(URI.open(url, "Accept" => "application/vnd.github.v3.text+json", read_timeout: timeout)) #custom Accept header so we can get body as text.
27
26
  body_text = @raw["body_text"]
28
27
 
29
28
  content_words = body_text.gsub("\n\n", "\n").gsub("\n", "<br>").split(" ") #one pass of removing double newline, then we change \n to <br> and later on we revert it back to \n this is a workaround to avoid losing newlines after we join it back.
@@ -47,7 +47,7 @@ module Onebox
47
47
  end
48
48
 
49
49
  def get_og_data
50
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
50
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
51
51
  html = Nokogiri::HTML(response)
52
52
  og_data = {}
53
53
  html.css('meta').each do |m|
@@ -11,7 +11,8 @@ module Onebox
11
11
  end
12
12
 
13
13
  def raw
14
- @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
14
+ body_cacher = self.options[:body_cacher] if self.options
15
+ @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
15
16
  end
16
17
 
17
18
  def html?
@@ -6,7 +6,7 @@ module Onebox
6
6
  private
7
7
 
8
8
  def raw
9
- @raw ||= ::MultiJson.load(open(url, read_timeout: timeout))
9
+ @raw ||= ::MultiJson.load(URI.open(url, read_timeout: timeout))
10
10
  end
11
11
  end
12
12
  end
@@ -31,7 +31,7 @@ module Onebox
31
31
 
32
32
  def lines
33
33
  return @lines if @lines
34
- response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
34
+ response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
35
35
  @lines = response.split("\n")
36
36
  end
37
37
 
@@ -11,7 +11,7 @@ module Onebox
11
11
  private
12
12
 
13
13
  def get_xml
14
- doc = Nokogiri::XML(open(URI.join(@url, "?report=xml&format=text")))
14
+ doc = Nokogiri::XML(URI.open(URI.join(@url, "?report=xml&format=text")))
15
15
  pre = doc.xpath("//pre")
16
16
  Nokogiri::XML("<root>" + pre.text + "</root>")
17
17
  end
@@ -17,7 +17,7 @@ module Onebox
17
17
  private
18
18
 
19
19
  def get_twitter_data
20
- response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
20
+ response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
21
21
  html = Nokogiri::HTML(response)
22
22
  twitter_data = {}
23
23
  html.css('meta').each do |m|
@@ -20,7 +20,7 @@ module Onebox
20
20
  escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
21
21
  <<-HTML
22
22
  <div class="onebox video-onebox">
23
- <video width='100%' height='100%' controls>
23
+ <video width='100%' height='100%' controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
24
24
  <source src='#{escaped_url}'>
25
25
  <a href='#{escaped_url}'>#{@url}</a>
26
26
  </video>
@@ -24,8 +24,8 @@ module Onebox
24
24
  html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
25
25
  end
26
26
 
27
- def self.fetch_html_doc(url, headers = nil)
28
- response = (fetch_response(url, nil, nil, headers) rescue nil)
27
+ def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
28
+ response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
29
29
  doc = Nokogiri::HTML(response)
30
30
  uri = Addressable::URI.parse(url)
31
31
 
@@ -37,7 +37,7 @@ module Onebox
37
37
  canonical_link = doc.at('//link[@rel="canonical"]/@href')
38
38
  canonical_uri = Addressable::URI.parse(canonical_link)
39
39
  if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
40
- response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
40
+ response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
41
41
  doc = Nokogiri::HTML(response) if response
42
42
  end
43
43
  end
@@ -45,16 +45,23 @@ module Onebox
45
45
  doc
46
46
  end
47
47
 
48
- def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
48
+ def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
49
+ redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
49
50
 
50
- limit ||= 5
51
- limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
52
-
53
- raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
51
+ raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
54
52
 
55
53
  uri = Addressable::URI.parse(location)
56
54
  uri = Addressable::URI.join(domain, uri) if !uri.host
57
55
 
56
+ use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
57
+ if use_body_cacher
58
+ response_body = body_cacher.fetch_cached_response_body(uri.to_s)
59
+
60
+ if response_body.present?
61
+ return response_body
62
+ end
63
+ end
64
+
58
65
  result = StringIO.new
59
66
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
60
67
  http.open_timeout = Onebox.options.connect_timeout
@@ -86,9 +93,9 @@ module Onebox
86
93
  response.error! unless [301, 302].include?(code)
87
94
  return fetch_response(
88
95
  response['location'],
89
- limit - 1,
90
- "#{uri.scheme}://#{uri.host}",
91
- redir_header
96
+ redirect_limit: redirect_limit - 1,
97
+ domain: "#{uri.scheme}://#{uri.host}",
98
+ headers: redir_header
92
99
  )
93
100
  end
94
101
 
@@ -98,6 +105,10 @@ module Onebox
98
105
  raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
99
106
  end
100
107
 
108
+ if use_body_cacher && body_cacher.cache_response_body?(uri)
109
+ body_cacher.cache_response_body(uri.to_s, result.string)
110
+ end
111
+
101
112
  return result.string
102
113
  end
103
114
  end
@@ -178,6 +189,10 @@ module Onebox
178
189
  url.gsub!("'", "&apos;")
179
190
  url.gsub!('"', "&quot;")
180
191
  url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, "")
192
+
193
+ parsed = Addressable::URI.parse(url)
194
+ return "" unless parsed.host
195
+
181
196
  url
182
197
  end
183
198
 
@@ -167,7 +167,7 @@ module Onebox
167
167
  @raw = "https://render.githubusercontent.com/view/solid?url=" + self.raw_template(m)
168
168
 
169
169
  else
170
- contents = open(self.raw_template(m), read_timeout: timeout).read
170
+ contents = URI.open(self.raw_template(m), read_timeout: timeout).read
171
171
 
172
172
  contents_lines = contents.lines #get contents lines
173
173
  contents_lines_size = contents_lines.size #get number of lines
@@ -32,7 +32,8 @@ module Onebox
32
32
  if method_name.end_with?(*integer_suffixes)
33
33
  value.to_i
34
34
  elsif method_name.end_with?(*url_suffixes)
35
- ::Onebox::Helpers.normalize_url_for_output(value)
35
+ result = Onebox::Helpers.normalize_url_for_output(value)
36
+ result unless Onebox::Helpers::blank?(result)
36
37
  else
37
38
  value
38
39
  end
@@ -10,7 +10,7 @@ class Sanitize
10
10
 
11
11
  attributes: {
12
12
  'a' => RELAXED[:attributes]['a'] + %w(target),
13
- 'audio' => %w[controls],
13
+ 'audio' => %w[controls controlslist],
14
14
  'embed' => %w[height src type width],
15
15
  'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href data-unsanitized-src],
16
16
  'source' => %w[src type],
@@ -35,7 +35,7 @@ module Onebox
35
35
  private
36
36
 
37
37
  def check
38
- res = open(@url, read_timeout: (@options.timeout || Onebox.options.timeout))
38
+ res = URI.open(@url, read_timeout: (@options.timeout || Onebox.options.timeout))
39
39
  @status = res.status.first.to_i
40
40
  rescue OpenURI::HTTPError => e
41
41
  @status = e.io.status.first.to_i
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.2.2"
4
+ VERSION = "2.2.9"
5
5
  end
data/onebox.gemspec CHANGED
@@ -41,5 +41,5 @@ Gem::Specification.new do |spec|
41
41
  spec.add_development_dependency 'haml', '~> 5.1'
42
42
  spec.add_development_dependency 'listen', '~> 2.10.0'
43
43
 
44
- spec.required_ruby_version = '>=2.4.0'
44
+ spec.required_ruby_version = '>=2.5.0'
45
45
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.2
4
+ version: 2.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-01-06 00:00:00.000000000 Z
13
+ date: 2021-03-31 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable
@@ -413,7 +413,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
413
413
  requirements:
414
414
  - - ">="
415
415
  - !ruby/object:Gem::Version
416
- version: 2.4.0
416
+ version: 2.5.0
417
417
  required_rubygems_version: !ruby/object:Gem::Requirement
418
418
  requirements:
419
419
  - - ">="