onebox 2.2.2 → 2.2.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +6 -11
- data/lib/onebox/engine/allowlisted_generic_onebox.rb +2 -0
- data/lib/onebox/engine/amazon_onebox.rb +16 -6
- data/lib/onebox/engine/audio_onebox.rb +1 -1
- data/lib/onebox/engine/gfycat_onebox.rb +1 -1
- data/lib/onebox/engine/github_issue_onebox.rb +1 -2
- data/lib/onebox/engine/google_docs_onebox.rb +1 -1
- data/lib/onebox/engine/html.rb +2 -1
- data/lib/onebox/engine/json.rb +1 -1
- data/lib/onebox/engine/pastebin_onebox.rb +1 -1
- data/lib/onebox/engine/pubmed_onebox.rb +1 -1
- data/lib/onebox/engine/twitter_status_onebox.rb +1 -1
- data/lib/onebox/engine/video_onebox.rb +1 -1
- data/lib/onebox/helpers.rb +26 -11
- data/lib/onebox/mixins/git_blob_onebox.rb +1 -1
- data/lib/onebox/open_graph.rb +2 -1
- data/lib/onebox/sanitize_config.rb +1 -1
- data/lib/onebox/status_check.rb +1 -1
- data/lib/onebox/version.rb +1 -1
- data/onebox.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 541807b758c73492e2868271a63b8f05ef4d94aad316be86fb5c42c3c3f0e2ce
|
4
|
+
data.tar.gz: 10f460d3d6a1b09a6087a38ad0692c9f652e027beb65b22845536480055b4a48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d13068e9dbb437388ee689581bc64bb08bf4c5ae898755680c3c978a00fc4a4ee13004f0f2a3071f40d226d15e84f799a949d0d07572c6537ddd92950416d56b
|
7
|
+
data.tar.gz: 1efacb58ff5d24daff63cdd855a32ae80826f3585eee0305ba032269ee4431f1541848582bd5587aacffdcd6cb9c5bb35c650706c6028ed5e115e17daf362621
|
data/.github/workflows/ci.yml
CHANGED
@@ -15,24 +15,19 @@ jobs:
|
|
15
15
|
strategy:
|
16
16
|
matrix:
|
17
17
|
ruby:
|
18
|
-
- 2.4
|
19
18
|
- 2.5
|
20
19
|
- 2.6
|
20
|
+
- 2.7
|
21
|
+
- 3.0
|
21
22
|
|
22
23
|
steps:
|
23
|
-
- uses: actions/checkout@
|
24
|
+
- uses: actions/checkout@v2
|
24
25
|
|
25
26
|
- name: Setup ruby
|
26
|
-
uses:
|
27
|
+
uses: ruby/setup-ruby@v1
|
27
28
|
with:
|
28
29
|
ruby-version: ${{ matrix.ruby }}
|
29
|
-
|
30
|
-
|
31
|
-
- name: Setup bundler
|
32
|
-
run: gem install bundler
|
33
|
-
|
34
|
-
- name: Setup gems
|
35
|
-
run: bundle install
|
30
|
+
bundler-cache: true
|
36
31
|
|
37
32
|
- name: Rubocop
|
38
33
|
run: bundle exec rubocop
|
@@ -49,6 +44,6 @@ jobs:
|
|
49
44
|
- uses: actions/checkout@v2
|
50
45
|
|
51
46
|
- name: Release Gem
|
52
|
-
uses:
|
47
|
+
uses: discourse/publish-rubygems-action@main
|
53
48
|
env:
|
54
49
|
RUBYGEMS_API_KEY: ${{secrets.RUBYGEMS_API_KEY}}
|
@@ -227,8 +227,10 @@ module Onebox
|
|
227
227
|
d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
|
228
228
|
d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
|
229
229
|
d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
|
230
|
+
d[:image] = nil if Onebox::Helpers.blank?(d[:image])
|
230
231
|
|
231
232
|
d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
|
233
|
+
d[:video] = nil if Onebox::Helpers.blank?(d[:video])
|
232
234
|
|
233
235
|
d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
|
234
236
|
if !Onebox::Helpers.blank?(d[:published_time])
|
@@ -11,11 +11,22 @@ module Onebox
|
|
11
11
|
include HTML
|
12
12
|
|
13
13
|
always_https
|
14
|
-
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
|
14
|
+
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
|
15
15
|
|
16
16
|
def url
|
17
|
+
# Have we cached the HTML body of the requested URL?
|
18
|
+
# If so, try to grab the canonical URL from that document,
|
19
|
+
# rather than guess at the best URL structure to use
|
20
|
+
if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
|
21
|
+
if @body_cacher.cached_response_body_exists?(uri.to_s)
|
22
|
+
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
|
23
|
+
canonical_link = @raw.at('//link[@rel="canonical"]/@href')
|
24
|
+
return canonical_link.to_s if canonical_link
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
17
28
|
if match && match[:id]
|
18
|
-
return "https://www.amazon.#{tld}/
|
29
|
+
return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
|
19
30
|
end
|
20
31
|
|
21
32
|
@url
|
@@ -26,10 +37,9 @@ module Onebox
|
|
26
37
|
end
|
27
38
|
|
28
39
|
def http_params
|
29
|
-
|
30
|
-
'User-Agent' =>
|
31
|
-
|
32
|
-
}
|
40
|
+
if @options && @options[:user_agent]
|
41
|
+
{ 'User-Agent' => @options[:user_agent] }
|
42
|
+
end
|
33
43
|
end
|
34
44
|
|
35
45
|
private
|
@@ -15,7 +15,7 @@ module Onebox
|
|
15
15
|
escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
|
16
16
|
|
17
17
|
<<-HTML
|
18
|
-
<audio controls>
|
18
|
+
<audio controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
|
19
19
|
<source src="#{escaped_url}">
|
20
20
|
<a href="#{escaped_url}">#{@url}</a>
|
21
21
|
</audio>
|
@@ -22,8 +22,7 @@ module Onebox
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def data
|
25
|
-
|
26
|
-
@raw ||= ::MultiJson.load(open(url, "Accept" => "application/vnd.github.v3.text+json", read_timeout: timeout)) #custom Accept header so we can get body as text.
|
25
|
+
@raw ||= ::MultiJson.load(URI.open(url, "Accept" => "application/vnd.github.v3.text+json", read_timeout: timeout)) #custom Accept header so we can get body as text.
|
27
26
|
body_text = @raw["body_text"]
|
28
27
|
|
29
28
|
content_words = body_text.gsub("\n\n", "\n").gsub("\n", "<br>").split(" ") #one pass of removing double newline, then we change \n to <br> and later on we revert it back to \n this is a workaround to avoid losing newlines after we join it back.
|
@@ -47,7 +47,7 @@ module Onebox
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def get_og_data
|
50
|
-
response = Onebox::Helpers.fetch_response(url, 10) rescue nil
|
50
|
+
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
51
51
|
html = Nokogiri::HTML(response)
|
52
52
|
og_data = {}
|
53
53
|
html.css('meta').each do |m|
|
data/lib/onebox/engine/html.rb
CHANGED
data/lib/onebox/engine/json.rb
CHANGED
@@ -31,7 +31,7 @@ module Onebox
|
|
31
31
|
|
32
32
|
def lines
|
33
33
|
return @lines if @lines
|
34
|
-
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
|
34
|
+
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
|
35
35
|
@lines = response.split("\n")
|
36
36
|
end
|
37
37
|
|
@@ -11,7 +11,7 @@ module Onebox
|
|
11
11
|
private
|
12
12
|
|
13
13
|
def get_xml
|
14
|
-
doc = Nokogiri::XML(open(URI.join(@url, "?report=xml&format=text")))
|
14
|
+
doc = Nokogiri::XML(URI.open(URI.join(@url, "?report=xml&format=text")))
|
15
15
|
pre = doc.xpath("//pre")
|
16
16
|
Nokogiri::XML("<root>" + pre.text + "</root>")
|
17
17
|
end
|
@@ -17,7 +17,7 @@ module Onebox
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def get_twitter_data
|
20
|
-
response = Onebox::Helpers.fetch_response(url,
|
20
|
+
response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
|
21
21
|
html = Nokogiri::HTML(response)
|
22
22
|
twitter_data = {}
|
23
23
|
html.css('meta').each do |m|
|
@@ -20,7 +20,7 @@ module Onebox
|
|
20
20
|
escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
|
21
21
|
<<-HTML
|
22
22
|
<div class="onebox video-onebox">
|
23
|
-
<video width='100%' height='100%' controls>
|
23
|
+
<video width='100%' height='100%' controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
|
24
24
|
<source src='#{escaped_url}'>
|
25
25
|
<a href='#{escaped_url}'>#{@url}</a>
|
26
26
|
</video>
|
data/lib/onebox/helpers.rb
CHANGED
@@ -24,8 +24,8 @@ module Onebox
|
|
24
24
|
html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
|
25
25
|
end
|
26
26
|
|
27
|
-
def self.fetch_html_doc(url, headers = nil)
|
28
|
-
response = (fetch_response(url,
|
27
|
+
def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
|
28
|
+
response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
|
29
29
|
doc = Nokogiri::HTML(response)
|
30
30
|
uri = Addressable::URI.parse(url)
|
31
31
|
|
@@ -37,7 +37,7 @@ module Onebox
|
|
37
37
|
canonical_link = doc.at('//link[@rel="canonical"]/@href')
|
38
38
|
canonical_uri = Addressable::URI.parse(canonical_link)
|
39
39
|
if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
|
40
|
-
response = (fetch_response(canonical_uri.to_s,
|
40
|
+
response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
|
41
41
|
doc = Nokogiri::HTML(response) if response
|
42
42
|
end
|
43
43
|
end
|
@@ -45,16 +45,23 @@ module Onebox
|
|
45
45
|
doc
|
46
46
|
end
|
47
47
|
|
48
|
-
def self.fetch_response(location,
|
48
|
+
def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
|
49
|
+
redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
|
49
50
|
|
50
|
-
|
51
|
-
limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
|
52
|
-
|
53
|
-
raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
|
51
|
+
raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
|
54
52
|
|
55
53
|
uri = Addressable::URI.parse(location)
|
56
54
|
uri = Addressable::URI.join(domain, uri) if !uri.host
|
57
55
|
|
56
|
+
use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
|
57
|
+
if use_body_cacher
|
58
|
+
response_body = body_cacher.fetch_cached_response_body(uri.to_s)
|
59
|
+
|
60
|
+
if response_body.present?
|
61
|
+
return response_body
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
58
65
|
result = StringIO.new
|
59
66
|
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
|
60
67
|
http.open_timeout = Onebox.options.connect_timeout
|
@@ -86,9 +93,9 @@ module Onebox
|
|
86
93
|
response.error! unless [301, 302].include?(code)
|
87
94
|
return fetch_response(
|
88
95
|
response['location'],
|
89
|
-
|
90
|
-
"#{uri.scheme}://#{uri.host}",
|
91
|
-
redir_header
|
96
|
+
redirect_limit: redirect_limit - 1,
|
97
|
+
domain: "#{uri.scheme}://#{uri.host}",
|
98
|
+
headers: redir_header
|
92
99
|
)
|
93
100
|
end
|
94
101
|
|
@@ -98,6 +105,10 @@ module Onebox
|
|
98
105
|
raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
|
99
106
|
end
|
100
107
|
|
108
|
+
if use_body_cacher && body_cacher.cache_response_body?(uri)
|
109
|
+
body_cacher.cache_response_body(uri.to_s, result.string)
|
110
|
+
end
|
111
|
+
|
101
112
|
return result.string
|
102
113
|
end
|
103
114
|
end
|
@@ -178,6 +189,10 @@ module Onebox
|
|
178
189
|
url.gsub!("'", "&apos;")
|
179
190
|
url.gsub!('"', "&quot;")
|
180
191
|
url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, "")
|
192
|
+
|
193
|
+
parsed = Addressable::URI.parse(url)
|
194
|
+
return "" unless parsed.host
|
195
|
+
|
181
196
|
url
|
182
197
|
end
|
183
198
|
|
@@ -167,7 +167,7 @@ module Onebox
|
|
167
167
|
@raw = "https://render.githubusercontent.com/view/solid?url=" + self.raw_template(m)
|
168
168
|
|
169
169
|
else
|
170
|
-
contents = open(self.raw_template(m), read_timeout: timeout).read
|
170
|
+
contents = URI.open(self.raw_template(m), read_timeout: timeout).read
|
171
171
|
|
172
172
|
contents_lines = contents.lines #get contents lines
|
173
173
|
contents_lines_size = contents_lines.size #get number of lines
|
data/lib/onebox/open_graph.rb
CHANGED
@@ -32,7 +32,8 @@ module Onebox
|
|
32
32
|
if method_name.end_with?(*integer_suffixes)
|
33
33
|
value.to_i
|
34
34
|
elsif method_name.end_with?(*url_suffixes)
|
35
|
-
|
35
|
+
result = Onebox::Helpers.normalize_url_for_output(value)
|
36
|
+
result unless Onebox::Helpers::blank?(result)
|
36
37
|
else
|
37
38
|
value
|
38
39
|
end
|
@@ -10,7 +10,7 @@ class Sanitize
|
|
10
10
|
|
11
11
|
attributes: {
|
12
12
|
'a' => RELAXED[:attributes]['a'] + %w(target),
|
13
|
-
'audio' => %w[controls],
|
13
|
+
'audio' => %w[controls controlslist],
|
14
14
|
'embed' => %w[height src type width],
|
15
15
|
'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href data-unsanitized-src],
|
16
16
|
'source' => %w[src type],
|
data/lib/onebox/status_check.rb
CHANGED
@@ -35,7 +35,7 @@ module Onebox
|
|
35
35
|
private
|
36
36
|
|
37
37
|
def check
|
38
|
-
res = open(@url, read_timeout: (@options.timeout || Onebox.options.timeout))
|
38
|
+
res = URI.open(@url, read_timeout: (@options.timeout || Onebox.options.timeout))
|
39
39
|
@status = res.status.first.to_i
|
40
40
|
rescue OpenURI::HTTPError => e
|
41
41
|
@status = e.io.status.first.to_i
|
data/lib/onebox/version.rb
CHANGED
data/onebox.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.2
|
4
|
+
version: 2.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joanna Zeta
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2021-
|
13
|
+
date: 2021-03-31 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: addressable
|
@@ -413,7 +413,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
413
413
|
requirements:
|
414
414
|
- - ">="
|
415
415
|
- !ruby/object:Gem::Version
|
416
|
-
version: 2.
|
416
|
+
version: 2.5.0
|
417
417
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
418
418
|
requirements:
|
419
419
|
- - ">="
|