onebox 2.2.2 → 2.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +6 -11
- data/lib/onebox/engine/allowlisted_generic_onebox.rb +2 -0
- data/lib/onebox/engine/amazon_onebox.rb +16 -6
- data/lib/onebox/engine/audio_onebox.rb +1 -1
- data/lib/onebox/engine/gfycat_onebox.rb +1 -1
- data/lib/onebox/engine/github_issue_onebox.rb +1 -2
- data/lib/onebox/engine/google_docs_onebox.rb +1 -1
- data/lib/onebox/engine/html.rb +2 -1
- data/lib/onebox/engine/json.rb +1 -1
- data/lib/onebox/engine/pastebin_onebox.rb +1 -1
- data/lib/onebox/engine/pubmed_onebox.rb +1 -1
- data/lib/onebox/engine/twitter_status_onebox.rb +1 -1
- data/lib/onebox/engine/video_onebox.rb +1 -1
- data/lib/onebox/helpers.rb +26 -11
- data/lib/onebox/mixins/git_blob_onebox.rb +1 -1
- data/lib/onebox/open_graph.rb +2 -1
- data/lib/onebox/sanitize_config.rb +1 -1
- data/lib/onebox/status_check.rb +1 -1
- data/lib/onebox/version.rb +1 -1
- data/onebox.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 541807b758c73492e2868271a63b8f05ef4d94aad316be86fb5c42c3c3f0e2ce
|
|
4
|
+
data.tar.gz: 10f460d3d6a1b09a6087a38ad0692c9f652e027beb65b22845536480055b4a48
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d13068e9dbb437388ee689581bc64bb08bf4c5ae898755680c3c978a00fc4a4ee13004f0f2a3071f40d226d15e84f799a949d0d07572c6537ddd92950416d56b
|
|
7
|
+
data.tar.gz: 1efacb58ff5d24daff63cdd855a32ae80826f3585eee0305ba032269ee4431f1541848582bd5587aacffdcd6cb9c5bb35c650706c6028ed5e115e17daf362621
|
data/.github/workflows/ci.yml
CHANGED
|
@@ -15,24 +15,19 @@ jobs:
|
|
|
15
15
|
strategy:
|
|
16
16
|
matrix:
|
|
17
17
|
ruby:
|
|
18
|
-
- 2.4
|
|
19
18
|
- 2.5
|
|
20
19
|
- 2.6
|
|
20
|
+
- 2.7
|
|
21
|
+
- 3.0
|
|
21
22
|
|
|
22
23
|
steps:
|
|
23
|
-
- uses: actions/checkout@
|
|
24
|
+
- uses: actions/checkout@v2
|
|
24
25
|
|
|
25
26
|
- name: Setup ruby
|
|
26
|
-
uses:
|
|
27
|
+
uses: ruby/setup-ruby@v1
|
|
27
28
|
with:
|
|
28
29
|
ruby-version: ${{ matrix.ruby }}
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
- name: Setup bundler
|
|
32
|
-
run: gem install bundler
|
|
33
|
-
|
|
34
|
-
- name: Setup gems
|
|
35
|
-
run: bundle install
|
|
30
|
+
bundler-cache: true
|
|
36
31
|
|
|
37
32
|
- name: Rubocop
|
|
38
33
|
run: bundle exec rubocop
|
|
@@ -49,6 +44,6 @@ jobs:
|
|
|
49
44
|
- uses: actions/checkout@v2
|
|
50
45
|
|
|
51
46
|
- name: Release Gem
|
|
52
|
-
uses:
|
|
47
|
+
uses: discourse/publish-rubygems-action@main
|
|
53
48
|
env:
|
|
54
49
|
RUBYGEMS_API_KEY: ${{secrets.RUBYGEMS_API_KEY}}
|
|
@@ -227,8 +227,10 @@ module Onebox
|
|
|
227
227
|
d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
|
|
228
228
|
d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
|
|
229
229
|
d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
|
|
230
|
+
d[:image] = nil if Onebox::Helpers.blank?(d[:image])
|
|
230
231
|
|
|
231
232
|
d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
|
|
233
|
+
d[:video] = nil if Onebox::Helpers.blank?(d[:video])
|
|
232
234
|
|
|
233
235
|
d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
|
|
234
236
|
if !Onebox::Helpers.blank?(d[:published_time])
|
|
@@ -11,11 +11,22 @@ module Onebox
|
|
|
11
11
|
include HTML
|
|
12
12
|
|
|
13
13
|
always_https
|
|
14
|
-
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
|
|
14
|
+
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
|
|
15
15
|
|
|
16
16
|
def url
|
|
17
|
+
# Have we cached the HTML body of the requested URL?
|
|
18
|
+
# If so, try to grab the canonical URL from that document,
|
|
19
|
+
# rather than guess at the best URL structure to use
|
|
20
|
+
if @body_cacher && @body_cacher.respond_to?('cache_response_body?')
|
|
21
|
+
if @body_cacher.cached_response_body_exists?(uri.to_s)
|
|
22
|
+
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, @body_cacher)
|
|
23
|
+
canonical_link = @raw.at('//link[@rel="canonical"]/@href')
|
|
24
|
+
return canonical_link.to_s if canonical_link
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
17
28
|
if match && match[:id]
|
|
18
|
-
return "https://www.amazon.#{tld}/
|
|
29
|
+
return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
|
|
19
30
|
end
|
|
20
31
|
|
|
21
32
|
@url
|
|
@@ -26,10 +37,9 @@ module Onebox
|
|
|
26
37
|
end
|
|
27
38
|
|
|
28
39
|
def http_params
|
|
29
|
-
|
|
30
|
-
'User-Agent' =>
|
|
31
|
-
|
|
32
|
-
}
|
|
40
|
+
if @options && @options[:user_agent]
|
|
41
|
+
{ 'User-Agent' => @options[:user_agent] }
|
|
42
|
+
end
|
|
33
43
|
end
|
|
34
44
|
|
|
35
45
|
private
|
|
@@ -15,7 +15,7 @@ module Onebox
|
|
|
15
15
|
escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
|
|
16
16
|
|
|
17
17
|
<<-HTML
|
|
18
|
-
<audio controls>
|
|
18
|
+
<audio controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
|
|
19
19
|
<source src="#{escaped_url}">
|
|
20
20
|
<a href="#{escaped_url}">#{@url}</a>
|
|
21
21
|
</audio>
|
|
@@ -22,8 +22,7 @@ module Onebox
|
|
|
22
22
|
end
|
|
23
23
|
|
|
24
24
|
def data
|
|
25
|
-
|
|
26
|
-
@raw ||= ::MultiJson.load(open(url, "Accept" => "application/vnd.github.v3.text+json", read_timeout: timeout)) #custom Accept header so we can get body as text.
|
|
25
|
+
@raw ||= ::MultiJson.load(URI.open(url, "Accept" => "application/vnd.github.v3.text+json", read_timeout: timeout)) #custom Accept header so we can get body as text.
|
|
27
26
|
body_text = @raw["body_text"]
|
|
28
27
|
|
|
29
28
|
content_words = body_text.gsub("\n\n", "\n").gsub("\n", "<br>").split(" ") #one pass of removing double newline, then we change \n to <br> and later on we revert it back to \n this is a workaround to avoid losing newlines after we join it back.
|
|
@@ -47,7 +47,7 @@ module Onebox
|
|
|
47
47
|
end
|
|
48
48
|
|
|
49
49
|
def get_og_data
|
|
50
|
-
response = Onebox::Helpers.fetch_response(url, 10) rescue nil
|
|
50
|
+
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
|
51
51
|
html = Nokogiri::HTML(response)
|
|
52
52
|
og_data = {}
|
|
53
53
|
html.css('meta').each do |m|
|
data/lib/onebox/engine/html.rb
CHANGED
data/lib/onebox/engine/json.rb
CHANGED
|
@@ -31,7 +31,7 @@ module Onebox
|
|
|
31
31
|
|
|
32
32
|
def lines
|
|
33
33
|
return @lines if @lines
|
|
34
|
-
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
|
|
34
|
+
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
|
|
35
35
|
@lines = response.split("\n")
|
|
36
36
|
end
|
|
37
37
|
|
|
@@ -11,7 +11,7 @@ module Onebox
|
|
|
11
11
|
private
|
|
12
12
|
|
|
13
13
|
def get_xml
|
|
14
|
-
doc = Nokogiri::XML(open(URI.join(@url, "?report=xml&format=text")))
|
|
14
|
+
doc = Nokogiri::XML(URI.open(URI.join(@url, "?report=xml&format=text")))
|
|
15
15
|
pre = doc.xpath("//pre")
|
|
16
16
|
Nokogiri::XML("<root>" + pre.text + "</root>")
|
|
17
17
|
end
|
|
@@ -17,7 +17,7 @@ module Onebox
|
|
|
17
17
|
private
|
|
18
18
|
|
|
19
19
|
def get_twitter_data
|
|
20
|
-
response = Onebox::Helpers.fetch_response(url,
|
|
20
|
+
response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
|
|
21
21
|
html = Nokogiri::HTML(response)
|
|
22
22
|
twitter_data = {}
|
|
23
23
|
html.css('meta').each do |m|
|
|
@@ -20,7 +20,7 @@ module Onebox
|
|
|
20
20
|
escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
|
|
21
21
|
<<-HTML
|
|
22
22
|
<div class="onebox video-onebox">
|
|
23
|
-
<video width='100%' height='100%' controls>
|
|
23
|
+
<video width='100%' height='100%' controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
|
|
24
24
|
<source src='#{escaped_url}'>
|
|
25
25
|
<a href='#{escaped_url}'>#{@url}</a>
|
|
26
26
|
</video>
|
data/lib/onebox/helpers.rb
CHANGED
|
@@ -24,8 +24,8 @@ module Onebox
|
|
|
24
24
|
html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
def self.fetch_html_doc(url, headers = nil)
|
|
28
|
-
response = (fetch_response(url,
|
|
27
|
+
def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
|
|
28
|
+
response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
|
|
29
29
|
doc = Nokogiri::HTML(response)
|
|
30
30
|
uri = Addressable::URI.parse(url)
|
|
31
31
|
|
|
@@ -37,7 +37,7 @@ module Onebox
|
|
|
37
37
|
canonical_link = doc.at('//link[@rel="canonical"]/@href')
|
|
38
38
|
canonical_uri = Addressable::URI.parse(canonical_link)
|
|
39
39
|
if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
|
|
40
|
-
response = (fetch_response(canonical_uri.to_s,
|
|
40
|
+
response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
|
|
41
41
|
doc = Nokogiri::HTML(response) if response
|
|
42
42
|
end
|
|
43
43
|
end
|
|
@@ -45,16 +45,23 @@ module Onebox
|
|
|
45
45
|
doc
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
-
def self.fetch_response(location,
|
|
48
|
+
def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
|
|
49
|
+
redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
|
|
49
50
|
|
|
50
|
-
|
|
51
|
-
limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
|
|
52
|
-
|
|
53
|
-
raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
|
|
51
|
+
raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
|
|
54
52
|
|
|
55
53
|
uri = Addressable::URI.parse(location)
|
|
56
54
|
uri = Addressable::URI.join(domain, uri) if !uri.host
|
|
57
55
|
|
|
56
|
+
use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
|
|
57
|
+
if use_body_cacher
|
|
58
|
+
response_body = body_cacher.fetch_cached_response_body(uri.to_s)
|
|
59
|
+
|
|
60
|
+
if response_body.present?
|
|
61
|
+
return response_body
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
58
65
|
result = StringIO.new
|
|
59
66
|
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
|
|
60
67
|
http.open_timeout = Onebox.options.connect_timeout
|
|
@@ -86,9 +93,9 @@ module Onebox
|
|
|
86
93
|
response.error! unless [301, 302].include?(code)
|
|
87
94
|
return fetch_response(
|
|
88
95
|
response['location'],
|
|
89
|
-
|
|
90
|
-
"#{uri.scheme}://#{uri.host}",
|
|
91
|
-
redir_header
|
|
96
|
+
redirect_limit: redirect_limit - 1,
|
|
97
|
+
domain: "#{uri.scheme}://#{uri.host}",
|
|
98
|
+
headers: redir_header
|
|
92
99
|
)
|
|
93
100
|
end
|
|
94
101
|
|
|
@@ -98,6 +105,10 @@ module Onebox
|
|
|
98
105
|
raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
|
|
99
106
|
end
|
|
100
107
|
|
|
108
|
+
if use_body_cacher && body_cacher.cache_response_body?(uri)
|
|
109
|
+
body_cacher.cache_response_body(uri.to_s, result.string)
|
|
110
|
+
end
|
|
111
|
+
|
|
101
112
|
return result.string
|
|
102
113
|
end
|
|
103
114
|
end
|
|
@@ -178,6 +189,10 @@ module Onebox
|
|
|
178
189
|
url.gsub!("'", "&apos;")
|
|
179
190
|
url.gsub!('"', "&quot;")
|
|
180
191
|
url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, "")
|
|
192
|
+
|
|
193
|
+
parsed = Addressable::URI.parse(url)
|
|
194
|
+
return "" unless parsed.host
|
|
195
|
+
|
|
181
196
|
url
|
|
182
197
|
end
|
|
183
198
|
|
|
@@ -167,7 +167,7 @@ module Onebox
|
|
|
167
167
|
@raw = "https://render.githubusercontent.com/view/solid?url=" + self.raw_template(m)
|
|
168
168
|
|
|
169
169
|
else
|
|
170
|
-
contents = open(self.raw_template(m), read_timeout: timeout).read
|
|
170
|
+
contents = URI.open(self.raw_template(m), read_timeout: timeout).read
|
|
171
171
|
|
|
172
172
|
contents_lines = contents.lines #get contents lines
|
|
173
173
|
contents_lines_size = contents_lines.size #get number of lines
|
data/lib/onebox/open_graph.rb
CHANGED
|
@@ -32,7 +32,8 @@ module Onebox
|
|
|
32
32
|
if method_name.end_with?(*integer_suffixes)
|
|
33
33
|
value.to_i
|
|
34
34
|
elsif method_name.end_with?(*url_suffixes)
|
|
35
|
-
|
|
35
|
+
result = Onebox::Helpers.normalize_url_for_output(value)
|
|
36
|
+
result unless Onebox::Helpers::blank?(result)
|
|
36
37
|
else
|
|
37
38
|
value
|
|
38
39
|
end
|
|
@@ -10,7 +10,7 @@ class Sanitize
|
|
|
10
10
|
|
|
11
11
|
attributes: {
|
|
12
12
|
'a' => RELAXED[:attributes]['a'] + %w(target),
|
|
13
|
-
'audio' => %w[controls],
|
|
13
|
+
'audio' => %w[controls controlslist],
|
|
14
14
|
'embed' => %w[height src type width],
|
|
15
15
|
'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href data-unsanitized-src],
|
|
16
16
|
'source' => %w[src type],
|
data/lib/onebox/status_check.rb
CHANGED
|
@@ -35,7 +35,7 @@ module Onebox
|
|
|
35
35
|
private
|
|
36
36
|
|
|
37
37
|
def check
|
|
38
|
-
res = open(@url, read_timeout: (@options.timeout || Onebox.options.timeout))
|
|
38
|
+
res = URI.open(@url, read_timeout: (@options.timeout || Onebox.options.timeout))
|
|
39
39
|
@status = res.status.first.to_i
|
|
40
40
|
rescue OpenURI::HTTPError => e
|
|
41
41
|
@status = e.io.status.first.to_i
|
data/lib/onebox/version.rb
CHANGED
data/onebox.gemspec
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: onebox
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.2.
|
|
4
|
+
version: 2.2.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Joanna Zeta
|
|
@@ -10,7 +10,7 @@ authors:
|
|
|
10
10
|
autorequire:
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date: 2021-
|
|
13
|
+
date: 2021-03-31 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: addressable
|
|
@@ -413,7 +413,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
413
413
|
requirements:
|
|
414
414
|
- - ">="
|
|
415
415
|
- !ruby/object:Gem::Version
|
|
416
|
-
version: 2.
|
|
416
|
+
version: 2.5.0
|
|
417
417
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
418
418
|
requirements:
|
|
419
419
|
- - ">="
|