onebox 2.2.6 → 2.2.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/lib/onebox/engine/allowlisted_generic_onebox.rb +2 -0
- data/lib/onebox/engine/amazon_onebox.rb +31 -11
- data/lib/onebox/engine/gfycat_onebox.rb +1 -1
- data/lib/onebox/engine/github_pullrequest_onebox.rb +5 -0
- data/lib/onebox/engine/google_docs_onebox.rb +1 -1
- data/lib/onebox/engine/html.rb +5 -1
- data/lib/onebox/engine/pastebin_onebox.rb +1 -1
- data/lib/onebox/engine/twitter_status_onebox.rb +1 -1
- data/lib/onebox/engine/youtube_onebox.rb +32 -4
- data/lib/onebox/helpers.rb +27 -12
- data/lib/onebox/open_graph.rb +2 -1
- data/lib/onebox/sanitize_config.rb +1 -1
- data/lib/onebox/version.rb +1 -1
- data/templates/githubpullrequest.mustache +16 -3
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cdbe1e67e709c8a2c8179a7118f4b51447a15a1bf1b56619fc601d3a9486123e
|
4
|
+
data.tar.gz: 8a72a7fc2152789c4103de330b7dcd69ec9a9c487fdc499a6068bdd0840e2e26
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3fe9a20e93b0aea3a3a7362bb5e9f026ddb63ab0c7edb64f5b7a1d9831fa76d001314ea0fdf07c92daefe95fc096e8068d28e3e6f5c7860689e5545646031084
|
7
|
+
data.tar.gz: 4e2e19424dab311c7a56feec82486333520f72575f95acccd1a11b8419616d2ea895e02472542d6bcdce7c5d6fbf72f887eab3941f67dd546762349bec3b66a9
|
data/.github/workflows/ci.yml
CHANGED
@@ -227,8 +227,10 @@ module Onebox
|
|
227
227
|
d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
|
228
228
|
d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
|
229
229
|
d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
|
230
|
+
d[:image] = nil if Onebox::Helpers.blank?(d[:image])
|
230
231
|
|
231
232
|
d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
|
233
|
+
d[:video] = nil if Onebox::Helpers.blank?(d[:video])
|
232
234
|
|
233
235
|
d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
|
234
236
|
if !Onebox::Helpers.blank?(d[:published_time])
|
@@ -11,11 +11,25 @@ module Onebox
|
|
11
11
|
include HTML
|
12
12
|
|
13
13
|
always_https
|
14
|
-
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
|
14
|
+
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
|
15
15
|
|
16
16
|
def url
|
17
|
+
# If possible, fetch the cached HTML body immediately so we can
|
18
|
+
# try to grab the canonical URL from that document,
|
19
|
+
# rather than guess at the best URL structure to use
|
20
|
+
if body_cacher&.respond_to?('cache_response_body?')
|
21
|
+
if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
|
22
|
+
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
if @raw
|
27
|
+
canonical_link = @raw.at('//link[@rel="canonical"]/@href')
|
28
|
+
return canonical_link.to_s if canonical_link
|
29
|
+
end
|
30
|
+
|
17
31
|
if match && match[:id]
|
18
|
-
return "https://www.amazon.#{tld}/
|
32
|
+
return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
|
19
33
|
end
|
20
34
|
|
21
35
|
@url
|
@@ -26,16 +40,15 @@ module Onebox
|
|
26
40
|
end
|
27
41
|
|
28
42
|
def http_params
|
29
|
-
|
30
|
-
'User-Agent' =>
|
31
|
-
|
32
|
-
}
|
43
|
+
if @options && @options[:user_agent]
|
44
|
+
{ 'User-Agent' => @options[:user_agent] }
|
45
|
+
end
|
33
46
|
end
|
34
47
|
|
35
48
|
private
|
36
49
|
|
37
50
|
def match
|
38
|
-
@match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[
|
51
|
+
@match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
|
39
52
|
end
|
40
53
|
|
41
54
|
def image
|
@@ -50,6 +63,10 @@ module Onebox
|
|
50
63
|
end
|
51
64
|
|
52
65
|
if (landing_image = raw.css("#landingImage")) && landing_image.any?
|
66
|
+
attributes = landing_image.first.attributes
|
67
|
+
|
68
|
+
return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
|
69
|
+
|
53
70
|
landing_image.first["src"].to_s
|
54
71
|
end
|
55
72
|
|
@@ -100,7 +117,7 @@ module Onebox
|
|
100
117
|
end
|
101
118
|
|
102
119
|
result = {
|
103
|
-
link:
|
120
|
+
link: url,
|
104
121
|
title: title,
|
105
122
|
by_info: authors,
|
106
123
|
image: og.image || image,
|
@@ -131,7 +148,7 @@ module Onebox
|
|
131
148
|
end
|
132
149
|
|
133
150
|
result = {
|
134
|
-
link:
|
151
|
+
link: url,
|
135
152
|
title: title,
|
136
153
|
by_info: authors,
|
137
154
|
image: og.image || image,
|
@@ -147,7 +164,7 @@ module Onebox
|
|
147
164
|
else
|
148
165
|
title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
|
149
166
|
result = {
|
150
|
-
link:
|
167
|
+
link: url,
|
151
168
|
title: title,
|
152
169
|
image: og.image || image,
|
153
170
|
price: price
|
@@ -157,7 +174,10 @@ module Onebox
|
|
157
174
|
result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
|
158
175
|
|
159
176
|
summary = raw.at("#productDescription")
|
160
|
-
|
177
|
+
|
178
|
+
description = og.description || summary&.inner_text
|
179
|
+
description ||= raw.css("meta[name=description]").first&.[]("content")
|
180
|
+
result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
|
161
181
|
end
|
162
182
|
|
163
183
|
result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
|
@@ -20,6 +20,7 @@ module Onebox
|
|
20
20
|
@match ||= @url.match(%r{github\.com/(?<owner>[^/]+)/(?<repository>[^/]+)/pull/(?<number>[^/]+)})
|
21
21
|
end
|
22
22
|
|
23
|
+
GITHUB_COMMENT_REGEX = /(<!--.*?-->\r\n)/
|
23
24
|
def data
|
24
25
|
result = raw.clone
|
25
26
|
result['link'] = link
|
@@ -31,6 +32,10 @@ module Onebox
|
|
31
32
|
|
32
33
|
ulink = URI(link)
|
33
34
|
result['domain'] = "#{ulink.host}/#{ulink.path.split('/')[1]}/#{ulink.path.split('/')[2]}"
|
35
|
+
|
36
|
+
body = (result['body'] || '').gsub(GITHUB_COMMENT_REGEX, '')
|
37
|
+
result['body'] = body.present? ? body : nil
|
38
|
+
|
34
39
|
result
|
35
40
|
end
|
36
41
|
end
|
@@ -47,7 +47,7 @@ module Onebox
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def get_og_data
|
50
|
-
response = Onebox::Helpers.fetch_response(url, 10) rescue nil
|
50
|
+
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
51
51
|
html = Nokogiri::HTML(response)
|
52
52
|
og_data = {}
|
53
53
|
html.css('meta').each do |m|
|
data/lib/onebox/engine/html.rb
CHANGED
@@ -11,7 +11,11 @@ module Onebox
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def raw
|
14
|
-
@raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
|
14
|
+
@raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
|
15
|
+
end
|
16
|
+
|
17
|
+
def body_cacher
|
18
|
+
self.options&.[](:body_cacher)
|
15
19
|
end
|
16
20
|
|
17
21
|
def html?
|
@@ -31,7 +31,7 @@ module Onebox
|
|
31
31
|
|
32
32
|
def lines
|
33
33
|
return @lines if @lines
|
34
|
-
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
|
34
|
+
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
|
35
35
|
@lines = response.split("\n")
|
36
36
|
end
|
37
37
|
|
@@ -17,7 +17,7 @@ module Onebox
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def get_twitter_data
|
20
|
-
response = Onebox::Helpers.fetch_response(url,
|
20
|
+
response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
|
21
21
|
html = Nokogiri::HTML(response)
|
22
22
|
twitter_data = {}
|
23
23
|
html.css('meta').each do |m|
|
@@ -13,11 +13,36 @@ module Onebox
|
|
13
13
|
WIDTH ||= 480
|
14
14
|
HEIGHT ||= 360
|
15
15
|
|
16
|
-
def
|
17
|
-
|
16
|
+
def parse_embed_response
|
17
|
+
return unless video_id
|
18
|
+
return @parse_embed_response if defined?(@parse_embed_response)
|
19
|
+
|
20
|
+
embed_url = "https://www.youtube.com/embed/#{video_id}"
|
21
|
+
@embed_doc ||= Onebox::Helpers.fetch_html_doc(embed_url)
|
22
|
+
|
23
|
+
begin
|
24
|
+
script_tag = @embed_doc.xpath('//script').find { |tag| tag.to_s.include?('ytcfg.set') }.to_s
|
25
|
+
match = script_tag.to_s.match(/ytcfg\.set\((?<json>.*)\)/)
|
26
|
+
|
27
|
+
yt_json = ::JSON.parse(match[:json])
|
28
|
+
renderer = ::JSON.parse(yt_json['PLAYER_VARS']['embedded_player_response'])['embedPreview']['thumbnailPreviewRenderer']
|
29
|
+
|
30
|
+
title = renderer['title']['runs'].first['text']
|
31
|
+
|
32
|
+
image = "https://img.youtube.com/vi/#{video_id}/hqdefault.jpg"
|
33
|
+
rescue
|
34
|
+
return
|
35
|
+
end
|
36
|
+
|
37
|
+
@parse_embed_response = { image: image, title: title }
|
38
|
+
end
|
18
39
|
|
40
|
+
def placeholder_html
|
19
41
|
if video_id || list_id
|
20
|
-
|
42
|
+
result = parse_embed_response
|
43
|
+
result ||= get_opengraph.data
|
44
|
+
|
45
|
+
"<img src='#{result[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{result[:title]}'>"
|
21
46
|
else
|
22
47
|
to_html
|
23
48
|
end
|
@@ -52,7 +77,10 @@ module Onebox
|
|
52
77
|
end
|
53
78
|
|
54
79
|
def video_title
|
55
|
-
@video_title ||=
|
80
|
+
@video_title ||= begin
|
81
|
+
result = parse_embed_response || get_opengraph.data
|
82
|
+
result[:title]
|
83
|
+
end
|
56
84
|
end
|
57
85
|
|
58
86
|
private
|
data/lib/onebox/helpers.rb
CHANGED
@@ -7,7 +7,7 @@ module Onebox
|
|
7
7
|
|
8
8
|
class DownloadTooLarge < StandardError; end
|
9
9
|
|
10
|
-
IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com']
|
10
|
+
IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com', 'youtube.com']
|
11
11
|
|
12
12
|
def self.symbolize_keys(hash)
|
13
13
|
return {} if hash.nil?
|
@@ -24,8 +24,8 @@ module Onebox
|
|
24
24
|
html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
|
25
25
|
end
|
26
26
|
|
27
|
-
def self.fetch_html_doc(url, headers = nil)
|
28
|
-
response = (fetch_response(url,
|
27
|
+
def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
|
28
|
+
response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
|
29
29
|
doc = Nokogiri::HTML(response)
|
30
30
|
uri = Addressable::URI.parse(url)
|
31
31
|
|
@@ -37,7 +37,7 @@ module Onebox
|
|
37
37
|
canonical_link = doc.at('//link[@rel="canonical"]/@href')
|
38
38
|
canonical_uri = Addressable::URI.parse(canonical_link)
|
39
39
|
if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
|
40
|
-
response = (fetch_response(canonical_uri.to_s,
|
40
|
+
response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
|
41
41
|
doc = Nokogiri::HTML(response) if response
|
42
42
|
end
|
43
43
|
end
|
@@ -45,16 +45,23 @@ module Onebox
|
|
45
45
|
doc
|
46
46
|
end
|
47
47
|
|
48
|
-
def self.fetch_response(location,
|
48
|
+
def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
|
49
|
+
redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
|
49
50
|
|
50
|
-
|
51
|
-
limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
|
52
|
-
|
53
|
-
raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
|
51
|
+
raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
|
54
52
|
|
55
53
|
uri = Addressable::URI.parse(location)
|
56
54
|
uri = Addressable::URI.join(domain, uri) if !uri.host
|
57
55
|
|
56
|
+
use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
|
57
|
+
if use_body_cacher
|
58
|
+
response_body = body_cacher.fetch_cached_response_body(uri.to_s)
|
59
|
+
|
60
|
+
if response_body.present?
|
61
|
+
return response_body
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
58
65
|
result = StringIO.new
|
59
66
|
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
|
60
67
|
http.open_timeout = Onebox.options.connect_timeout
|
@@ -86,9 +93,9 @@ module Onebox
|
|
86
93
|
response.error! unless [301, 302].include?(code)
|
87
94
|
return fetch_response(
|
88
95
|
response['location'],
|
89
|
-
|
90
|
-
"#{uri.scheme}://#{uri.host}",
|
91
|
-
redir_header
|
96
|
+
redirect_limit: redirect_limit - 1,
|
97
|
+
domain: "#{uri.scheme}://#{uri.host}",
|
98
|
+
headers: redir_header
|
92
99
|
)
|
93
100
|
end
|
94
101
|
|
@@ -98,6 +105,10 @@ module Onebox
|
|
98
105
|
raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
|
99
106
|
end
|
100
107
|
|
108
|
+
if use_body_cacher && body_cacher.cache_response_body?(uri)
|
109
|
+
body_cacher.cache_response_body(uri.to_s, result.string)
|
110
|
+
end
|
111
|
+
|
101
112
|
return result.string
|
102
113
|
end
|
103
114
|
end
|
@@ -178,6 +189,10 @@ module Onebox
|
|
178
189
|
url.gsub!("'", "&apos;")
|
179
190
|
url.gsub!('"', "&quot;")
|
180
191
|
url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, "")
|
192
|
+
|
193
|
+
parsed = Addressable::URI.parse(url)
|
194
|
+
return "" unless parsed.host
|
195
|
+
|
181
196
|
url
|
182
197
|
end
|
183
198
|
|
data/lib/onebox/open_graph.rb
CHANGED
@@ -32,7 +32,8 @@ module Onebox
|
|
32
32
|
if method_name.end_with?(*integer_suffixes)
|
33
33
|
value.to_i
|
34
34
|
elsif method_name.end_with?(*url_suffixes)
|
35
|
-
|
35
|
+
result = Onebox::Helpers.normalize_url_for_output(value)
|
36
|
+
result unless Onebox::Helpers::blank?(result)
|
36
37
|
else
|
37
38
|
value
|
38
39
|
end
|
@@ -6,7 +6,7 @@ class Sanitize
|
|
6
6
|
HTTP_PROTOCOLS ||= ['http', 'https', :relative].freeze
|
7
7
|
|
8
8
|
ONEBOX ||= freeze_config merge(RELAXED,
|
9
|
-
elements: RELAXED[:elements] + %w[audio embed iframe source video svg path],
|
9
|
+
elements: RELAXED[:elements] + %w[audio details embed iframe source video svg path],
|
10
10
|
|
11
11
|
attributes: {
|
12
12
|
'a' => RELAXED[:attributes]['a'] + %w(target),
|
data/lib/onebox/version.rb
CHANGED
@@ -4,9 +4,22 @@
|
|
4
4
|
</div>
|
5
5
|
|
6
6
|
<div class="github-info-container">
|
7
|
-
|
8
|
-
<
|
9
|
-
|
7
|
+
{{^body}}
|
8
|
+
<h4>
|
9
|
+
<a href="{{html_url}}" target="_blank" rel="noopener">{{title}}</a>
|
10
|
+
</h4>
|
11
|
+
{{/body}}
|
12
|
+
|
13
|
+
{{#body}}
|
14
|
+
<details class="onebox-details">
|
15
|
+
<summary class="onebox-details-summary">
|
16
|
+
<h4>
|
17
|
+
<a href="{{html_url}}" target="_blank" rel="noopener">{{title}}</a>
|
18
|
+
</h4>
|
19
|
+
</summary>
|
20
|
+
<p class="onebox-details-body">{{body}}</p>
|
21
|
+
</details>
|
22
|
+
{{/body}}
|
10
23
|
|
11
24
|
<div class="branches">
|
12
25
|
<code>{{base.label}}</code> ← <code>{{head.label}}</code>
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.6
|
4
|
+
version: 2.2.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joanna Zeta
|
8
8
|
- Vyki Englert
|
9
9
|
- Robin Ward
|
10
|
-
autorequire:
|
10
|
+
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2021-
|
13
|
+
date: 2021-04-12 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: addressable
|
@@ -405,7 +405,7 @@ homepage: https://github.com/discourse/onebox
|
|
405
405
|
licenses:
|
406
406
|
- MIT
|
407
407
|
metadata: {}
|
408
|
-
post_install_message:
|
408
|
+
post_install_message:
|
409
409
|
rdoc_options: []
|
410
410
|
require_paths:
|
411
411
|
- lib
|
@@ -421,7 +421,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
421
421
|
version: '0'
|
422
422
|
requirements: []
|
423
423
|
rubygems_version: 3.0.3
|
424
|
-
signing_key:
|
424
|
+
signing_key:
|
425
425
|
specification_version: 4
|
426
426
|
summary: A gem for generating embeddable HTML previews from URLs.
|
427
427
|
test_files: []
|