onebox 2.2.14 → 2.2.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/onebox/engine.rb +6 -10
- data/lib/onebox/engine/allowlisted_generic_onebox.rb +0 -9
- data/lib/onebox/engine/amazon_onebox.rb +23 -16
- data/lib/onebox/engine/flickr_onebox.rb +2 -2
- data/lib/onebox/engine/gfycat_onebox.rb +26 -26
- data/lib/onebox/engine/github_commit_onebox.rb +1 -1
- data/lib/onebox/engine/github_folder_onebox.rb +1 -1
- data/lib/onebox/engine/google_docs_onebox.rb +22 -40
- data/lib/onebox/engine/google_maps_onebox.rb +10 -6
- data/lib/onebox/engine/google_photos_onebox.rb +6 -6
- data/lib/onebox/engine/imgur_onebox.rb +2 -2
- data/lib/onebox/engine/instagram_onebox.rb +2 -3
- data/lib/onebox/engine/pastebin_onebox.rb +11 -15
- data/lib/onebox/engine/pdf_onebox.rb +7 -15
- data/lib/onebox/engine/pubmed_onebox.rb +16 -12
- data/lib/onebox/engine/stack_exchange_onebox.rb +1 -1
- data/lib/onebox/engine/standard_embed.rb +0 -3
- data/lib/onebox/engine/trello_onebox.rb +3 -6
- data/lib/onebox/engine/youku_onebox.rb +0 -6
- data/lib/onebox/helpers.rb +2 -1
- data/lib/onebox/layout.rb +2 -14
- data/lib/onebox/matcher.rb +10 -8
- data/lib/onebox/mixins/git_blob_onebox.rb +3 -5
- data/lib/onebox/open_graph.rb +4 -4
- data/lib/onebox/preview.rb +2 -2
- data/lib/onebox/version.rb +1 -1
- data/templates/_layout.mustache +6 -2
- data/templates/allowlistedgeneric.mustache +8 -9
- data/templates/amazon.mustache +5 -2
- data/templates/githubblob.mustache +44 -34
- data/templates/githubcommit.mustache +1 -3
- data/templates/githubfolder.mustache +2 -2
- data/templates/githubgist.mustache +9 -6
- data/templates/githubissue.mustache +3 -3
- data/templates/githubpullrequest.mustache +1 -1
- data/templates/gitlabblob.mustache +11 -4
- data/templates/googledocs.mustache +2 -2
- data/templates/googledrive.mustache +2 -2
- data/templates/googleplayapp.mustache +2 -1
- data/templates/instagram.mustache +1 -1
- data/templates/pastebin.mustache +6 -2
- data/templates/pdf.mustache +6 -3
- data/templates/stackexchange.mustache +1 -0
- data/templates/twitterstatus.mustache +20 -5
- data/templates/wikimedia.mustache +2 -2
- data/templates/wikipedia.mustache +2 -2
- data/templates/xkcd.mustache +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e2169ef1cfb44f0208566f5ecff7bd7c5d2bbe2f12b58791f50aa24b776e56d
|
4
|
+
data.tar.gz: 9f9d38b578e46e6ce8c8c007577ff02522eab24f3dcc42f9eabd5b838a668e72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e293af57162b61ad0fa3472b7d9b35709f13504308e69eba45f40e9babff53e0258186add06ed4a89bc8305b8903dc49718c274659b5fca0712bc26d281ca7d4
|
7
|
+
data.tar.gz: 3bb70c552e010149bc32fd4c9dbe416cd6ae0098b4286e5894fbc12c07628b5079c0463ad78b54ab6dc773954e055c9195aea5ac180ab8bdf7dca3847db1bc96
|
data/lib/onebox/engine.rb
CHANGED
@@ -28,23 +28,19 @@ module Onebox
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
-
attr_reader :url, :uri
|
32
|
-
attr_reader :timeout
|
31
|
+
attr_reader :url, :uri, :options, :timeout
|
33
32
|
attr :errors
|
34
33
|
|
35
34
|
DEFAULT = {}
|
36
|
-
def options
|
37
|
-
@options
|
38
|
-
end
|
39
35
|
|
40
36
|
def options=(opt)
|
41
|
-
return @options if opt.nil? #make sure options provided
|
42
|
-
opt = opt.to_h
|
37
|
+
return @options if opt.nil? # make sure options provided
|
38
|
+
opt = opt.to_h if opt.instance_of?(OpenStruct)
|
43
39
|
@options.merge!(opt)
|
44
40
|
@options
|
45
41
|
end
|
46
42
|
|
47
|
-
def initialize(
|
43
|
+
def initialize(url, timeout = nil)
|
48
44
|
@errors = {}
|
49
45
|
@options = DEFAULT
|
50
46
|
class_name = self.class.name.split("::").last.to_s
|
@@ -52,8 +48,8 @@ module Onebox
|
|
52
48
|
# Set the engine options extracted from global options.
|
53
49
|
self.options = Onebox.options[class_name] || {}
|
54
50
|
|
55
|
-
@url =
|
56
|
-
@uri = URI(
|
51
|
+
@url = url
|
52
|
+
@uri = URI(url)
|
57
53
|
if always_https?
|
58
54
|
@uri.scheme = 'https'
|
59
55
|
@url = @uri.to_s
|
@@ -27,7 +27,6 @@ module Onebox
|
|
27
27
|
500px.com
|
28
28
|
8tracks.com
|
29
29
|
abc.net.au
|
30
|
-
about.com
|
31
30
|
answers.com
|
32
31
|
arstechnica.com
|
33
32
|
ask.com
|
@@ -36,11 +35,9 @@ module Onebox
|
|
36
35
|
bbs.boingboing.net
|
37
36
|
bestbuy.ca
|
38
37
|
bestbuy.com
|
39
|
-
blip.tv
|
40
38
|
bloomberg.com
|
41
39
|
businessinsider.com
|
42
40
|
change.org
|
43
|
-
clikthrough.com
|
44
41
|
cnet.com
|
45
42
|
cnn.com
|
46
43
|
codepen.io
|
@@ -90,7 +87,6 @@ module Onebox
|
|
90
87
|
meetup.com
|
91
88
|
mixcloud.com
|
92
89
|
mlb.com
|
93
|
-
myshopify.com
|
94
90
|
myspace.com
|
95
91
|
nba.com
|
96
92
|
npr.org
|
@@ -98,16 +94,13 @@ module Onebox
|
|
98
94
|
photobucket.com
|
99
95
|
pinterest.com
|
100
96
|
reference.com
|
101
|
-
revision3.com
|
102
97
|
rottentomatoes.com
|
103
98
|
samsung.com
|
104
|
-
screenr.com
|
105
99
|
scribd.com
|
106
100
|
slideshare.net
|
107
101
|
sourceforge.net
|
108
102
|
speakerdeck.com
|
109
103
|
spotify.com
|
110
|
-
squidoo.com
|
111
104
|
streamable.com
|
112
105
|
techcrunch.com
|
113
106
|
ted.com
|
@@ -124,7 +117,6 @@ module Onebox
|
|
124
117
|
twitpic.com
|
125
118
|
usatoday.com
|
126
119
|
viddler.com
|
127
|
-
videojug.com
|
128
120
|
vine.co
|
129
121
|
walmart.com
|
130
122
|
washingtonpost.com
|
@@ -275,7 +267,6 @@ module Onebox
|
|
275
267
|
|
276
268
|
def rewrite_https(html)
|
277
269
|
return unless html
|
278
|
-
uri = URI(@url)
|
279
270
|
if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
|
280
271
|
html = html.gsub("http://", "https://")
|
281
272
|
end
|
@@ -19,10 +19,8 @@ module Onebox
|
|
19
19
|
# If possible, fetch the cached HTML body immediately so we can
|
20
20
|
# try to grab the canonical URL from that document,
|
21
21
|
# rather than guess at the best URL structure to use
|
22
|
-
if
|
23
|
-
|
24
|
-
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
|
25
|
-
end
|
22
|
+
if !@raw && has_cached_body
|
23
|
+
@raw = Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
|
26
24
|
end
|
27
25
|
|
28
26
|
if @raw
|
@@ -31,7 +29,8 @@ module Onebox
|
|
31
29
|
end
|
32
30
|
|
33
31
|
if match && match[:id]
|
34
|
-
|
32
|
+
id = Addressable::URI.encode_component(match[:id], Addressable::URI::CharacterClasses::PATH)
|
33
|
+
return "https://www.amazon.#{tld}/dp/#{id}"
|
35
34
|
end
|
36
35
|
|
37
36
|
@url
|
@@ -49,6 +48,12 @@ module Onebox
|
|
49
48
|
|
50
49
|
private
|
51
50
|
|
51
|
+
def has_cached_body
|
52
|
+
body_cacher&.respond_to?('cache_response_body?') &&
|
53
|
+
body_cacher.cache_response_body?(uri.to_s) &&
|
54
|
+
body_cacher.cached_response_body_exists?(uri.to_s)
|
55
|
+
end
|
56
|
+
|
52
57
|
def match
|
53
58
|
@match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
|
54
59
|
end
|
@@ -57,9 +62,9 @@ module Onebox
|
|
57
62
|
if (main_image = raw.css("#main-image")) && main_image.any?
|
58
63
|
attributes = main_image.first.attributes
|
59
64
|
|
60
|
-
|
61
|
-
|
62
|
-
|
65
|
+
if attributes["data-a-hires"]
|
66
|
+
return attributes["data-a-hires"].to_s
|
67
|
+
elsif attributes["data-a-dynamic-image"]
|
63
68
|
return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
|
64
69
|
end
|
65
70
|
end
|
@@ -67,9 +72,11 @@ module Onebox
|
|
67
72
|
if (landing_image = raw.css("#landingImage")) && landing_image.any?
|
68
73
|
attributes = landing_image.first.attributes
|
69
74
|
|
70
|
-
|
71
|
-
|
72
|
-
|
75
|
+
if attributes["data-old-hires"]
|
76
|
+
return attributes["data-old-hires"].to_s
|
77
|
+
else
|
78
|
+
return landing_image.first["src"].to_s
|
79
|
+
end
|
73
80
|
end
|
74
81
|
|
75
82
|
if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
|
@@ -91,16 +98,16 @@ module Onebox
|
|
91
98
|
end
|
92
99
|
|
93
100
|
def multiple_authors(authors_xpath)
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
101
|
+
raw
|
102
|
+
.xpath(authors_xpath)
|
103
|
+
.map { |a| a.inner_text.strip }
|
104
|
+
.join(", ")
|
98
105
|
end
|
99
106
|
|
100
107
|
def data
|
101
108
|
og = ::Onebox::OpenGraph.new(raw)
|
102
109
|
|
103
|
-
if raw.at_css('#dp.book_mobile') #printed books
|
110
|
+
if raw.at_css('#dp.book_mobile') # printed books
|
104
111
|
title = raw.at("h1#title")&.inner_text
|
105
112
|
authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
|
106
113
|
rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text
|
@@ -32,7 +32,7 @@ module Onebox
|
|
32
32
|
<span class='album-title'>#{album_title}</span>
|
33
33
|
</span>
|
34
34
|
</span>
|
35
|
-
<img src='#{og.
|
35
|
+
<img src='#{og.secure_image_url}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
|
36
36
|
</a>
|
37
37
|
</div>
|
38
38
|
HTML
|
@@ -43,7 +43,7 @@ module Onebox
|
|
43
43
|
|
44
44
|
<<-HTML
|
45
45
|
<a href='#{escaped_url}' target='_blank' rel='noopener' class="onebox">
|
46
|
-
<img src='#{og.
|
46
|
+
<img src='#{og.secure_image_url}' #{og.title_attr} alt='Imgur' height='#{og.image_height}' width='#{og.image_width}'>
|
47
47
|
</a>
|
48
48
|
HTML
|
49
49
|
end
|
@@ -9,8 +9,8 @@ module Onebox
|
|
9
9
|
matches_regexp(/^https?:\/\/gfycat\.com\//)
|
10
10
|
always_https
|
11
11
|
|
12
|
+
# This engine should have priority over AllowlistedGenericOnebox.
|
12
13
|
def self.priority
|
13
|
-
# This engine should have priority over AllowlistedGenericOnebox.
|
14
14
|
1
|
15
15
|
end
|
16
16
|
|
@@ -21,6 +21,7 @@ module Onebox
|
|
21
21
|
<img src="https://gfycat.com/static/favicons/favicon-96x96.png" class="site-icon" width="64" height="64">
|
22
22
|
<a href="#{data[:url]}" target="_blank" rel="nofollow ugc noopener">Gfycat.com</a>
|
23
23
|
</header>
|
24
|
+
|
24
25
|
<article class="onebox-body">
|
25
26
|
<h4>
|
26
27
|
#{data[:title]} by
|
@@ -36,11 +37,12 @@ module Onebox
|
|
36
37
|
<img title="Sorry, your browser doesn't support HTML5 video." src="#{data[:posterUrl]}">
|
37
38
|
</video>
|
38
39
|
</div>
|
40
|
+
|
39
41
|
<p>
|
40
42
|
<span class="label1">#{data[:keywords]}</span>
|
41
43
|
</p>
|
42
|
-
|
43
44
|
</article>
|
45
|
+
|
44
46
|
<div style="clear: both"></div>
|
45
47
|
</aside>
|
46
48
|
HTML
|
@@ -61,52 +63,50 @@ module Onebox
|
|
61
63
|
@match ||= @url.match(/^https?:\/\/gfycat\.com\/(gifs\/detail\/)?(?<name>.+)/)
|
62
64
|
end
|
63
65
|
|
64
|
-
def
|
65
|
-
@
|
66
|
-
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
67
|
-
Nokogiri::HTML(response)
|
68
|
-
end
|
69
|
-
end
|
66
|
+
def og_data
|
67
|
+
return @og_data if defined?(@og_data)
|
70
68
|
|
71
|
-
|
72
|
-
|
69
|
+
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
70
|
+
page = Nokogiri::HTML(response)
|
71
|
+
script = page.at_css('script[type="application/ld+json"]')
|
73
72
|
|
74
|
-
if json_string =
|
75
|
-
og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
|
73
|
+
if json_string = script&.text
|
74
|
+
@og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
|
75
|
+
else
|
76
|
+
@og_data = {}
|
76
77
|
end
|
77
|
-
|
78
|
-
og_data
|
79
78
|
end
|
80
79
|
|
81
80
|
def data
|
82
|
-
|
81
|
+
return @data if defined?(@data)
|
83
82
|
|
84
|
-
|
83
|
+
@data = {
|
85
84
|
name: match[:name],
|
86
85
|
title: og_data[:headline] || 'No Title',
|
87
86
|
author: og_data[:author],
|
88
|
-
url: @url
|
87
|
+
url: @url,
|
89
88
|
}
|
90
89
|
|
91
|
-
keywords = og_data[:keywords]&.split(',')
|
92
|
-
|
93
|
-
|
90
|
+
if keywords = og_data[:keywords]&.split(',')
|
91
|
+
@data[:keywords] = keywords
|
92
|
+
.map { |keyword| "<a href='https://gfycat.com/gifs/search/#{keyword}'>##{keyword}</a>" }
|
93
|
+
.join(' ')
|
94
94
|
end
|
95
95
|
|
96
96
|
if og_data[:video]
|
97
97
|
content_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:contentUrl])
|
98
98
|
video_url = Pathname.new(content_url)
|
99
|
-
|
100
|
-
|
99
|
+
@data[:webmUrl] = video_url.sub_ext(".webm").to_s
|
100
|
+
@data[:mp4Url] = video_url.sub_ext(".mp4").to_s
|
101
101
|
|
102
102
|
thumbnail_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:thumbnailUrl])
|
103
|
-
|
103
|
+
@data[:posterUrl] = thumbnail_url
|
104
104
|
|
105
|
-
|
106
|
-
|
105
|
+
@data[:width] = og_data[:video][:width]
|
106
|
+
@data[:height] = og_data[:video][:height]
|
107
107
|
end
|
108
108
|
|
109
|
-
|
109
|
+
@data
|
110
110
|
end
|
111
111
|
end
|
112
112
|
end
|
@@ -10,7 +10,7 @@ module Onebox
|
|
10
10
|
include JSON
|
11
11
|
include Onebox::Mixins::GithubBody
|
12
12
|
|
13
|
-
matches_regexp
|
13
|
+
matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:\/)?(?:.)*\/commit\//)
|
14
14
|
always_https
|
15
15
|
|
16
16
|
def url
|
@@ -7,7 +7,7 @@ module Onebox
|
|
7
7
|
include StandardEmbed
|
8
8
|
include LayoutSupport
|
9
9
|
|
10
|
-
matches_regexp
|
10
|
+
matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/)
|
11
11
|
always_https
|
12
12
|
|
13
13
|
def self.priority
|
@@ -4,61 +4,43 @@ module Onebox
|
|
4
4
|
module Engine
|
5
5
|
class GoogleDocsOnebox
|
6
6
|
include Engine
|
7
|
+
include StandardEmbed
|
7
8
|
include LayoutSupport
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
document: :docs,
|
17
|
-
presentation: :slides,
|
18
|
-
forms: :forms,
|
19
|
-
}
|
20
|
-
end
|
10
|
+
SUPPORTED_ENDPOINTS = %w(spreadsheets document forms presentation)
|
11
|
+
SHORT_TYPES = {
|
12
|
+
spreadsheets: :sheets,
|
13
|
+
document: :docs,
|
14
|
+
presentation: :slides,
|
15
|
+
forms: :forms,
|
16
|
+
}
|
21
17
|
|
22
|
-
matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?<endpoint>(#{
|
18
|
+
matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?<endpoint>(#{SUPPORTED_ENDPOINTS.join('|')}))\/d\/((?<key>[\w-]*)).+$/)
|
23
19
|
always_https
|
24
20
|
|
25
|
-
|
21
|
+
private
|
26
22
|
|
27
23
|
def data
|
28
|
-
og_data =
|
24
|
+
og_data = get_opengraph
|
25
|
+
short_type = SHORT_TYPES[match[:endpoint].to_sym]
|
26
|
+
|
27
|
+
description = if Onebox::Helpers.blank?(og_data.description)
|
28
|
+
"This #{short_type.to_s.chop.capitalize} is private"
|
29
|
+
else
|
30
|
+
Onebox::Helpers.truncate(og_data.description, 250)
|
31
|
+
end
|
32
|
+
|
29
33
|
{
|
30
34
|
link: link,
|
31
|
-
title: og_data
|
32
|
-
description:
|
33
|
-
type:
|
35
|
+
title: og_data.title || "Google #{short_type.to_s.capitalize}",
|
36
|
+
description: description,
|
37
|
+
type: short_type
|
34
38
|
}
|
35
39
|
end
|
36
40
|
|
37
|
-
def doc_type
|
38
|
-
@doc_type ||= match[:endpoint].to_sym
|
39
|
-
end
|
40
|
-
|
41
|
-
def shorttype
|
42
|
-
GoogleDocsOnebox.short_types[doc_type]
|
43
|
-
end
|
44
|
-
|
45
41
|
def match
|
46
42
|
@match ||= @url.match(@@matcher)
|
47
43
|
end
|
48
|
-
|
49
|
-
def get_og_data
|
50
|
-
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
51
|
-
html = Nokogiri::HTML(response)
|
52
|
-
og_data = {}
|
53
|
-
html.css('meta').each do |m|
|
54
|
-
if m.attribute('property') && m.attribute('property').to_s.match(/^og:/i)
|
55
|
-
m_content = m.attribute('content').to_s.strip
|
56
|
-
m_property = m.attribute('property').to_s.gsub('og:', '')
|
57
|
-
og_data[m_property.to_sym] = m_content
|
58
|
-
end
|
59
|
-
end
|
60
|
-
og_data
|
61
|
-
end
|
62
44
|
end
|
63
45
|
end
|
64
46
|
end
|
@@ -119,8 +119,6 @@ module Onebox
|
|
119
119
|
@placeholder = "https://maps.googleapis.com/maps/api/streetview?size=690x400&location=#{lon},#{lat}&pano=#{panoid}&fov=#{zoom}&heading=#{heading}&pitch=#{pitch}&sensor=false"
|
120
120
|
|
121
121
|
when :canonical
|
122
|
-
uri = URI(@url)
|
123
|
-
|
124
122
|
query = URI::decode_www_form(uri.query).to_h
|
125
123
|
if !query.has_key?("ll")
|
126
124
|
raise ArgumentError, "canonical url lacks location argument" unless query.has_key?("sll")
|
@@ -163,14 +161,20 @@ module Onebox
|
|
163
161
|
end
|
164
162
|
|
165
163
|
def follow_redirect!
|
166
|
-
uri = URI(@url)
|
167
164
|
begin
|
168
|
-
http = Net::HTTP.start(
|
169
|
-
|
170
|
-
|
165
|
+
http = Net::HTTP.start(
|
166
|
+
uri.host,
|
167
|
+
uri.port,
|
168
|
+
use_ssl: uri.scheme == 'https',
|
169
|
+
open_timeout: timeout,
|
170
|
+
read_timeout: timeout
|
171
|
+
)
|
171
172
|
|
173
|
+
response = http.head(uri.path)
|
172
174
|
raise "unexpected response code #{response.code}" unless %w(200 301 302).include?(response.code)
|
175
|
+
|
173
176
|
@url = response.code == "200" ? uri.to_s : response["Location"]
|
177
|
+
@uri = URI(@url)
|
174
178
|
ensure
|
175
179
|
http.finish rescue nil
|
176
180
|
end
|