onebox 1.9.28.4 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -6
- data/lib/onebox/engine.rb +1 -2
- data/lib/onebox/engine/{whitelisted_generic_onebox.rb → allowlisted_generic_onebox.rb} +16 -12
- data/lib/onebox/engine/audio_onebox.rb +1 -1
- data/lib/onebox/engine/facebook_media_onebox.rb +1 -1
- data/lib/onebox/engine/gfycat_onebox.rb +1 -1
- data/lib/onebox/engine/google_photos_onebox.rb +18 -4
- data/lib/onebox/engine/image_onebox.rb +1 -1
- data/lib/onebox/engine/reddit_media_onebox.rb +1 -1
- data/lib/onebox/engine/standard_embed.rb +1 -1
- data/lib/onebox/engine/video_onebox.rb +1 -1
- data/lib/onebox/engine/youtube_onebox.rb +1 -1
- data/lib/onebox/helpers.rb +9 -10
- data/lib/onebox/mixins/twitch_onebox.rb +3 -1
- data/lib/onebox/sanitize_config.rb +1 -1
- data/lib/onebox/version.rb +1 -1
- data/templates/{whitelistedgeneric.mustache → allowlistedgeneric.mustache} +0 -0
- metadata +4 -6
- data/lib/onebox/engine/wechat_mp_onebox.rb +0 -62
- data/templates/wechatmp.mustache +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 373f78c55bcd96d80865329cabb0b3f2e091cfbac87f71a9b913b0512649e4c5
|
4
|
+
data.tar.gz: 3c21d762657d8f96109ea5db3048daca0948544c0d25625f4242ff35680e094c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae768140c4c42b634a10e9c3f8ab716adbb2bbda5f74a4c1c6205abaee4cbff557d3e87ae25b2f81039caed5c330e9dccadbce87b6e8af3ba691a35114ecffa8
|
7
|
+
data.tar.gz: 3102dc52f8bd7a9246ae1854289ad1138283b2dbf92f8a7bc53c99c650176d7b9c38afd57549305baf34e622fcde335fd13bee075b4f803470ea50c3f784407d
|
data/README.md
CHANGED
@@ -46,6 +46,14 @@ preview = Onebox.preview(url)
|
|
46
46
|
"#{preview}" == preview.to_s #=> true
|
47
47
|
```
|
48
48
|
|
49
|
+
### Twitch Onebox
|
50
|
+
|
51
|
+
To embed Twitch videos and clips, pass `hostname` in the options to `Onebox.preview`:
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
preview = Onebox.preview(url, hostname: 'www.example.com')
|
55
|
+
```
|
56
|
+
|
49
57
|
Ruby Support
|
50
58
|
------------
|
51
59
|
|
@@ -63,12 +71,11 @@ out the project. You can then try out URLs.
|
|
63
71
|
The server doesn't reload code changes automatically (PRs accepted!) so
|
64
72
|
make sure to hit CTRL-C and restart the server to try a code change out.
|
65
73
|
|
66
|
-
|
67
74
|
Adding Support for a new URL
|
68
75
|
----------------------------
|
69
76
|
|
70
77
|
1. Check if the site supports [oEmbed](http://oembed.com/) or [Open Graph](https://developers.facebook.com/docs/opengraph/).
|
71
|
-
If it does, you can probably get away with just
|
78
|
+
If it does, you can probably get away with just allowing the URL in `Onebox::Engine::AllowlistedGenericOnebox` (see: [Allowlisted Generic Onebox caveats](#user-content-allowlisted-generic-onebox-caveats)).
|
72
79
|
If the site does not support open standards, you can create a new engine.
|
73
80
|
|
74
81
|
2. Create new onebox engine
|
@@ -156,16 +163,23 @@ Adding Support for a new URL
|
|
156
163
|
require_relative "engine/name_onebox"
|
157
164
|
```
|
158
165
|
|
159
|
-
|
160
|
-
Whitelisted Generic Onebox caveats
|
166
|
+
Allowlisted Generic Onebox caveats
|
161
167
|
----------------------------------
|
162
168
|
|
163
|
-
The
|
169
|
+
The Allowlisted Generic Onebox has some caveats for its use, beyond simply allowlisting the domain.
|
164
170
|
|
165
|
-
1. The domain must be
|
171
|
+
1. The domain must be allowlisted
|
166
172
|
2. The URL you're oneboxing cannot be a root url (e.g. `http://example.com` won't work, but `http://example.com/page` will)
|
167
173
|
3. If the oneboxed URL responds with oEmbed and has a `rich` type: the `html` content must contain an `<iframe>`. Responses without an iframe will not be oneboxed.
|
168
174
|
|
175
|
+
Ignoring Canonical URLs
|
176
|
+
-----------------------
|
177
|
+
|
178
|
+
Onebox prefers the canonical URL over the URL as originally entered when searching for Open Graph metadata. If your site's canonical URL does not have Open Graph metadata, use the `og:ignore_canonical` property to have Onebox ignore the canonical URL.
|
179
|
+
|
180
|
+
```html
|
181
|
+
<meta property="og:ignore_canonical" content="true" />
|
182
|
+
```
|
169
183
|
|
170
184
|
Installing
|
171
185
|
----------
|
data/lib/onebox/engine.rb
CHANGED
@@ -141,7 +141,7 @@ require_relative "engine/wikimedia_onebox"
|
|
141
141
|
require_relative "engine/wikipedia_onebox"
|
142
142
|
require_relative "engine/youtube_onebox"
|
143
143
|
require_relative "engine/youku_onebox"
|
144
|
-
require_relative "engine/
|
144
|
+
require_relative "engine/allowlisted_generic_onebox"
|
145
145
|
require_relative "engine/pubmed_onebox"
|
146
146
|
require_relative "engine/soundcloud_onebox"
|
147
147
|
require_relative "engine/imgur_onebox"
|
@@ -168,7 +168,6 @@ require_relative "engine/twitch_clips_onebox"
|
|
168
168
|
require_relative "engine/twitch_stream_onebox"
|
169
169
|
require_relative "engine/twitch_video_onebox"
|
170
170
|
require_relative "engine/trello_onebox"
|
171
|
-
require_relative "engine/wechat_mp_onebox"
|
172
171
|
require_relative "engine/cloudapp_onebox"
|
173
172
|
require_relative "engine/wistia_onebox"
|
174
173
|
require_relative "engine/simplecast_onebox"
|
@@ -4,20 +4,24 @@ require 'htmlentities'
|
|
4
4
|
|
5
5
|
module Onebox
|
6
6
|
module Engine
|
7
|
-
class
|
7
|
+
class AllowlistedGenericOnebox
|
8
8
|
include Engine
|
9
9
|
include StandardEmbed
|
10
10
|
include LayoutSupport
|
11
11
|
|
12
|
-
def self.
|
13
|
-
|
12
|
+
def self.priority
|
13
|
+
200
|
14
14
|
end
|
15
15
|
|
16
|
-
def self.
|
17
|
-
@
|
16
|
+
def self.allowed_domains=(list)
|
17
|
+
@allowed_domains = list
|
18
18
|
end
|
19
19
|
|
20
|
-
def self.
|
20
|
+
def self.allowed_domains
|
21
|
+
@allowed_domains ||= default_allowed_domains.dup
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.default_allowed_domains
|
21
25
|
%w(
|
22
26
|
23hq.com
|
23
27
|
500px.com
|
@@ -176,13 +180,13 @@ module Onebox
|
|
176
180
|
!!(uri.path =~ /\d{4}\/\d{2}\//)
|
177
181
|
end
|
178
182
|
|
179
|
-
def self.
|
183
|
+
def self.allowed_twitter_labels
|
180
184
|
['brand', 'price', 'usd', 'cad', 'reading time', 'likes']
|
181
185
|
end
|
182
186
|
|
183
187
|
def self.===(other)
|
184
188
|
other.kind_of?(URI) ?
|
185
|
-
host_matches(other,
|
189
|
+
host_matches(other, allowed_domains) || probable_wordpress(other) || probable_discourse(other) :
|
186
190
|
super
|
187
191
|
end
|
188
192
|
|
@@ -233,11 +237,11 @@ module Onebox
|
|
233
237
|
end
|
234
238
|
|
235
239
|
# Twitter labels
|
236
|
-
if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!
|
240
|
+
if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i }
|
237
241
|
d[:label_1] = Onebox::Helpers.truncate(d[:label1])
|
238
242
|
d[:data_1] = Onebox::Helpers.truncate(d[:data1])
|
239
243
|
end
|
240
|
-
if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!
|
244
|
+
if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i }
|
241
245
|
unless Onebox::Helpers.blank?(d[:label_1])
|
242
246
|
d[:label_2] = Onebox::Helpers.truncate(d[:label2])
|
243
247
|
d[:data_2] = Onebox::Helpers.truncate(d[:data2])
|
@@ -261,7 +265,7 @@ module Onebox
|
|
261
265
|
def rewrite_https(html)
|
262
266
|
return unless html
|
263
267
|
uri = URI(@url)
|
264
|
-
if
|
268
|
+
if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
|
265
269
|
html = html.gsub("http://", "https://")
|
266
270
|
end
|
267
271
|
html
|
@@ -309,7 +313,7 @@ module Onebox
|
|
309
313
|
data[:height] &&
|
310
314
|
(
|
311
315
|
data[:html]["iframe"] ||
|
312
|
-
|
316
|
+
AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
|
313
317
|
)
|
314
318
|
end
|
315
319
|
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.*\.(mp3|ogg|opus|wav|m4a)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -20,11 +20,25 @@ module Onebox
|
|
20
20
|
private
|
21
21
|
|
22
22
|
def video_html(og)
|
23
|
+
escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
|
24
|
+
|
23
25
|
<<-HTML
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
26
|
+
<aside class="onebox google-photos">
|
27
|
+
<header class="source">
|
28
|
+
<img src="#{raw[:favicon]}" class="site-icon" width="16" height="16">
|
29
|
+
<a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">#{raw[:site_name]}</a>
|
30
|
+
</header>
|
31
|
+
<article class="onebox-body">
|
32
|
+
<h3><a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">#{og.title}</a></h3>
|
33
|
+
<div class="aspect-image-full-size">
|
34
|
+
<a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">
|
35
|
+
<img src="#{og.get_secure_image}" class="scale-image"/>
|
36
|
+
<span class="instagram-video-icon"></span>
|
37
|
+
</a>
|
38
|
+
</div>
|
39
|
+
</article>
|
40
|
+
</aside>
|
41
|
+
HTML
|
28
42
|
end
|
29
43
|
|
30
44
|
def album_html(og)
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -32,7 +32,7 @@ module Onebox
|
|
32
32
|
add_oembed_provider(/nytimes\.com\//, 'https://www.nytimes.com/svc/oembed/json/')
|
33
33
|
|
34
34
|
def always_https?
|
35
|
-
|
35
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
|
36
36
|
end
|
37
37
|
|
38
38
|
def raw
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.*\.(mov|mp4|webm|ogv)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -45,7 +45,7 @@ module Onebox
|
|
45
45
|
HTML
|
46
46
|
else
|
47
47
|
# for channel pages
|
48
|
-
html = Onebox::Engine::
|
48
|
+
html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
|
49
49
|
return if Onebox::Helpers.blank?(html)
|
50
50
|
html.gsub!(/['"]\/\//, "https://")
|
51
51
|
html
|
data/lib/onebox/helpers.rb
CHANGED
@@ -51,17 +51,14 @@ module Onebox
|
|
51
51
|
|
52
52
|
raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
|
53
53
|
|
54
|
-
uri = URI(location)
|
55
|
-
uri = URI(
|
54
|
+
uri = Addressable::URI.parse(location)
|
55
|
+
uri = Addressable::URI.join(domain, uri) if !uri.host
|
56
56
|
|
57
57
|
result = StringIO.new
|
58
|
-
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.
|
58
|
+
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
|
59
59
|
http.open_timeout = Onebox.options.connect_timeout
|
60
60
|
http.read_timeout = Onebox.options.timeout
|
61
|
-
|
62
|
-
http.use_ssl = true
|
63
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
64
|
-
end
|
61
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE # Work around path building bugs
|
65
62
|
|
66
63
|
headers ||= {}
|
67
64
|
|
@@ -76,10 +73,12 @@ module Onebox
|
|
76
73
|
http.request(request) do |response|
|
77
74
|
|
78
75
|
if cookie = response.get_fields('set-cookie')
|
79
|
-
|
76
|
+
# HACK: If this breaks again in the future, use HTTP::CookieJar from gem 'http-cookie'
|
77
|
+
# See test: it "does not send cookies to the wrong domain"
|
78
|
+
redir_header = { 'Cookie' => cookie.join('; ') }
|
80
79
|
end
|
81
80
|
|
82
|
-
|
81
|
+
redir_header = nil unless redir_header.is_a? Hash
|
83
82
|
|
84
83
|
code = response.code.to_i
|
85
84
|
unless code === 200
|
@@ -88,7 +87,7 @@ module Onebox
|
|
88
87
|
response['location'],
|
89
88
|
limit - 1,
|
90
89
|
"#{uri.scheme}://#{uri.host}",
|
91
|
-
|
90
|
+
redir_header
|
92
91
|
)
|
93
92
|
end
|
94
93
|
|
@@ -24,7 +24,9 @@ module Onebox
|
|
24
24
|
end
|
25
25
|
|
26
26
|
def to_html
|
27
|
-
|
27
|
+
<<~HTML
|
28
|
+
<iframe src="//#{base_url}#{query_params}&parent=#{options[:hostname]}&autoplay=false" width="620" height="378" frameborder="0" style="overflow: hidden;" scrolling="no" allowfullscreen="allowfullscreen"></iframe>
|
29
|
+
HTML
|
28
30
|
end
|
29
31
|
end
|
30
32
|
end
|
@@ -14,7 +14,7 @@ class Sanitize
|
|
14
14
|
'embed' => %w[height src type width],
|
15
15
|
'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href],
|
16
16
|
'source' => %w[src type],
|
17
|
-
'video' => %w[controls height loop width autoplay muted poster],
|
17
|
+
'video' => %w[controls height loop width autoplay muted poster controlslist playsinline],
|
18
18
|
'path' => %w[d],
|
19
19
|
'svg' => ['aria-hidden', 'width', 'height', 'viewbox'],
|
20
20
|
'div' => [:data], # any data-* attributes,
|
data/lib/onebox/version.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joanna Zeta
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2020-
|
13
|
+
date: 2020-08-18 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: addressable
|
@@ -300,6 +300,7 @@ files:
|
|
300
300
|
- Rakefile
|
301
301
|
- lib/onebox.rb
|
302
302
|
- lib/onebox/engine.rb
|
303
|
+
- lib/onebox/engine/allowlisted_generic_onebox.rb
|
303
304
|
- lib/onebox/engine/amazon_onebox.rb
|
304
305
|
- lib/onebox/engine/asciinema_onebox.rb
|
305
306
|
- lib/onebox/engine/audio_onebox.rb
|
@@ -353,8 +354,6 @@ files:
|
|
353
354
|
- lib/onebox/engine/typeform_onebox.rb
|
354
355
|
- lib/onebox/engine/video_onebox.rb
|
355
356
|
- lib/onebox/engine/vimeo_onebox.rb
|
356
|
-
- lib/onebox/engine/wechat_mp_onebox.rb
|
357
|
-
- lib/onebox/engine/whitelisted_generic_onebox.rb
|
358
357
|
- lib/onebox/engine/wikimedia_onebox.rb
|
359
358
|
- lib/onebox/engine/wikipedia_onebox.rb
|
360
359
|
- lib/onebox/engine/wistia_onebox.rb
|
@@ -380,6 +379,7 @@ files:
|
|
380
379
|
- lib/onebox/web_helpers.rb
|
381
380
|
- onebox.gemspec
|
382
381
|
- templates/_layout.mustache
|
382
|
+
- templates/allowlistedgeneric.mustache
|
383
383
|
- templates/amazon.mustache
|
384
384
|
- templates/githubblob.mustache
|
385
385
|
- templates/githubcommit.mustache
|
@@ -396,8 +396,6 @@ files:
|
|
396
396
|
- templates/pubmed.mustache
|
397
397
|
- templates/stackexchange.mustache
|
398
398
|
- templates/twitterstatus.mustache
|
399
|
-
- templates/wechatmp.mustache
|
400
|
-
- templates/whitelistedgeneric.mustache
|
401
399
|
- templates/wikimedia.mustache
|
402
400
|
- templates/wikipedia.mustache
|
403
401
|
- templates/xkcd.mustache
|
@@ -1,62 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Onebox
|
4
|
-
module Engine
|
5
|
-
class WechatMpOnebox
|
6
|
-
include Engine
|
7
|
-
include LayoutSupport
|
8
|
-
include HTML
|
9
|
-
|
10
|
-
always_https
|
11
|
-
matches_regexp(/^https?:\/\/mp\.weixin\.qq\.com\/s.*$/)
|
12
|
-
|
13
|
-
def tld
|
14
|
-
@tld || @@matcher.match(@url)["tld"]
|
15
|
-
end
|
16
|
-
|
17
|
-
def http_params
|
18
|
-
{
|
19
|
-
'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3',
|
20
|
-
'Accept-Encoding' => 'plain'
|
21
|
-
}
|
22
|
-
end
|
23
|
-
|
24
|
-
private
|
25
|
-
|
26
|
-
def extract_script_value(var_name)
|
27
|
-
if (script_elem = raw.css("script").select { |script| script.inner_text.include? "var #{var_name} = " }) && script_elem.any?
|
28
|
-
e = Nokogiri::HTML(script_elem[0].inner_text.match(/var\s+#{Regexp.quote(var_name)}\s+=\s+"(.*?)";/)[1])
|
29
|
-
CGI::unescapeHTML(e.text.scan(/(?:\\x([a-f0-9]{2}))|(.)/i).map { |x| x[0] ? [x[0].to_i(16)].pack('U') : x[1] }.join)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
# TODO need to handle hotlink protection from wechat
|
34
|
-
def image
|
35
|
-
if banner_image = extract_script_value("msg_cdn_url")
|
36
|
-
return banner_image
|
37
|
-
end
|
38
|
-
|
39
|
-
if (main_image = raw.css("img").select { |img| not img['class'] }) && main_image.any?
|
40
|
-
attributes = main_image.first.attributes
|
41
|
-
|
42
|
-
return attributes["data-src"].to_s if attributes["data-src"]
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def data
|
47
|
-
title = CGI.unescapeHTML(raw.css("title").inner_text)
|
48
|
-
by_info = CGI.unescapeHTML(raw.css("span.rich_media_meta_text.rich_media_meta_nickname").inner_text)
|
49
|
-
|
50
|
-
result = {
|
51
|
-
link: extract_script_value("msg_link") || link,
|
52
|
-
title: title,
|
53
|
-
image: image,
|
54
|
-
description: extract_script_value("msg_desc"),
|
55
|
-
by_info: by_info
|
56
|
-
}
|
57
|
-
|
58
|
-
result
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
data/templates/wechatmp.mustache
DELETED