onebox 1.9.30 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -6
- data/lib/onebox/engine.rb +25 -2
- data/lib/onebox/engine/{whitelisted_generic_onebox.rb → allowlisted_generic_onebox.rb} +28 -19
- data/lib/onebox/engine/audio_onebox.rb +1 -1
- data/lib/onebox/engine/bandcamp_onebox.rb +1 -0
- data/lib/onebox/engine/facebook_media_onebox.rb +2 -1
- data/lib/onebox/engine/gfycat_onebox.rb +1 -1
- data/lib/onebox/engine/google_calendar_onebox.rb +1 -0
- data/lib/onebox/engine/google_maps_onebox.rb +2 -0
- data/lib/onebox/engine/image_onebox.rb +1 -1
- data/lib/onebox/engine/kaltura_onebox.rb +1 -0
- data/lib/onebox/engine/reddit_media_onebox.rb +1 -1
- data/lib/onebox/engine/sketchfab_onebox.rb +1 -0
- data/lib/onebox/engine/slides_onebox.rb +2 -1
- data/lib/onebox/engine/soundcloud_onebox.rb +1 -0
- data/lib/onebox/engine/standard_embed.rb +2 -1
- data/lib/onebox/engine/steam_store_onebox.rb +1 -0
- data/lib/onebox/engine/trello_onebox.rb +1 -0
- data/lib/onebox/engine/twitch_clips_onebox.rb +2 -0
- data/lib/onebox/engine/typeform_onebox.rb +1 -0
- data/lib/onebox/engine/video_onebox.rb +1 -1
- data/lib/onebox/engine/vimeo_onebox.rb +1 -0
- data/lib/onebox/engine/wistia_onebox.rb +1 -0
- data/lib/onebox/engine/youku_onebox.rb +9 -1
- data/lib/onebox/engine/youtube_onebox.rb +7 -31
- data/lib/onebox/matcher.rb +8 -2
- data/lib/onebox/mixins/twitch_onebox.rb +2 -1
- data/lib/onebox/preview.rb +11 -4
- data/lib/onebox/sanitize_config.rb +18 -2
- data/lib/onebox/version.rb +1 -1
- data/templates/{whitelistedgeneric.mustache → allowlistedgeneric.mustache} +0 -0
- metadata +4 -6
- data/lib/onebox/engine/wechat_mp_onebox.rb +0 -62
- data/templates/wechatmp.mustache +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ebbcb2268bcade7ace9670046cfbf13f58862561b59802ef0011bc8ae869a6a7
|
4
|
+
data.tar.gz: 51e35394339f1cdd6c6dc7be9971bbc7a0e8b8f9a31b483d877e96be86319e0f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9f540f3437675e0abb5ccb25013b9b516e6bb101a21a26e00a2ecf111940bc660ef8706122f805a279cb67e85823be82183d9f095bc86a7aa7f494c0ecd68645
|
7
|
+
data.tar.gz: 82295ede48990cf922e1e3e6cce645cd1caf4f9965d8e289d756d727f4b7886e116a5ffa1676f7df3873d524b28161c30c75478e2008887a80324e2ed8639fde
|
data/README.md
CHANGED
@@ -71,12 +71,11 @@ out the project. You can then try out URLs.
|
|
71
71
|
The server doesn't reload code changes automatically (PRs accepted!) so
|
72
72
|
make sure to hit CTRL-C and restart the server to try a code change out.
|
73
73
|
|
74
|
-
|
75
74
|
Adding Support for a new URL
|
76
75
|
----------------------------
|
77
76
|
|
78
77
|
1. Check if the site supports [oEmbed](http://oembed.com/) or [Open Graph](https://developers.facebook.com/docs/opengraph/).
|
79
|
-
If it does, you can probably get away with just
|
78
|
+
If it does, you can probably get away with just allowing the URL in `Onebox::Engine::AllowlistedGenericOnebox` (see: [Allowlisted Generic Onebox caveats](#user-content-allowlisted-generic-onebox-caveats)).
|
80
79
|
If the site does not support open standards, you can create a new engine.
|
81
80
|
|
82
81
|
2. Create new onebox engine
|
@@ -164,16 +163,23 @@ Adding Support for a new URL
|
|
164
163
|
require_relative "engine/name_onebox"
|
165
164
|
```
|
166
165
|
|
167
|
-
|
168
|
-
Whitelisted Generic Onebox caveats
|
166
|
+
Allowlisted Generic Onebox caveats
|
169
167
|
----------------------------------
|
170
168
|
|
171
|
-
The
|
169
|
+
The Allowlisted Generic Onebox has some caveats for its use, beyond simply allowlisting the domain.
|
172
170
|
|
173
|
-
1. The domain must be
|
171
|
+
1. The domain must be allowlisted
|
174
172
|
2. The URL you're oneboxing cannot be a root url (e.g. `http://example.com` won't work, but `http://example.com/page` will)
|
175
173
|
3. If the oneboxed URL responds with oEmbed and has a `rich` type: the `html` content must contain an `<iframe>`. Responses without an iframe will not be oneboxed.
|
176
174
|
|
175
|
+
Ignoring Canonical URLs
|
176
|
+
-----------------------
|
177
|
+
|
178
|
+
Onebox prefers to use canonical URLs instead of the raw inputted URL when searching for Open Graph metadata. If your site's canonical URL does not have opengraph metadata, use the `og:ignore_canonical` property to have Onebox ignore the canonical URL.
|
179
|
+
|
180
|
+
```html
|
181
|
+
<meta property="og:ignore_canonical" content="true" />
|
182
|
+
```
|
177
183
|
|
178
184
|
Installing
|
179
185
|
----------
|
data/lib/onebox/engine.rb
CHANGED
@@ -12,6 +12,22 @@ module Onebox
|
|
12
12
|
end.map(&method(:const_get))
|
13
13
|
end
|
14
14
|
|
15
|
+
def self.all_iframe_origins
|
16
|
+
engines.flat_map { |e| e.iframe_origins }.uniq.compact
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.origins_to_regexes(origins)
|
20
|
+
return /.*/ if origins.include?("*")
|
21
|
+
origins.map do |origin|
|
22
|
+
escaped_origin = Regexp.escape(origin)
|
23
|
+
if origin.start_with?("*.", "https://*.", "http://*.")
|
24
|
+
escaped_origin = escaped_origin.sub("\\*", '\S*')
|
25
|
+
end
|
26
|
+
|
27
|
+
Regexp.new("\\A#{escaped_origin}", 'i')
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
15
31
|
attr_reader :url, :uri
|
16
32
|
attr_reader :timeout
|
17
33
|
|
@@ -100,6 +116,14 @@ module Onebox
|
|
100
116
|
class_variable_set :@@matcher, r
|
101
117
|
end
|
102
118
|
|
119
|
+
def requires_iframe_origins(*origins)
|
120
|
+
class_variable_set :@@iframe_origins, origins
|
121
|
+
end
|
122
|
+
|
123
|
+
def iframe_origins
|
124
|
+
class_variable_defined?(:@@iframe_origins) ? class_variable_get(:@@iframe_origins) : []
|
125
|
+
end
|
126
|
+
|
103
127
|
# calculates a name for onebox using the class name of engine
|
104
128
|
def onebox_name
|
105
129
|
name.split("::").last.downcase.gsub(/onebox/, "")
|
@@ -141,7 +165,7 @@ require_relative "engine/wikimedia_onebox"
|
|
141
165
|
require_relative "engine/wikipedia_onebox"
|
142
166
|
require_relative "engine/youtube_onebox"
|
143
167
|
require_relative "engine/youku_onebox"
|
144
|
-
require_relative "engine/
|
168
|
+
require_relative "engine/allowlisted_generic_onebox"
|
145
169
|
require_relative "engine/pubmed_onebox"
|
146
170
|
require_relative "engine/soundcloud_onebox"
|
147
171
|
require_relative "engine/imgur_onebox"
|
@@ -168,7 +192,6 @@ require_relative "engine/twitch_clips_onebox"
|
|
168
192
|
require_relative "engine/twitch_stream_onebox"
|
169
193
|
require_relative "engine/twitch_video_onebox"
|
170
194
|
require_relative "engine/trello_onebox"
|
171
|
-
require_relative "engine/wechat_mp_onebox"
|
172
195
|
require_relative "engine/cloudapp_onebox"
|
173
196
|
require_relative "engine/wistia_onebox"
|
174
197
|
require_relative "engine/simplecast_onebox"
|
@@ -4,20 +4,24 @@ require 'htmlentities'
|
|
4
4
|
|
5
5
|
module Onebox
|
6
6
|
module Engine
|
7
|
-
class
|
7
|
+
class AllowlistedGenericOnebox
|
8
8
|
include Engine
|
9
9
|
include StandardEmbed
|
10
10
|
include LayoutSupport
|
11
11
|
|
12
|
-
def self.
|
13
|
-
|
12
|
+
def self.priority
|
13
|
+
200
|
14
14
|
end
|
15
15
|
|
16
|
-
def self.
|
17
|
-
@
|
16
|
+
def self.allowed_domains=(list)
|
17
|
+
@allowed_domains = list
|
18
18
|
end
|
19
19
|
|
20
|
-
def self.
|
20
|
+
def self.allowed_domains
|
21
|
+
@allowed_domains ||= default_allowed_domains.dup
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.default_allowed_domains
|
21
25
|
%w(
|
22
26
|
23hq.com
|
23
27
|
500px.com
|
@@ -176,13 +180,13 @@ module Onebox
|
|
176
180
|
!!(uri.path =~ /\d{4}\/\d{2}\//)
|
177
181
|
end
|
178
182
|
|
179
|
-
def self.
|
183
|
+
def self.allowed_twitter_labels
|
180
184
|
['brand', 'price', 'usd', 'cad', 'reading time', 'likes']
|
181
185
|
end
|
182
186
|
|
183
187
|
def self.===(other)
|
184
188
|
other.kind_of?(URI) ?
|
185
|
-
host_matches(other,
|
189
|
+
host_matches(other, allowed_domains) || probable_wordpress(other) || probable_discourse(other) :
|
186
190
|
super
|
187
191
|
end
|
188
192
|
|
@@ -233,11 +237,11 @@ module Onebox
|
|
233
237
|
end
|
234
238
|
|
235
239
|
# Twitter labels
|
236
|
-
if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!
|
240
|
+
if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i }
|
237
241
|
d[:label_1] = Onebox::Helpers.truncate(d[:label1])
|
238
242
|
d[:data_1] = Onebox::Helpers.truncate(d[:data1])
|
239
243
|
end
|
240
|
-
if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!
|
244
|
+
if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i }
|
241
245
|
unless Onebox::Helpers.blank?(d[:label_1])
|
242
246
|
d[:label_2] = Onebox::Helpers.truncate(d[:label2])
|
243
247
|
d[:data_2] = Onebox::Helpers.truncate(d[:data2])
|
@@ -261,7 +265,7 @@ module Onebox
|
|
261
265
|
def rewrite_https(html)
|
262
266
|
return unless html
|
263
267
|
uri = URI(@url)
|
264
|
-
if
|
268
|
+
if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
|
265
269
|
html = html.gsub("http://", "https://")
|
266
270
|
end
|
267
271
|
html
|
@@ -277,7 +281,9 @@ module Onebox
|
|
277
281
|
end
|
278
282
|
|
279
283
|
def is_card?
|
280
|
-
data[:card] == 'player' &&
|
284
|
+
data[:card] == 'player' &&
|
285
|
+
data[:player] =~ URI::regexp &&
|
286
|
+
options[:allowed_iframe_regexes]&.any? { |r| data[:player] =~ r }
|
281
287
|
end
|
282
288
|
|
283
289
|
def is_article?
|
@@ -301,16 +307,19 @@ module Onebox
|
|
301
307
|
end
|
302
308
|
|
303
309
|
def is_video?
|
304
|
-
data[:type] =~ /^video[\/\.]/ &&
|
310
|
+
data[:type] =~ /^video[\/\.]/ &&
|
311
|
+
data[:video_type] == "video/mp4" && # Many sites include 'videos' with text/html types (i.e. iframes)
|
312
|
+
!Onebox::Helpers.blank?(data[:video])
|
305
313
|
end
|
306
314
|
|
307
315
|
def is_embedded?
|
308
|
-
data[:html] &&
|
309
|
-
data[:
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
)
|
316
|
+
return false unless data[:html] && data[:height]
|
317
|
+
return true if AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
|
318
|
+
return false unless data[:html]["iframe"]
|
319
|
+
|
320
|
+
fragment = Nokogiri::HTML::fragment(data[:html])
|
321
|
+
src = fragment.at_css('iframe')&.[]("src")
|
322
|
+
options[:allowed_iframe_regexes]&.any? { |r| src =~ r }
|
314
323
|
end
|
315
324
|
|
316
325
|
def card_html
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.*\.(mp3|ogg|opus|wav|m4a)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -8,6 +8,7 @@ module Onebox
|
|
8
8
|
|
9
9
|
matches_regexp(/^https?:\/\/.*\.facebook\.com\/(\w+)\/(videos|\?).*/)
|
10
10
|
always_https
|
11
|
+
requires_iframe_origins "https://www.facebook.com"
|
11
12
|
|
12
13
|
def to_html
|
13
14
|
metadata = get_twitter
|
@@ -22,7 +23,7 @@ module Onebox
|
|
22
23
|
</iframe>
|
23
24
|
HTML
|
24
25
|
else
|
25
|
-
html = Onebox::Engine::
|
26
|
+
html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
|
26
27
|
return if Onebox::Helpers.blank?(html)
|
27
28
|
html
|
28
29
|
end
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -7,10 +7,11 @@ module Onebox
|
|
7
7
|
include StandardEmbed
|
8
8
|
|
9
9
|
matches_regexp(/^https?:\/\/slides\.com\/[\p{Alnum}_\-]+\/[\p{Alnum}_\-]+$/)
|
10
|
+
requires_iframe_origins "https://slides.com"
|
10
11
|
|
11
12
|
def to_html
|
12
13
|
<<-HTML
|
13
|
-
<iframe src="
|
14
|
+
<iframe src="https://slides.com#{uri.path}/embed?style=light"
|
14
15
|
width="576"
|
15
16
|
height="420"
|
16
17
|
scrolling="no"
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require "cgi"
|
4
4
|
require "onebox/open_graph"
|
5
|
+
require 'onebox/oembed'
|
5
6
|
|
6
7
|
module Onebox
|
7
8
|
module Engine
|
@@ -32,7 +33,7 @@ module Onebox
|
|
32
33
|
add_oembed_provider(/nytimes\.com\//, 'https://www.nytimes.com/svc/oembed/json/')
|
33
34
|
|
34
35
|
def always_https?
|
35
|
-
|
36
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
|
36
37
|
end
|
37
38
|
|
38
39
|
def raw
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.*\.(mov|mp4|webm|ogv)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -7,6 +7,7 @@ module Onebox
|
|
7
7
|
include HTML
|
8
8
|
|
9
9
|
matches_regexp(/^(https?:\/\/)?([\da-z\.-]+)(youku.com\/)(.)+\/?$/)
|
10
|
+
requires_iframe_origins "https://player.youku.com"
|
10
11
|
|
11
12
|
# Try to get the video ID. Works for URLs of the form:
|
12
13
|
# * http://v.youku.com/v_show/id_XNjM3MzAxNzc2.html
|
@@ -19,7 +20,14 @@ module Onebox
|
|
19
20
|
end
|
20
21
|
|
21
22
|
def to_html
|
22
|
-
|
23
|
+
<<~HTML
|
24
|
+
<iframe src="https://player.youku.com/embed/#{video_id}"
|
25
|
+
width="640"
|
26
|
+
height="430"
|
27
|
+
frameborder='0'
|
28
|
+
allowfullscreen>
|
29
|
+
</iframe>
|
30
|
+
HTML
|
23
31
|
end
|
24
32
|
|
25
33
|
private
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'onebox/oembed'
|
4
|
-
|
5
3
|
module Onebox
|
6
4
|
module Engine
|
7
5
|
class YoutubeOnebox
|
@@ -9,16 +7,17 @@ module Onebox
|
|
9
7
|
include StandardEmbed
|
10
8
|
|
11
9
|
matches_regexp(/^https?:\/\/(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)\/.+$/)
|
10
|
+
requires_iframe_origins "https://www.youtube.com"
|
12
11
|
always_https
|
13
12
|
|
14
13
|
WIDTH ||= 480
|
15
14
|
HEIGHT ||= 360
|
16
15
|
|
17
16
|
def placeholder_html
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
"<img src='#{
|
17
|
+
og = get_opengraph.data
|
18
|
+
|
19
|
+
if video_id || list_id
|
20
|
+
"<img src='#{og[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{og[:title]}'>"
|
22
21
|
else
|
23
22
|
to_html
|
24
23
|
end
|
@@ -45,7 +44,7 @@ module Onebox
|
|
45
44
|
HTML
|
46
45
|
else
|
47
46
|
# for channel pages
|
48
|
-
html = Onebox::Engine::
|
47
|
+
html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
|
49
48
|
return if Onebox::Helpers.blank?(html)
|
50
49
|
html.gsub!(/['"]\/\//, "https://")
|
51
50
|
html
|
@@ -53,7 +52,7 @@ module Onebox
|
|
53
52
|
end
|
54
53
|
|
55
54
|
def video_title
|
56
|
-
@video_title ||=
|
55
|
+
@video_title ||= get_opengraph.data[:title]
|
57
56
|
end
|
58
57
|
|
59
58
|
private
|
@@ -81,29 +80,6 @@ module Onebox
|
|
81
80
|
@list_id ||= params['list']
|
82
81
|
end
|
83
82
|
|
84
|
-
def list_thumbnail_url
|
85
|
-
@list_thumbnail_url ||= begin
|
86
|
-
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
|
87
|
-
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
88
|
-
data = Onebox::Oembed.new(response)
|
89
|
-
data.thumbnail_url
|
90
|
-
rescue
|
91
|
-
nil
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
def video_oembed_data
|
96
|
-
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=#{video_id}"
|
97
|
-
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
98
|
-
Onebox::Oembed.new(response)
|
99
|
-
end
|
100
|
-
|
101
|
-
def list_oembed_data
|
102
|
-
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
|
103
|
-
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
104
|
-
Onebox::Oembed.new(response)
|
105
|
-
end
|
106
|
-
|
107
83
|
def embed_params
|
108
84
|
p = { 'feature' => 'oembed', 'wmode' => 'opaque' }
|
109
85
|
|
data/lib/onebox/matcher.rb
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
|
3
3
|
module Onebox
|
4
4
|
class Matcher
|
5
|
-
def initialize(link)
|
5
|
+
def initialize(link, options = {})
|
6
6
|
@url = link
|
7
|
+
@options = options
|
7
8
|
end
|
8
9
|
|
9
10
|
def ordered_engines
|
@@ -16,9 +17,14 @@ module Onebox
|
|
16
17
|
uri = URI(@url)
|
17
18
|
return unless uri.port.nil? || Onebox.options.allowed_ports.include?(uri.port)
|
18
19
|
return unless uri.scheme.nil? || Onebox.options.allowed_schemes.include?(uri.scheme)
|
19
|
-
ordered_engines.find { |engine| engine === uri }
|
20
|
+
ordered_engines.find { |engine| engine === uri && has_allowed_iframe_origins?(engine) }
|
20
21
|
rescue URI::InvalidURIError
|
21
22
|
nil
|
22
23
|
end
|
24
|
+
|
25
|
+
def has_allowed_iframe_origins?(engine)
|
26
|
+
allowed_regexes = @options[:allowed_iframe_regexes] || []
|
27
|
+
engine.iframe_origins.all? { |o| allowed_regexes.any? { |r| o =~ r } }
|
28
|
+
end
|
23
29
|
end
|
24
30
|
end
|
@@ -7,6 +7,7 @@ module Onebox
|
|
7
7
|
def self.included(klass)
|
8
8
|
klass.include(Onebox::Engine)
|
9
9
|
klass.matches_regexp(klass.twitch_regexp)
|
10
|
+
klass.requires_iframe_origins "https://player.twitch.tv"
|
10
11
|
klass.include(InstanceMethods)
|
11
12
|
end
|
12
13
|
|
@@ -25,7 +26,7 @@ module Onebox
|
|
25
26
|
|
26
27
|
def to_html
|
27
28
|
<<~HTML
|
28
|
-
<iframe src="
|
29
|
+
<iframe src="https://#{base_url}#{query_params}&parent=#{options[:hostname]}&autoplay=false" width="620" height="378" frameborder="0" style="overflow: hidden;" scrolling="no" allowfullscreen="allowfullscreen"></iframe>
|
29
30
|
HTML
|
30
31
|
end
|
31
32
|
end
|
data/lib/onebox/preview.rb
CHANGED
@@ -7,10 +7,14 @@ module Onebox
|
|
7
7
|
client_exception = defined?(Net::HTTPClientException) ? Net::HTTPClientException : Net::HTTPServerException
|
8
8
|
WEB_EXCEPTIONS ||= [client_exception, OpenURI::HTTPError, Timeout::Error, Net::HTTPError, Errno::ECONNREFUSED]
|
9
9
|
|
10
|
-
def initialize(link,
|
10
|
+
def initialize(link, options = Onebox.options)
|
11
11
|
@url = link
|
12
|
-
@options =
|
13
|
-
|
12
|
+
@options = options.dup
|
13
|
+
|
14
|
+
allowed_origins = @options[:allowed_iframe_origins] || Onebox::Engine.all_iframe_origins
|
15
|
+
@options[:allowed_iframe_regexes] = Engine.origins_to_regexes(allowed_origins)
|
16
|
+
|
17
|
+
@engine_class = Matcher.new(@url, @options).oneboxed
|
14
18
|
end
|
15
19
|
|
16
20
|
def to_s
|
@@ -63,7 +67,10 @@ module Onebox
|
|
63
67
|
end
|
64
68
|
|
65
69
|
def sanitize(html)
|
66
|
-
|
70
|
+
config = @options[:sanitize_config] || Sanitize::Config::ONEBOX
|
71
|
+
config = config.merge(allowed_iframe_regexes: @options[:allowed_iframe_regexes])
|
72
|
+
|
73
|
+
Sanitize.fragment(html, config)
|
67
74
|
end
|
68
75
|
|
69
76
|
def engine
|
@@ -12,9 +12,9 @@ class Sanitize
|
|
12
12
|
'a' => RELAXED[:attributes]['a'] + %w(target),
|
13
13
|
'audio' => %w[controls],
|
14
14
|
'embed' => %w[height src type width],
|
15
|
-
'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href],
|
15
|
+
'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href data-unsanitized-src],
|
16
16
|
'source' => %w[src type],
|
17
|
-
'video' => %w[controls height loop width autoplay muted poster],
|
17
|
+
'video' => %w[controls height loop width autoplay muted poster controlslist playsinline],
|
18
18
|
'path' => %w[d],
|
19
19
|
'svg' => ['aria-hidden', 'width', 'height', 'viewbox'],
|
20
20
|
'div' => [:data], # any data-* attributes,
|
@@ -39,6 +39,22 @@ class Sanitize
|
|
39
39
|
else
|
40
40
|
a_tag.remove_attribute('target')
|
41
41
|
end
|
42
|
+
end,
|
43
|
+
|
44
|
+
lambda do |env|
|
45
|
+
next unless env[:node_name] == 'iframe'
|
46
|
+
|
47
|
+
iframe = env[:node]
|
48
|
+
allowed_regexes = env[:config][:allowed_iframe_regexes] || [/.*/]
|
49
|
+
|
50
|
+
allowed = allowed_regexes.any? { |r| iframe["src"] =~ r }
|
51
|
+
|
52
|
+
if !allowed
|
53
|
+
# add a data attribute with the blocked src. This is not required
|
54
|
+
# but makes it much easier to troubleshoot onebox issues
|
55
|
+
iframe["data-unsanitized-src"] = iframe["src"]
|
56
|
+
iframe.remove_attribute("src")
|
57
|
+
end
|
42
58
|
end
|
43
59
|
],
|
44
60
|
|
data/lib/onebox/version.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 2.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joanna Zeta
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2020-
|
13
|
+
date: 2020-08-27 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: addressable
|
@@ -300,6 +300,7 @@ files:
|
|
300
300
|
- Rakefile
|
301
301
|
- lib/onebox.rb
|
302
302
|
- lib/onebox/engine.rb
|
303
|
+
- lib/onebox/engine/allowlisted_generic_onebox.rb
|
303
304
|
- lib/onebox/engine/amazon_onebox.rb
|
304
305
|
- lib/onebox/engine/asciinema_onebox.rb
|
305
306
|
- lib/onebox/engine/audio_onebox.rb
|
@@ -353,8 +354,6 @@ files:
|
|
353
354
|
- lib/onebox/engine/typeform_onebox.rb
|
354
355
|
- lib/onebox/engine/video_onebox.rb
|
355
356
|
- lib/onebox/engine/vimeo_onebox.rb
|
356
|
-
- lib/onebox/engine/wechat_mp_onebox.rb
|
357
|
-
- lib/onebox/engine/whitelisted_generic_onebox.rb
|
358
357
|
- lib/onebox/engine/wikimedia_onebox.rb
|
359
358
|
- lib/onebox/engine/wikipedia_onebox.rb
|
360
359
|
- lib/onebox/engine/wistia_onebox.rb
|
@@ -380,6 +379,7 @@ files:
|
|
380
379
|
- lib/onebox/web_helpers.rb
|
381
380
|
- onebox.gemspec
|
382
381
|
- templates/_layout.mustache
|
382
|
+
- templates/allowlistedgeneric.mustache
|
383
383
|
- templates/amazon.mustache
|
384
384
|
- templates/githubblob.mustache
|
385
385
|
- templates/githubcommit.mustache
|
@@ -396,8 +396,6 @@ files:
|
|
396
396
|
- templates/pubmed.mustache
|
397
397
|
- templates/stackexchange.mustache
|
398
398
|
- templates/twitterstatus.mustache
|
399
|
-
- templates/wechatmp.mustache
|
400
|
-
- templates/whitelistedgeneric.mustache
|
401
399
|
- templates/wikimedia.mustache
|
402
400
|
- templates/wikipedia.mustache
|
403
401
|
- templates/xkcd.mustache
|
@@ -1,62 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Onebox
|
4
|
-
module Engine
|
5
|
-
class WechatMpOnebox
|
6
|
-
include Engine
|
7
|
-
include LayoutSupport
|
8
|
-
include HTML
|
9
|
-
|
10
|
-
always_https
|
11
|
-
matches_regexp(/^https?:\/\/mp\.weixin\.qq\.com\/s.*$/)
|
12
|
-
|
13
|
-
def tld
|
14
|
-
@tld || @@matcher.match(@url)["tld"]
|
15
|
-
end
|
16
|
-
|
17
|
-
def http_params
|
18
|
-
{
|
19
|
-
'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3',
|
20
|
-
'Accept-Encoding' => 'plain'
|
21
|
-
}
|
22
|
-
end
|
23
|
-
|
24
|
-
private
|
25
|
-
|
26
|
-
def extract_script_value(var_name)
|
27
|
-
if (script_elem = raw.css("script").select { |script| script.inner_text.include? "var #{var_name} = " }) && script_elem.any?
|
28
|
-
e = Nokogiri::HTML(script_elem[0].inner_text.match(/var\s+#{Regexp.quote(var_name)}\s+=\s+"(.*?)";/)[1])
|
29
|
-
CGI::unescapeHTML(e.text.scan(/(?:\\x([a-f0-9]{2}))|(.)/i).map { |x| x[0] ? [x[0].to_i(16)].pack('U') : x[1] }.join)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
# TODO need to handle hotlink protection from wechat
|
34
|
-
def image
|
35
|
-
if banner_image = extract_script_value("msg_cdn_url")
|
36
|
-
return banner_image
|
37
|
-
end
|
38
|
-
|
39
|
-
if (main_image = raw.css("img").select { |img| not img['class'] }) && main_image.any?
|
40
|
-
attributes = main_image.first.attributes
|
41
|
-
|
42
|
-
return attributes["data-src"].to_s if attributes["data-src"]
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def data
|
47
|
-
title = CGI.unescapeHTML(raw.css("title").inner_text)
|
48
|
-
by_info = CGI.unescapeHTML(raw.css("span.rich_media_meta_text.rich_media_meta_nickname").inner_text)
|
49
|
-
|
50
|
-
result = {
|
51
|
-
link: extract_script_value("msg_link") || link,
|
52
|
-
title: title,
|
53
|
-
image: image,
|
54
|
-
description: extract_script_value("msg_desc"),
|
55
|
-
by_info: by_info
|
56
|
-
}
|
57
|
-
|
58
|
-
result
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
data/templates/wechatmp.mustache
DELETED