onebox 1.9.30 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -6
- data/lib/onebox/engine.rb +25 -2
- data/lib/onebox/engine/{whitelisted_generic_onebox.rb → allowlisted_generic_onebox.rb} +28 -19
- data/lib/onebox/engine/audio_onebox.rb +1 -1
- data/lib/onebox/engine/bandcamp_onebox.rb +1 -0
- data/lib/onebox/engine/facebook_media_onebox.rb +2 -1
- data/lib/onebox/engine/gfycat_onebox.rb +1 -1
- data/lib/onebox/engine/google_calendar_onebox.rb +1 -0
- data/lib/onebox/engine/google_maps_onebox.rb +2 -0
- data/lib/onebox/engine/image_onebox.rb +1 -1
- data/lib/onebox/engine/kaltura_onebox.rb +1 -0
- data/lib/onebox/engine/reddit_media_onebox.rb +1 -1
- data/lib/onebox/engine/sketchfab_onebox.rb +1 -0
- data/lib/onebox/engine/slides_onebox.rb +2 -1
- data/lib/onebox/engine/soundcloud_onebox.rb +1 -0
- data/lib/onebox/engine/standard_embed.rb +2 -1
- data/lib/onebox/engine/steam_store_onebox.rb +1 -0
- data/lib/onebox/engine/trello_onebox.rb +1 -0
- data/lib/onebox/engine/twitch_clips_onebox.rb +2 -0
- data/lib/onebox/engine/typeform_onebox.rb +1 -0
- data/lib/onebox/engine/video_onebox.rb +1 -1
- data/lib/onebox/engine/vimeo_onebox.rb +1 -0
- data/lib/onebox/engine/wistia_onebox.rb +1 -0
- data/lib/onebox/engine/youku_onebox.rb +9 -1
- data/lib/onebox/engine/youtube_onebox.rb +7 -31
- data/lib/onebox/matcher.rb +8 -2
- data/lib/onebox/mixins/twitch_onebox.rb +2 -1
- data/lib/onebox/preview.rb +11 -4
- data/lib/onebox/sanitize_config.rb +18 -2
- data/lib/onebox/version.rb +1 -1
- data/templates/{whitelistedgeneric.mustache → allowlistedgeneric.mustache} +0 -0
- metadata +4 -6
- data/lib/onebox/engine/wechat_mp_onebox.rb +0 -62
- data/templates/wechatmp.mustache +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ebbcb2268bcade7ace9670046cfbf13f58862561b59802ef0011bc8ae869a6a7
|
4
|
+
data.tar.gz: 51e35394339f1cdd6c6dc7be9971bbc7a0e8b8f9a31b483d877e96be86319e0f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9f540f3437675e0abb5ccb25013b9b516e6bb101a21a26e00a2ecf111940bc660ef8706122f805a279cb67e85823be82183d9f095bc86a7aa7f494c0ecd68645
|
7
|
+
data.tar.gz: 82295ede48990cf922e1e3e6cce645cd1caf4f9965d8e289d756d727f4b7886e116a5ffa1676f7df3873d524b28161c30c75478e2008887a80324e2ed8639fde
|
data/README.md
CHANGED
@@ -71,12 +71,11 @@ out the project. You can then try out URLs.
|
|
71
71
|
The server doesn't reload code changes automatically (PRs accepted!) so
|
72
72
|
make sure to hit CTRL-C and restart the server to try a code change out.
|
73
73
|
|
74
|
-
|
75
74
|
Adding Support for a new URL
|
76
75
|
----------------------------
|
77
76
|
|
78
77
|
1. Check if the site supports [oEmbed](http://oembed.com/) or [Open Graph](https://developers.facebook.com/docs/opengraph/).
|
79
|
-
If it does, you can probably get away with just
|
78
|
+
If it does, you can probably get away with just allowing the URL in `Onebox::Engine::AllowlistedGenericOnebox` (see: [Allowlisted Generic Onebox caveats](#user-content-allowlisted-generic-onebox-caveats)).
|
80
79
|
If the site does not support open standards, you can create a new engine.
|
81
80
|
|
82
81
|
2. Create new onebox engine
|
@@ -164,16 +163,23 @@ Adding Support for a new URL
|
|
164
163
|
require_relative "engine/name_onebox"
|
165
164
|
```
|
166
165
|
|
167
|
-
|
168
|
-
Whitelisted Generic Onebox caveats
|
166
|
+
Allowlisted Generic Onebox caveats
|
169
167
|
----------------------------------
|
170
168
|
|
171
|
-
The
|
169
|
+
The Allowlisted Generic Onebox has some caveats for its use, beyond simply allowlisting the domain.
|
172
170
|
|
173
|
-
1. The domain must be
|
171
|
+
1. The domain must be allowlisted
|
174
172
|
2. The URL you're oneboxing cannot be a root url (e.g. `http://example.com` won't work, but `http://example.com/page` will)
|
175
173
|
3. If the oneboxed URL responds with oEmbed and has a `rich` type: the `html` content must contain an `<iframe>`. Responses without an iframe will not be oneboxed.
|
176
174
|
|
175
|
+
Ignoring Canonical URLs
|
176
|
+
-----------------------
|
177
|
+
|
178
|
+
Onebox prefers to use canonical URLs instead of the raw inputted URL when searching for Open Graph metadata. If your site's canonical URL does not have opengraph metadata, use the `og:ignore_canonical` property to have Onebox ignore the canonical URL.
|
179
|
+
|
180
|
+
```html
|
181
|
+
<meta property="og:ignore_canonical" content="true" />
|
182
|
+
```
|
177
183
|
|
178
184
|
Installing
|
179
185
|
----------
|
data/lib/onebox/engine.rb
CHANGED
@@ -12,6 +12,22 @@ module Onebox
|
|
12
12
|
end.map(&method(:const_get))
|
13
13
|
end
|
14
14
|
|
15
|
+
def self.all_iframe_origins
|
16
|
+
engines.flat_map { |e| e.iframe_origins }.uniq.compact
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.origins_to_regexes(origins)
|
20
|
+
return /.*/ if origins.include?("*")
|
21
|
+
origins.map do |origin|
|
22
|
+
escaped_origin = Regexp.escape(origin)
|
23
|
+
if origin.start_with?("*.", "https://*.", "http://*.")
|
24
|
+
escaped_origin = escaped_origin.sub("\\*", '\S*')
|
25
|
+
end
|
26
|
+
|
27
|
+
Regexp.new("\\A#{escaped_origin}", 'i')
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
15
31
|
attr_reader :url, :uri
|
16
32
|
attr_reader :timeout
|
17
33
|
|
@@ -100,6 +116,14 @@ module Onebox
|
|
100
116
|
class_variable_set :@@matcher, r
|
101
117
|
end
|
102
118
|
|
119
|
+
def requires_iframe_origins(*origins)
|
120
|
+
class_variable_set :@@iframe_origins, origins
|
121
|
+
end
|
122
|
+
|
123
|
+
def iframe_origins
|
124
|
+
class_variable_defined?(:@@iframe_origins) ? class_variable_get(:@@iframe_origins) : []
|
125
|
+
end
|
126
|
+
|
103
127
|
# calculates a name for onebox using the class name of engine
|
104
128
|
def onebox_name
|
105
129
|
name.split("::").last.downcase.gsub(/onebox/, "")
|
@@ -141,7 +165,7 @@ require_relative "engine/wikimedia_onebox"
|
|
141
165
|
require_relative "engine/wikipedia_onebox"
|
142
166
|
require_relative "engine/youtube_onebox"
|
143
167
|
require_relative "engine/youku_onebox"
|
144
|
-
require_relative "engine/
|
168
|
+
require_relative "engine/allowlisted_generic_onebox"
|
145
169
|
require_relative "engine/pubmed_onebox"
|
146
170
|
require_relative "engine/soundcloud_onebox"
|
147
171
|
require_relative "engine/imgur_onebox"
|
@@ -168,7 +192,6 @@ require_relative "engine/twitch_clips_onebox"
|
|
168
192
|
require_relative "engine/twitch_stream_onebox"
|
169
193
|
require_relative "engine/twitch_video_onebox"
|
170
194
|
require_relative "engine/trello_onebox"
|
171
|
-
require_relative "engine/wechat_mp_onebox"
|
172
195
|
require_relative "engine/cloudapp_onebox"
|
173
196
|
require_relative "engine/wistia_onebox"
|
174
197
|
require_relative "engine/simplecast_onebox"
|
@@ -4,20 +4,24 @@ require 'htmlentities'
|
|
4
4
|
|
5
5
|
module Onebox
|
6
6
|
module Engine
|
7
|
-
class
|
7
|
+
class AllowlistedGenericOnebox
|
8
8
|
include Engine
|
9
9
|
include StandardEmbed
|
10
10
|
include LayoutSupport
|
11
11
|
|
12
|
-
def self.
|
13
|
-
|
12
|
+
def self.priority
|
13
|
+
200
|
14
14
|
end
|
15
15
|
|
16
|
-
def self.
|
17
|
-
@
|
16
|
+
def self.allowed_domains=(list)
|
17
|
+
@allowed_domains = list
|
18
18
|
end
|
19
19
|
|
20
|
-
def self.
|
20
|
+
def self.allowed_domains
|
21
|
+
@allowed_domains ||= default_allowed_domains.dup
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.default_allowed_domains
|
21
25
|
%w(
|
22
26
|
23hq.com
|
23
27
|
500px.com
|
@@ -176,13 +180,13 @@ module Onebox
|
|
176
180
|
!!(uri.path =~ /\d{4}\/\d{2}\//)
|
177
181
|
end
|
178
182
|
|
179
|
-
def self.
|
183
|
+
def self.allowed_twitter_labels
|
180
184
|
['brand', 'price', 'usd', 'cad', 'reading time', 'likes']
|
181
185
|
end
|
182
186
|
|
183
187
|
def self.===(other)
|
184
188
|
other.kind_of?(URI) ?
|
185
|
-
host_matches(other,
|
189
|
+
host_matches(other, allowed_domains) || probable_wordpress(other) || probable_discourse(other) :
|
186
190
|
super
|
187
191
|
end
|
188
192
|
|
@@ -233,11 +237,11 @@ module Onebox
|
|
233
237
|
end
|
234
238
|
|
235
239
|
# Twitter labels
|
236
|
-
if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!
|
240
|
+
if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i }
|
237
241
|
d[:label_1] = Onebox::Helpers.truncate(d[:label1])
|
238
242
|
d[:data_1] = Onebox::Helpers.truncate(d[:data1])
|
239
243
|
end
|
240
|
-
if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!
|
244
|
+
if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i }
|
241
245
|
unless Onebox::Helpers.blank?(d[:label_1])
|
242
246
|
d[:label_2] = Onebox::Helpers.truncate(d[:label2])
|
243
247
|
d[:data_2] = Onebox::Helpers.truncate(d[:data2])
|
@@ -261,7 +265,7 @@ module Onebox
|
|
261
265
|
def rewrite_https(html)
|
262
266
|
return unless html
|
263
267
|
uri = URI(@url)
|
264
|
-
if
|
268
|
+
if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
|
265
269
|
html = html.gsub("http://", "https://")
|
266
270
|
end
|
267
271
|
html
|
@@ -277,7 +281,9 @@ module Onebox
|
|
277
281
|
end
|
278
282
|
|
279
283
|
def is_card?
|
280
|
-
data[:card] == 'player' &&
|
284
|
+
data[:card] == 'player' &&
|
285
|
+
data[:player] =~ URI::regexp &&
|
286
|
+
options[:allowed_iframe_regexes]&.any? { |r| data[:player] =~ r }
|
281
287
|
end
|
282
288
|
|
283
289
|
def is_article?
|
@@ -301,16 +307,19 @@ module Onebox
|
|
301
307
|
end
|
302
308
|
|
303
309
|
def is_video?
|
304
|
-
data[:type] =~ /^video[\/\.]/ &&
|
310
|
+
data[:type] =~ /^video[\/\.]/ &&
|
311
|
+
data[:video_type] == "video/mp4" && # Many sites include 'videos' with text/html types (i.e. iframes)
|
312
|
+
!Onebox::Helpers.blank?(data[:video])
|
305
313
|
end
|
306
314
|
|
307
315
|
def is_embedded?
|
308
|
-
data[:html] &&
|
309
|
-
data[:
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
)
|
316
|
+
return false unless data[:html] && data[:height]
|
317
|
+
return true if AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
|
318
|
+
return false unless data[:html]["iframe"]
|
319
|
+
|
320
|
+
fragment = Nokogiri::HTML::fragment(data[:html])
|
321
|
+
src = fragment.at_css('iframe')&.[]("src")
|
322
|
+
options[:allowed_iframe_regexes]&.any? { |r| src =~ r }
|
314
323
|
end
|
315
324
|
|
316
325
|
def card_html
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.*\.(mp3|ogg|opus|wav|m4a)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -8,6 +8,7 @@ module Onebox
|
|
8
8
|
|
9
9
|
matches_regexp(/^https?:\/\/.*\.facebook\.com\/(\w+)\/(videos|\?).*/)
|
10
10
|
always_https
|
11
|
+
requires_iframe_origins "https://www.facebook.com"
|
11
12
|
|
12
13
|
def to_html
|
13
14
|
metadata = get_twitter
|
@@ -22,7 +23,7 @@ module Onebox
|
|
22
23
|
</iframe>
|
23
24
|
HTML
|
24
25
|
else
|
25
|
-
html = Onebox::Engine::
|
26
|
+
html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
|
26
27
|
return if Onebox::Helpers.blank?(html)
|
27
28
|
html
|
28
29
|
end
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -7,10 +7,11 @@ module Onebox
|
|
7
7
|
include StandardEmbed
|
8
8
|
|
9
9
|
matches_regexp(/^https?:\/\/slides\.com\/[\p{Alnum}_\-]+\/[\p{Alnum}_\-]+$/)
|
10
|
+
requires_iframe_origins "https://slides.com"
|
10
11
|
|
11
12
|
def to_html
|
12
13
|
<<-HTML
|
13
|
-
<iframe src="
|
14
|
+
<iframe src="https://slides.com#{uri.path}/embed?style=light"
|
14
15
|
width="576"
|
15
16
|
height="420"
|
16
17
|
scrolling="no"
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require "cgi"
|
4
4
|
require "onebox/open_graph"
|
5
|
+
require 'onebox/oembed'
|
5
6
|
|
6
7
|
module Onebox
|
7
8
|
module Engine
|
@@ -32,7 +33,7 @@ module Onebox
|
|
32
33
|
add_oembed_provider(/nytimes\.com\//, 'https://www.nytimes.com/svc/oembed/json/')
|
33
34
|
|
34
35
|
def always_https?
|
35
|
-
|
36
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
|
36
37
|
end
|
37
38
|
|
38
39
|
def raw
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.*\.(mov|mp4|webm|ogv)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -7,6 +7,7 @@ module Onebox
|
|
7
7
|
include HTML
|
8
8
|
|
9
9
|
matches_regexp(/^(https?:\/\/)?([\da-z\.-]+)(youku.com\/)(.)+\/?$/)
|
10
|
+
requires_iframe_origins "https://player.youku.com"
|
10
11
|
|
11
12
|
# Try to get the video ID. Works for URLs of the form:
|
12
13
|
# * http://v.youku.com/v_show/id_XNjM3MzAxNzc2.html
|
@@ -19,7 +20,14 @@ module Onebox
|
|
19
20
|
end
|
20
21
|
|
21
22
|
def to_html
|
22
|
-
|
23
|
+
<<~HTML
|
24
|
+
<iframe src="https://player.youku.com/embed/#{video_id}"
|
25
|
+
width="640"
|
26
|
+
height="430"
|
27
|
+
frameborder='0'
|
28
|
+
allowfullscreen>
|
29
|
+
</iframe>
|
30
|
+
HTML
|
23
31
|
end
|
24
32
|
|
25
33
|
private
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'onebox/oembed'
|
4
|
-
|
5
3
|
module Onebox
|
6
4
|
module Engine
|
7
5
|
class YoutubeOnebox
|
@@ -9,16 +7,17 @@ module Onebox
|
|
9
7
|
include StandardEmbed
|
10
8
|
|
11
9
|
matches_regexp(/^https?:\/\/(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)\/.+$/)
|
10
|
+
requires_iframe_origins "https://www.youtube.com"
|
12
11
|
always_https
|
13
12
|
|
14
13
|
WIDTH ||= 480
|
15
14
|
HEIGHT ||= 360
|
16
15
|
|
17
16
|
def placeholder_html
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
"<img src='#{
|
17
|
+
og = get_opengraph.data
|
18
|
+
|
19
|
+
if video_id || list_id
|
20
|
+
"<img src='#{og[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{og[:title]}'>"
|
22
21
|
else
|
23
22
|
to_html
|
24
23
|
end
|
@@ -45,7 +44,7 @@ module Onebox
|
|
45
44
|
HTML
|
46
45
|
else
|
47
46
|
# for channel pages
|
48
|
-
html = Onebox::Engine::
|
47
|
+
html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
|
49
48
|
return if Onebox::Helpers.blank?(html)
|
50
49
|
html.gsub!(/['"]\/\//, "https://")
|
51
50
|
html
|
@@ -53,7 +52,7 @@ module Onebox
|
|
53
52
|
end
|
54
53
|
|
55
54
|
def video_title
|
56
|
-
@video_title ||=
|
55
|
+
@video_title ||= get_opengraph.data[:title]
|
57
56
|
end
|
58
57
|
|
59
58
|
private
|
@@ -81,29 +80,6 @@ module Onebox
|
|
81
80
|
@list_id ||= params['list']
|
82
81
|
end
|
83
82
|
|
84
|
-
def list_thumbnail_url
|
85
|
-
@list_thumbnail_url ||= begin
|
86
|
-
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
|
87
|
-
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
88
|
-
data = Onebox::Oembed.new(response)
|
89
|
-
data.thumbnail_url
|
90
|
-
rescue
|
91
|
-
nil
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
def video_oembed_data
|
96
|
-
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=#{video_id}"
|
97
|
-
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
98
|
-
Onebox::Oembed.new(response)
|
99
|
-
end
|
100
|
-
|
101
|
-
def list_oembed_data
|
102
|
-
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
|
103
|
-
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
104
|
-
Onebox::Oembed.new(response)
|
105
|
-
end
|
106
|
-
|
107
83
|
def embed_params
|
108
84
|
p = { 'feature' => 'oembed', 'wmode' => 'opaque' }
|
109
85
|
|
data/lib/onebox/matcher.rb
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
|
3
3
|
module Onebox
|
4
4
|
class Matcher
|
5
|
-
def initialize(link)
|
5
|
+
def initialize(link, options = {})
|
6
6
|
@url = link
|
7
|
+
@options = options
|
7
8
|
end
|
8
9
|
|
9
10
|
def ordered_engines
|
@@ -16,9 +17,14 @@ module Onebox
|
|
16
17
|
uri = URI(@url)
|
17
18
|
return unless uri.port.nil? || Onebox.options.allowed_ports.include?(uri.port)
|
18
19
|
return unless uri.scheme.nil? || Onebox.options.allowed_schemes.include?(uri.scheme)
|
19
|
-
ordered_engines.find { |engine| engine === uri }
|
20
|
+
ordered_engines.find { |engine| engine === uri && has_allowed_iframe_origins?(engine) }
|
20
21
|
rescue URI::InvalidURIError
|
21
22
|
nil
|
22
23
|
end
|
24
|
+
|
25
|
+
def has_allowed_iframe_origins?(engine)
|
26
|
+
allowed_regexes = @options[:allowed_iframe_regexes] || []
|
27
|
+
engine.iframe_origins.all? { |o| allowed_regexes.any? { |r| o =~ r } }
|
28
|
+
end
|
23
29
|
end
|
24
30
|
end
|
@@ -7,6 +7,7 @@ module Onebox
|
|
7
7
|
def self.included(klass)
|
8
8
|
klass.include(Onebox::Engine)
|
9
9
|
klass.matches_regexp(klass.twitch_regexp)
|
10
|
+
klass.requires_iframe_origins "https://player.twitch.tv"
|
10
11
|
klass.include(InstanceMethods)
|
11
12
|
end
|
12
13
|
|
@@ -25,7 +26,7 @@ module Onebox
|
|
25
26
|
|
26
27
|
def to_html
|
27
28
|
<<~HTML
|
28
|
-
<iframe src="
|
29
|
+
<iframe src="https://#{base_url}#{query_params}&parent=#{options[:hostname]}&autoplay=false" width="620" height="378" frameborder="0" style="overflow: hidden;" scrolling="no" allowfullscreen="allowfullscreen"></iframe>
|
29
30
|
HTML
|
30
31
|
end
|
31
32
|
end
|
data/lib/onebox/preview.rb
CHANGED
@@ -7,10 +7,14 @@ module Onebox
|
|
7
7
|
client_exception = defined?(Net::HTTPClientException) ? Net::HTTPClientException : Net::HTTPServerException
|
8
8
|
WEB_EXCEPTIONS ||= [client_exception, OpenURI::HTTPError, Timeout::Error, Net::HTTPError, Errno::ECONNREFUSED]
|
9
9
|
|
10
|
-
def initialize(link,
|
10
|
+
def initialize(link, options = Onebox.options)
|
11
11
|
@url = link
|
12
|
-
@options =
|
13
|
-
|
12
|
+
@options = options.dup
|
13
|
+
|
14
|
+
allowed_origins = @options[:allowed_iframe_origins] || Onebox::Engine.all_iframe_origins
|
15
|
+
@options[:allowed_iframe_regexes] = Engine.origins_to_regexes(allowed_origins)
|
16
|
+
|
17
|
+
@engine_class = Matcher.new(@url, @options).oneboxed
|
14
18
|
end
|
15
19
|
|
16
20
|
def to_s
|
@@ -63,7 +67,10 @@ module Onebox
|
|
63
67
|
end
|
64
68
|
|
65
69
|
def sanitize(html)
|
66
|
-
|
70
|
+
config = @options[:sanitize_config] || Sanitize::Config::ONEBOX
|
71
|
+
config = config.merge(allowed_iframe_regexes: @options[:allowed_iframe_regexes])
|
72
|
+
|
73
|
+
Sanitize.fragment(html, config)
|
67
74
|
end
|
68
75
|
|
69
76
|
def engine
|
@@ -12,9 +12,9 @@ class Sanitize
|
|
12
12
|
'a' => RELAXED[:attributes]['a'] + %w(target),
|
13
13
|
'audio' => %w[controls],
|
14
14
|
'embed' => %w[height src type width],
|
15
|
-
'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href],
|
15
|
+
'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href data-unsanitized-src],
|
16
16
|
'source' => %w[src type],
|
17
|
-
'video' => %w[controls height loop width autoplay muted poster],
|
17
|
+
'video' => %w[controls height loop width autoplay muted poster controlslist playsinline],
|
18
18
|
'path' => %w[d],
|
19
19
|
'svg' => ['aria-hidden', 'width', 'height', 'viewbox'],
|
20
20
|
'div' => [:data], # any data-* attributes,
|
@@ -39,6 +39,22 @@ class Sanitize
|
|
39
39
|
else
|
40
40
|
a_tag.remove_attribute('target')
|
41
41
|
end
|
42
|
+
end,
|
43
|
+
|
44
|
+
lambda do |env|
|
45
|
+
next unless env[:node_name] == 'iframe'
|
46
|
+
|
47
|
+
iframe = env[:node]
|
48
|
+
allowed_regexes = env[:config][:allowed_iframe_regexes] || [/.*/]
|
49
|
+
|
50
|
+
allowed = allowed_regexes.any? { |r| iframe["src"] =~ r }
|
51
|
+
|
52
|
+
if !allowed
|
53
|
+
# add a data attribute with the blocked src. This is not required
|
54
|
+
# but makes it much easier to troubleshoot onebox issues
|
55
|
+
iframe["data-unsanitized-src"] = iframe["src"]
|
56
|
+
iframe.remove_attribute("src")
|
57
|
+
end
|
42
58
|
end
|
43
59
|
],
|
44
60
|
|
data/lib/onebox/version.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 2.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joanna Zeta
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2020-
|
13
|
+
date: 2020-08-27 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: addressable
|
@@ -300,6 +300,7 @@ files:
|
|
300
300
|
- Rakefile
|
301
301
|
- lib/onebox.rb
|
302
302
|
- lib/onebox/engine.rb
|
303
|
+
- lib/onebox/engine/allowlisted_generic_onebox.rb
|
303
304
|
- lib/onebox/engine/amazon_onebox.rb
|
304
305
|
- lib/onebox/engine/asciinema_onebox.rb
|
305
306
|
- lib/onebox/engine/audio_onebox.rb
|
@@ -353,8 +354,6 @@ files:
|
|
353
354
|
- lib/onebox/engine/typeform_onebox.rb
|
354
355
|
- lib/onebox/engine/video_onebox.rb
|
355
356
|
- lib/onebox/engine/vimeo_onebox.rb
|
356
|
-
- lib/onebox/engine/wechat_mp_onebox.rb
|
357
|
-
- lib/onebox/engine/whitelisted_generic_onebox.rb
|
358
357
|
- lib/onebox/engine/wikimedia_onebox.rb
|
359
358
|
- lib/onebox/engine/wikipedia_onebox.rb
|
360
359
|
- lib/onebox/engine/wistia_onebox.rb
|
@@ -380,6 +379,7 @@ files:
|
|
380
379
|
- lib/onebox/web_helpers.rb
|
381
380
|
- onebox.gemspec
|
382
381
|
- templates/_layout.mustache
|
382
|
+
- templates/allowlistedgeneric.mustache
|
383
383
|
- templates/amazon.mustache
|
384
384
|
- templates/githubblob.mustache
|
385
385
|
- templates/githubcommit.mustache
|
@@ -396,8 +396,6 @@ files:
|
|
396
396
|
- templates/pubmed.mustache
|
397
397
|
- templates/stackexchange.mustache
|
398
398
|
- templates/twitterstatus.mustache
|
399
|
-
- templates/wechatmp.mustache
|
400
|
-
- templates/whitelistedgeneric.mustache
|
401
399
|
- templates/wikimedia.mustache
|
402
400
|
- templates/wikipedia.mustache
|
403
401
|
- templates/xkcd.mustache
|
@@ -1,62 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Onebox
|
4
|
-
module Engine
|
5
|
-
class WechatMpOnebox
|
6
|
-
include Engine
|
7
|
-
include LayoutSupport
|
8
|
-
include HTML
|
9
|
-
|
10
|
-
always_https
|
11
|
-
matches_regexp(/^https?:\/\/mp\.weixin\.qq\.com\/s.*$/)
|
12
|
-
|
13
|
-
def tld
|
14
|
-
@tld || @@matcher.match(@url)["tld"]
|
15
|
-
end
|
16
|
-
|
17
|
-
def http_params
|
18
|
-
{
|
19
|
-
'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3',
|
20
|
-
'Accept-Encoding' => 'plain'
|
21
|
-
}
|
22
|
-
end
|
23
|
-
|
24
|
-
private
|
25
|
-
|
26
|
-
def extract_script_value(var_name)
|
27
|
-
if (script_elem = raw.css("script").select { |script| script.inner_text.include? "var #{var_name} = " }) && script_elem.any?
|
28
|
-
e = Nokogiri::HTML(script_elem[0].inner_text.match(/var\s+#{Regexp.quote(var_name)}\s+=\s+"(.*?)";/)[1])
|
29
|
-
CGI::unescapeHTML(e.text.scan(/(?:\\x([a-f0-9]{2}))|(.)/i).map { |x| x[0] ? [x[0].to_i(16)].pack('U') : x[1] }.join)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
# TODO need to handle hotlink protection from wechat
|
34
|
-
def image
|
35
|
-
if banner_image = extract_script_value("msg_cdn_url")
|
36
|
-
return banner_image
|
37
|
-
end
|
38
|
-
|
39
|
-
if (main_image = raw.css("img").select { |img| not img['class'] }) && main_image.any?
|
40
|
-
attributes = main_image.first.attributes
|
41
|
-
|
42
|
-
return attributes["data-src"].to_s if attributes["data-src"]
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def data
|
47
|
-
title = CGI.unescapeHTML(raw.css("title").inner_text)
|
48
|
-
by_info = CGI.unescapeHTML(raw.css("span.rich_media_meta_text.rich_media_meta_nickname").inner_text)
|
49
|
-
|
50
|
-
result = {
|
51
|
-
link: extract_script_value("msg_link") || link,
|
52
|
-
title: title,
|
53
|
-
image: image,
|
54
|
-
description: extract_script_value("msg_desc"),
|
55
|
-
by_info: by_info
|
56
|
-
}
|
57
|
-
|
58
|
-
result
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
data/templates/wechatmp.mustache
DELETED