onebox 1.9.30 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/README.md +12 -6
  3. data/lib/onebox/engine.rb +25 -2
  4. data/lib/onebox/engine/{whitelisted_generic_onebox.rb → allowlisted_generic_onebox.rb} +28 -19
  5. data/lib/onebox/engine/audio_onebox.rb +1 -1
  6. data/lib/onebox/engine/bandcamp_onebox.rb +1 -0
  7. data/lib/onebox/engine/facebook_media_onebox.rb +2 -1
  8. data/lib/onebox/engine/gfycat_onebox.rb +1 -1
  9. data/lib/onebox/engine/google_calendar_onebox.rb +1 -0
  10. data/lib/onebox/engine/google_maps_onebox.rb +2 -0
  11. data/lib/onebox/engine/image_onebox.rb +1 -1
  12. data/lib/onebox/engine/kaltura_onebox.rb +1 -0
  13. data/lib/onebox/engine/reddit_media_onebox.rb +1 -1
  14. data/lib/onebox/engine/sketchfab_onebox.rb +1 -0
  15. data/lib/onebox/engine/slides_onebox.rb +2 -1
  16. data/lib/onebox/engine/soundcloud_onebox.rb +1 -0
  17. data/lib/onebox/engine/standard_embed.rb +2 -1
  18. data/lib/onebox/engine/steam_store_onebox.rb +1 -0
  19. data/lib/onebox/engine/trello_onebox.rb +1 -0
  20. data/lib/onebox/engine/twitch_clips_onebox.rb +2 -0
  21. data/lib/onebox/engine/typeform_onebox.rb +1 -0
  22. data/lib/onebox/engine/video_onebox.rb +1 -1
  23. data/lib/onebox/engine/vimeo_onebox.rb +1 -0
  24. data/lib/onebox/engine/wistia_onebox.rb +1 -0
  25. data/lib/onebox/engine/youku_onebox.rb +9 -1
  26. data/lib/onebox/engine/youtube_onebox.rb +7 -31
  27. data/lib/onebox/matcher.rb +8 -2
  28. data/lib/onebox/mixins/twitch_onebox.rb +2 -1
  29. data/lib/onebox/preview.rb +11 -4
  30. data/lib/onebox/sanitize_config.rb +18 -2
  31. data/lib/onebox/version.rb +1 -1
  32. data/templates/{whitelistedgeneric.mustache → allowlistedgeneric.mustache} +0 -0
  33. metadata +4 -6
  34. data/lib/onebox/engine/wechat_mp_onebox.rb +0 -62
  35. data/templates/wechatmp.mustache +0 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 72df1362582e006323722a42dc0787537c297df11f0c0e950b243759b10f74d7
4
- data.tar.gz: d0e483271248e8efc72207ec28dae1836e3469376fc7849383201906d4c522e2
3
+ metadata.gz: ebbcb2268bcade7ace9670046cfbf13f58862561b59802ef0011bc8ae869a6a7
4
+ data.tar.gz: 51e35394339f1cdd6c6dc7be9971bbc7a0e8b8f9a31b483d877e96be86319e0f
5
5
  SHA512:
6
- metadata.gz: 2277c8b9be42305155c66fb4d97e5987868e34a997f87827a3189ab8073a538ae3afd0b9c89b0271e0671c334d096e87813360ef140298fcdfac2fa58948239a
7
- data.tar.gz: ae8306987d4a6ac15fb95a76a9ff44816fc9b84bd73f3ad4af63963fe40834b17de9d7d125f3e92a50ea523ab2cc84cf6b90eea1e8100abe62d991444dd8b946
6
+ metadata.gz: 9f540f3437675e0abb5ccb25013b9b516e6bb101a21a26e00a2ecf111940bc660ef8706122f805a279cb67e85823be82183d9f095bc86a7aa7f494c0ecd68645
7
+ data.tar.gz: 82295ede48990cf922e1e3e6cce645cd1caf4f9965d8e289d756d727f4b7886e116a5ffa1676f7df3873d524b28161c30c75478e2008887a80324e2ed8639fde
data/README.md CHANGED
@@ -71,12 +71,11 @@ out the project. You can then try out URLs.
71
71
  The server doesn't reload code changes automatically (PRs accepted!) so
72
72
  make sure to hit CTRL-C and restart the server to try a code change out.
73
73
 
74
-
75
74
  Adding Support for a new URL
76
75
  ----------------------------
77
76
 
78
77
  1. Check if the site supports [oEmbed](http://oembed.com/) or [Open Graph](https://developers.facebook.com/docs/opengraph/).
79
- If it does, you can probably get away with just whitelisting the URL in `Onebox::Engine::WhitelistedGenericOnebox` (see: [Whitelisted Generic Onebox caveats](#user-content-whitelisted-generic-onebox-caveats)).
78
+ If it does, you can probably get away with just allowing the URL in `Onebox::Engine::AllowlistedGenericOnebox` (see: [Allowlisted Generic Onebox caveats](#user-content-allowlisted-generic-onebox-caveats)).
80
79
  If the site does not support open standards, you can create a new engine.
81
80
 
82
81
  2. Create new onebox engine
@@ -164,16 +163,23 @@ Adding Support for a new URL
164
163
  require_relative "engine/name_onebox"
165
164
  ```
166
165
 
167
-
168
- Whitelisted Generic Onebox caveats
166
+ Allowlisted Generic Onebox caveats
169
167
  ----------------------------------
170
168
 
171
- The Whitelisted Generic Onebox has some caveats for its use, beyond simply whitelisting the domain.
169
+ The Allowlisted Generic Onebox has some caveats for its use, beyond simply allowlisting the domain.
172
170
 
173
- 1. The domain must be whitelisted
171
+ 1. The domain must be allowlisted
174
172
  2. The URL you're oneboxing cannot be a root url (e.g. `http://example.com` won't work, but `http://example.com/page` will)
175
173
  3. If the oneboxed URL responds with oEmbed and has a `rich` type: the `html` content must contain an `<iframe>`. Responses without an iframe will not be oneboxed.
176
174
 
175
+ Ignoring Canonical URLs
176
+ -----------------------
177
+
178
+ Onebox prefers to use canonical URLs instead of the raw inputted URL when searching for Open Graph metadata. If your site's canonical URL does not have opengraph metadata, use the `og:ignore_canonical` property to have Onebox ignore the canonical URL.
179
+
180
+ ```html
181
+ <meta property="og:ignore_canonical" content="true" />
182
+ ```
177
183
 
178
184
  Installing
179
185
  ----------
@@ -12,6 +12,22 @@ module Onebox
12
12
  end.map(&method(:const_get))
13
13
  end
14
14
 
15
+ def self.all_iframe_origins
16
+ engines.flat_map { |e| e.iframe_origins }.uniq.compact
17
+ end
18
+
19
+ def self.origins_to_regexes(origins)
20
+ return /.*/ if origins.include?("*")
21
+ origins.map do |origin|
22
+ escaped_origin = Regexp.escape(origin)
23
+ if origin.start_with?("*.", "https://*.", "http://*.")
24
+ escaped_origin = escaped_origin.sub("\\*", '\S*')
25
+ end
26
+
27
+ Regexp.new("\\A#{escaped_origin}", 'i')
28
+ end
29
+ end
30
+
15
31
  attr_reader :url, :uri
16
32
  attr_reader :timeout
17
33
 
@@ -100,6 +116,14 @@ module Onebox
100
116
  class_variable_set :@@matcher, r
101
117
  end
102
118
 
119
+ def requires_iframe_origins(*origins)
120
+ class_variable_set :@@iframe_origins, origins
121
+ end
122
+
123
+ def iframe_origins
124
+ class_variable_defined?(:@@iframe_origins) ? class_variable_get(:@@iframe_origins) : []
125
+ end
126
+
103
127
  # calculates a name for onebox using the class name of engine
104
128
  def onebox_name
105
129
  name.split("::").last.downcase.gsub(/onebox/, "")
@@ -141,7 +165,7 @@ require_relative "engine/wikimedia_onebox"
141
165
  require_relative "engine/wikipedia_onebox"
142
166
  require_relative "engine/youtube_onebox"
143
167
  require_relative "engine/youku_onebox"
144
- require_relative "engine/whitelisted_generic_onebox"
168
+ require_relative "engine/allowlisted_generic_onebox"
145
169
  require_relative "engine/pubmed_onebox"
146
170
  require_relative "engine/soundcloud_onebox"
147
171
  require_relative "engine/imgur_onebox"
@@ -168,7 +192,6 @@ require_relative "engine/twitch_clips_onebox"
168
192
  require_relative "engine/twitch_stream_onebox"
169
193
  require_relative "engine/twitch_video_onebox"
170
194
  require_relative "engine/trello_onebox"
171
- require_relative "engine/wechat_mp_onebox"
172
195
  require_relative "engine/cloudapp_onebox"
173
196
  require_relative "engine/wistia_onebox"
174
197
  require_relative "engine/simplecast_onebox"
@@ -4,20 +4,24 @@ require 'htmlentities'
4
4
 
5
5
  module Onebox
6
6
  module Engine
7
- class WhitelistedGenericOnebox
7
+ class AllowlistedGenericOnebox
8
8
  include Engine
9
9
  include StandardEmbed
10
10
  include LayoutSupport
11
11
 
12
- def self.whitelist=(list)
13
- @whitelist = list
12
+ def self.priority
13
+ 200
14
14
  end
15
15
 
16
- def self.whitelist
17
- @whitelist ||= default_whitelist.dup
16
+ def self.allowed_domains=(list)
17
+ @allowed_domains = list
18
18
  end
19
19
 
20
- def self.default_whitelist
20
+ def self.allowed_domains
21
+ @allowed_domains ||= default_allowed_domains.dup
22
+ end
23
+
24
+ def self.default_allowed_domains
21
25
  %w(
22
26
  23hq.com
23
27
  500px.com
@@ -176,13 +180,13 @@ module Onebox
176
180
  !!(uri.path =~ /\d{4}\/\d{2}\//)
177
181
  end
178
182
 
179
- def self.twitter_label_whitelist
183
+ def self.allowed_twitter_labels
180
184
  ['brand', 'price', 'usd', 'cad', 'reading time', 'likes']
181
185
  end
182
186
 
183
187
  def self.===(other)
184
188
  other.kind_of?(URI) ?
185
- host_matches(other, whitelist) || probable_wordpress(other) || probable_discourse(other) :
189
+ host_matches(other, allowed_domains) || probable_wordpress(other) || probable_discourse(other) :
186
190
  super
187
191
  end
188
192
 
@@ -233,11 +237,11 @@ module Onebox
233
237
  end
234
238
 
235
239
  # Twitter labels
236
- if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!WhitelistedGenericOnebox.twitter_label_whitelist.find { |l| d[:label1] =~ /#{l}/i }
240
+ if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i }
237
241
  d[:label_1] = Onebox::Helpers.truncate(d[:label1])
238
242
  d[:data_1] = Onebox::Helpers.truncate(d[:data1])
239
243
  end
240
- if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!WhitelistedGenericOnebox.twitter_label_whitelist.find { |l| d[:label2] =~ /#{l}/i }
244
+ if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i }
241
245
  unless Onebox::Helpers.blank?(d[:label_1])
242
246
  d[:label_2] = Onebox::Helpers.truncate(d[:label2])
243
247
  d[:data_2] = Onebox::Helpers.truncate(d[:data2])
@@ -261,7 +265,7 @@ module Onebox
261
265
  def rewrite_https(html)
262
266
  return unless html
263
267
  uri = URI(@url)
264
- if WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.rewrites)
268
+ if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
265
269
  html = html.gsub("http://", "https://")
266
270
  end
267
271
  html
@@ -277,7 +281,9 @@ module Onebox
277
281
  end
278
282
 
279
283
  def is_card?
280
- data[:card] == 'player' && data[:player] =~ URI::regexp
284
+ data[:card] == 'player' &&
285
+ data[:player] =~ URI::regexp &&
286
+ options[:allowed_iframe_regexes]&.any? { |r| data[:player] =~ r }
281
287
  end
282
288
 
283
289
  def is_article?
@@ -301,16 +307,19 @@ module Onebox
301
307
  end
302
308
 
303
309
  def is_video?
304
- data[:type] =~ /^video[\/\.]/ && !Onebox::Helpers.blank?(data[:video])
310
+ data[:type] =~ /^video[\/\.]/ &&
311
+ data[:video_type] == "video/mp4" && # Many sites include 'videos' with text/html types (i.e. iframes)
312
+ !Onebox::Helpers.blank?(data[:video])
305
313
  end
306
314
 
307
315
  def is_embedded?
308
- data[:html] &&
309
- data[:height] &&
310
- (
311
- data[:html]["iframe"] ||
312
- WhitelistedGenericOnebox.html_providers.include?(data[:provider_name])
313
- )
316
+ return false unless data[:html] && data[:height]
317
+ return true if AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
318
+ return false unless data[:html]["iframe"]
319
+
320
+ fragment = Nokogiri::HTML::fragment(data[:html])
321
+ src = fragment.at_css('iframe')&.[]("src")
322
+ options[:allowed_iframe_regexes]&.any? { |r| src =~ r }
314
323
  end
315
324
 
316
325
  def card_html
@@ -8,7 +8,7 @@ module Onebox
8
8
  matches_regexp(/^(https?:)?\/\/.*\.(mp3|ogg|opus|wav|m4a)(\?.*)?$/i)
9
9
 
10
10
  def always_https?
11
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts)
11
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
12
12
  end
13
13
 
14
14
  def to_html
@@ -8,6 +8,7 @@ module Onebox
8
8
 
9
9
  matches_regexp(/^https?:\/\/.*\.bandcamp\.com\/(album|track)\//)
10
10
  always_https
11
+ requires_iframe_origins "https://bandcamp.com"
11
12
 
12
13
  def placeholder_html
13
14
  og = get_opengraph
@@ -8,6 +8,7 @@ module Onebox
8
8
 
9
9
  matches_regexp(/^https?:\/\/.*\.facebook\.com\/(\w+)\/(videos|\?).*/)
10
10
  always_https
11
+ requires_iframe_origins "https://www.facebook.com"
11
12
 
12
13
  def to_html
13
14
  metadata = get_twitter
@@ -22,7 +23,7 @@ module Onebox
22
23
  </iframe>
23
24
  HTML
24
25
  else
25
- html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @timeout).to_html
26
+ html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
26
27
  return if Onebox::Helpers.blank?(html)
27
28
  html
28
29
  end
@@ -10,7 +10,7 @@ module Onebox
10
10
  always_https
11
11
 
12
12
  def self.priority
13
- # This engine should have priority over WhitelistedGenericOnebox.
13
+ # This engine should have priority over AllowlistedGenericOnebox.
14
14
  1
15
15
  end
16
16
 
@@ -7,6 +7,7 @@ module Onebox
7
7
 
8
8
  matches_regexp /^(https?:)?\/\/((www|calendar)\.google\.[\w.]{2,}|goo\.gl)\/calendar\/.+$/
9
9
  always_https
10
+ requires_iframe_origins "https://calendar.google.com"
10
11
 
11
12
  def to_html
12
13
  url = @url.split('&').first
@@ -23,6 +23,8 @@ module Onebox
23
23
 
24
24
  always_https
25
25
 
26
+ requires_iframe_origins("https://maps.google.com", "https://google.com")
27
+
26
28
  # Matches shortened Google Maps URLs
27
29
  matches_regexp :short, %r"^(https?:)?//goo\.gl/maps/"
28
30
 
@@ -8,7 +8,7 @@ module Onebox
8
8
  matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)
9
9
 
10
10
  def always_https?
11
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts)
11
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
12
12
  end
13
13
 
14
14
  def to_html
@@ -8,6 +8,7 @@ module Onebox
8
8
 
9
9
  always_https
10
10
  matches_regexp(/^https?:\/\/[a-z0-9]+\.kaltura\.com\/id\/[a-zA-Z0-9]+/)
11
+ requires_iframe_origins "https://*.kaltura.com"
11
12
 
12
13
  def preview_html
13
14
  og = get_opengraph
@@ -45,7 +45,7 @@ module Onebox
45
45
  </aside>
46
46
  HTML
47
47
  else
48
- html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @timeout).to_html
48
+ html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
49
49
  return if Onebox::Helpers.blank?(html)
50
50
  html
51
51
  end
@@ -8,6 +8,7 @@ module Onebox
8
8
 
9
9
  matches_regexp(/^https?:\/\/sketchfab\.com\/(?:models\/|3d-models\/(?:[^\/\s]+-)?)([a-z0-9]{32})/)
10
10
  always_https
11
+ requires_iframe_origins("https://sketchfab.com")
11
12
 
12
13
  def to_html
13
14
  og = get_opengraph
@@ -7,10 +7,11 @@ module Onebox
7
7
  include StandardEmbed
8
8
 
9
9
  matches_regexp(/^https?:\/\/slides\.com\/[\p{Alnum}_\-]+\/[\p{Alnum}_\-]+$/)
10
+ requires_iframe_origins "https://slides.com"
10
11
 
11
12
  def to_html
12
13
  <<-HTML
13
- <iframe src="//slides.com#{uri.path}/embed?style=light"
14
+ <iframe src="https://slides.com#{uri.path}/embed?style=light"
14
15
  width="576"
15
16
  height="420"
16
17
  scrolling="no"
@@ -7,6 +7,7 @@ module Onebox
7
7
  include StandardEmbed
8
8
 
9
9
  matches_regexp(/^https?:\/\/soundcloud\.com/)
10
+ requires_iframe_origins "https://w.soundcloud.com"
10
11
  always_https
11
12
 
12
13
  def to_html
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "cgi"
4
4
  require "onebox/open_graph"
5
+ require 'onebox/oembed'
5
6
 
6
7
  module Onebox
7
8
  module Engine
@@ -32,7 +33,7 @@ module Onebox
32
33
  add_oembed_provider(/nytimes\.com\//, 'https://www.nytimes.com/svc/oembed/json/')
33
34
 
34
35
  def always_https?
35
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts) || super
36
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
36
37
  end
37
38
 
38
39
  def raw
@@ -8,6 +8,7 @@ module Onebox
8
8
 
9
9
  always_https
10
10
  matches_regexp(/^https?:\/\/store\.steampowered\.com\/app\/\d+/)
11
+ requires_iframe_origins "https://store.steampowered.com"
11
12
 
12
13
  def placeholder_html
13
14
  og = get_opengraph
@@ -7,6 +7,7 @@ module Onebox
7
7
  include StandardEmbed
8
8
 
9
9
  matches_regexp(/^https:\/\/trello\.com\/[bc]\/\W*/)
10
+ requires_iframe_origins "https://trello.com"
10
11
  always_https
11
12
 
12
13
  def to_html
@@ -9,6 +9,8 @@ class Onebox::Engine::TwitchClipsOnebox
9
9
  end
10
10
  include Onebox::Mixins::TwitchOnebox
11
11
 
12
+ requires_iframe_origins "https://clips.twitch.tv"
13
+
12
14
  def query_params
13
15
  "clip=#{twitch_id}"
14
16
  end
@@ -6,6 +6,7 @@ module Onebox
6
6
  include Engine
7
7
 
8
8
  matches_regexp(/^https?:\/\/[a-z0-9\-_]+\.typeform\.com\/to\/[a-zA-Z0-9]+/)
9
+ requires_iframe_origins "https://*.typeform.com"
9
10
  always_https
10
11
 
11
12
  def to_html
@@ -8,7 +8,7 @@ module Onebox
8
8
  matches_regexp(/^(https?:)?\/\/.*\.(mov|mp4|webm|ogv)(\?.*)?$/i)
9
9
 
10
10
  def always_https?
11
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts)
11
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
12
12
  end
13
13
 
14
14
  def to_html
@@ -7,6 +7,7 @@ module Onebox
7
7
  include StandardEmbed
8
8
 
9
9
  matches_regexp(/^https?:\/\/(www\.)?vimeo\.com\/\d+/)
10
+ requires_iframe_origins "https://player.vimeo.com"
10
11
  always_https
11
12
 
12
13
  WIDTH ||= 640
@@ -7,6 +7,7 @@ module Onebox
7
7
  include StandardEmbed
8
8
 
9
9
  matches_regexp(/https?:\/\/(.+)?(wistia.com|wi.st)\/(medias|embed)\/.*/)
10
+ requires_iframe_origins "https://fast.wistia.com"
10
11
  always_https
11
12
 
12
13
  def to_html
@@ -7,6 +7,7 @@ module Onebox
7
7
  include HTML
8
8
 
9
9
  matches_regexp(/^(https?:\/\/)?([\da-z\.-]+)(youku.com\/)(.)+\/?$/)
10
+ requires_iframe_origins "https://player.youku.com"
10
11
 
11
12
  # Try to get the video ID. Works for URLs of the form:
12
13
  # * http://v.youku.com/v_show/id_XNjM3MzAxNzc2.html
@@ -19,7 +20,14 @@ module Onebox
19
20
  end
20
21
 
21
22
  def to_html
22
- "<embed width='570' height='360' src='https://players.youku.com/player.php/sid/#{video_id}/v.swf' type='application/x-shockwave-flash'></embed>"
23
+ <<~HTML
24
+ <iframe src="https://player.youku.com/embed/#{video_id}"
25
+ width="640"
26
+ height="430"
27
+ frameborder='0'
28
+ allowfullscreen>
29
+ </iframe>
30
+ HTML
23
31
  end
24
32
 
25
33
  private
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'onebox/oembed'
4
-
5
3
  module Onebox
6
4
  module Engine
7
5
  class YoutubeOnebox
@@ -9,16 +7,17 @@ module Onebox
9
7
  include StandardEmbed
10
8
 
11
9
  matches_regexp(/^https?:\/\/(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)\/.+$/)
10
+ requires_iframe_origins "https://www.youtube.com"
12
11
  always_https
13
12
 
14
13
  WIDTH ||= 480
15
14
  HEIGHT ||= 360
16
15
 
17
16
  def placeholder_html
18
- if video_id
19
- "<img src='https://i.ytimg.com/vi/#{video_id}/hqdefault.jpg' width='#{WIDTH}' height='#{HEIGHT}' #{video_oembed_data.title_attr}>"
20
- elsif list_id
21
- "<img src='#{list_thumbnail_url}' width='#{WIDTH}' height='#{HEIGHT}' #{list_oembed_data.title_attr}>"
17
+ og = get_opengraph.data
18
+
19
+ if video_id || list_id
20
+ "<img src='#{og[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{og[:title]}'>"
22
21
  else
23
22
  to_html
24
23
  end
@@ -45,7 +44,7 @@ module Onebox
45
44
  HTML
46
45
  else
47
46
  # for channel pages
48
- html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @timeout).to_html
47
+ html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
49
48
  return if Onebox::Helpers.blank?(html)
50
49
  html.gsub!(/['"]\/\//, "https://")
51
50
  html
@@ -53,7 +52,7 @@ module Onebox
53
52
  end
54
53
 
55
54
  def video_title
56
- @video_title ||= video_oembed_data.title
55
+ @video_title ||= get_opengraph.data[:title]
57
56
  end
58
57
 
59
58
  private
@@ -81,29 +80,6 @@ module Onebox
81
80
  @list_id ||= params['list']
82
81
  end
83
82
 
84
- def list_thumbnail_url
85
- @list_thumbnail_url ||= begin
86
- url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
87
- response = Onebox::Helpers.fetch_response(url) rescue "{}"
88
- data = Onebox::Oembed.new(response)
89
- data.thumbnail_url
90
- rescue
91
- nil
92
- end
93
- end
94
-
95
- def video_oembed_data
96
- url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=#{video_id}"
97
- response = Onebox::Helpers.fetch_response(url) rescue "{}"
98
- Onebox::Oembed.new(response)
99
- end
100
-
101
- def list_oembed_data
102
- url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
103
- response = Onebox::Helpers.fetch_response(url) rescue "{}"
104
- Onebox::Oembed.new(response)
105
- end
106
-
107
83
  def embed_params
108
84
  p = { 'feature' => 'oembed', 'wmode' => 'opaque' }
109
85
 
@@ -2,8 +2,9 @@
2
2
 
3
3
  module Onebox
4
4
  class Matcher
5
- def initialize(link)
5
+ def initialize(link, options = {})
6
6
  @url = link
7
+ @options = options
7
8
  end
8
9
 
9
10
  def ordered_engines
@@ -16,9 +17,14 @@ module Onebox
16
17
  uri = URI(@url)
17
18
  return unless uri.port.nil? || Onebox.options.allowed_ports.include?(uri.port)
18
19
  return unless uri.scheme.nil? || Onebox.options.allowed_schemes.include?(uri.scheme)
19
- ordered_engines.find { |engine| engine === uri }
20
+ ordered_engines.find { |engine| engine === uri && has_allowed_iframe_origins?(engine) }
20
21
  rescue URI::InvalidURIError
21
22
  nil
22
23
  end
24
+
25
+ def has_allowed_iframe_origins?(engine)
26
+ allowed_regexes = @options[:allowed_iframe_regexes] || []
27
+ engine.iframe_origins.all? { |o| allowed_regexes.any? { |r| o =~ r } }
28
+ end
23
29
  end
24
30
  end
@@ -7,6 +7,7 @@ module Onebox
7
7
  def self.included(klass)
8
8
  klass.include(Onebox::Engine)
9
9
  klass.matches_regexp(klass.twitch_regexp)
10
+ klass.requires_iframe_origins "https://player.twitch.tv"
10
11
  klass.include(InstanceMethods)
11
12
  end
12
13
 
@@ -25,7 +26,7 @@ module Onebox
25
26
 
26
27
  def to_html
27
28
  <<~HTML
28
- <iframe src="//#{base_url}#{query_params}&parent=#{options[:hostname]}&autoplay=false" width="620" height="378" frameborder="0" style="overflow: hidden;" scrolling="no" allowfullscreen="allowfullscreen"></iframe>
29
+ <iframe src="https://#{base_url}#{query_params}&parent=#{options[:hostname]}&autoplay=false" width="620" height="378" frameborder="0" style="overflow: hidden;" scrolling="no" allowfullscreen="allowfullscreen"></iframe>
29
30
  HTML
30
31
  end
31
32
  end
@@ -7,10 +7,14 @@ module Onebox
7
7
  client_exception = defined?(Net::HTTPClientException) ? Net::HTTPClientException : Net::HTTPServerException
8
8
  WEB_EXCEPTIONS ||= [client_exception, OpenURI::HTTPError, Timeout::Error, Net::HTTPError, Errno::ECONNREFUSED]
9
9
 
10
- def initialize(link, parameters = Onebox.options)
10
+ def initialize(link, options = Onebox.options)
11
11
  @url = link
12
- @options = parameters
13
- @engine_class = Matcher.new(@url).oneboxed
12
+ @options = options.dup
13
+
14
+ allowed_origins = @options[:allowed_iframe_origins] || Onebox::Engine.all_iframe_origins
15
+ @options[:allowed_iframe_regexes] = Engine.origins_to_regexes(allowed_origins)
16
+
17
+ @engine_class = Matcher.new(@url, @options).oneboxed
14
18
  end
15
19
 
16
20
  def to_s
@@ -63,7 +67,10 @@ module Onebox
63
67
  end
64
68
 
65
69
  def sanitize(html)
66
- Sanitize.fragment(html, @options[:sanitize_config] || Sanitize::Config::ONEBOX)
70
+ config = @options[:sanitize_config] || Sanitize::Config::ONEBOX
71
+ config = config.merge(allowed_iframe_regexes: @options[:allowed_iframe_regexes])
72
+
73
+ Sanitize.fragment(html, config)
67
74
  end
68
75
 
69
76
  def engine
@@ -12,9 +12,9 @@ class Sanitize
12
12
  'a' => RELAXED[:attributes]['a'] + %w(target),
13
13
  'audio' => %w[controls],
14
14
  'embed' => %w[height src type width],
15
- 'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href],
15
+ 'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href data-unsanitized-src],
16
16
  'source' => %w[src type],
17
- 'video' => %w[controls height loop width autoplay muted poster],
17
+ 'video' => %w[controls height loop width autoplay muted poster controlslist playsinline],
18
18
  'path' => %w[d],
19
19
  'svg' => ['aria-hidden', 'width', 'height', 'viewbox'],
20
20
  'div' => [:data], # any data-* attributes,
@@ -39,6 +39,22 @@ class Sanitize
39
39
  else
40
40
  a_tag.remove_attribute('target')
41
41
  end
42
+ end,
43
+
44
+ lambda do |env|
45
+ next unless env[:node_name] == 'iframe'
46
+
47
+ iframe = env[:node]
48
+ allowed_regexes = env[:config][:allowed_iframe_regexes] || [/.*/]
49
+
50
+ allowed = allowed_regexes.any? { |r| iframe["src"] =~ r }
51
+
52
+ if !allowed
53
+ # add a data attribute with the blocked src. This is not required
54
+ # but makes it much easier to troubleshoot onebox issues
55
+ iframe["data-unsanitized-src"] = iframe["src"]
56
+ iframe.remove_attribute("src")
57
+ end
42
58
  end
43
59
  ],
44
60
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "1.9.30"
4
+ VERSION = "2.1.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.30
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2020-07-07 00:00:00.000000000 Z
13
+ date: 2020-08-27 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable
@@ -300,6 +300,7 @@ files:
300
300
  - Rakefile
301
301
  - lib/onebox.rb
302
302
  - lib/onebox/engine.rb
303
+ - lib/onebox/engine/allowlisted_generic_onebox.rb
303
304
  - lib/onebox/engine/amazon_onebox.rb
304
305
  - lib/onebox/engine/asciinema_onebox.rb
305
306
  - lib/onebox/engine/audio_onebox.rb
@@ -353,8 +354,6 @@ files:
353
354
  - lib/onebox/engine/typeform_onebox.rb
354
355
  - lib/onebox/engine/video_onebox.rb
355
356
  - lib/onebox/engine/vimeo_onebox.rb
356
- - lib/onebox/engine/wechat_mp_onebox.rb
357
- - lib/onebox/engine/whitelisted_generic_onebox.rb
358
357
  - lib/onebox/engine/wikimedia_onebox.rb
359
358
  - lib/onebox/engine/wikipedia_onebox.rb
360
359
  - lib/onebox/engine/wistia_onebox.rb
@@ -380,6 +379,7 @@ files:
380
379
  - lib/onebox/web_helpers.rb
381
380
  - onebox.gemspec
382
381
  - templates/_layout.mustache
382
+ - templates/allowlistedgeneric.mustache
383
383
  - templates/amazon.mustache
384
384
  - templates/githubblob.mustache
385
385
  - templates/githubcommit.mustache
@@ -396,8 +396,6 @@ files:
396
396
  - templates/pubmed.mustache
397
397
  - templates/stackexchange.mustache
398
398
  - templates/twitterstatus.mustache
399
- - templates/wechatmp.mustache
400
- - templates/whitelistedgeneric.mustache
401
399
  - templates/wikimedia.mustache
402
400
  - templates/wikipedia.mustache
403
401
  - templates/xkcd.mustache
@@ -1,62 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Onebox
4
- module Engine
5
- class WechatMpOnebox
6
- include Engine
7
- include LayoutSupport
8
- include HTML
9
-
10
- always_https
11
- matches_regexp(/^https?:\/\/mp\.weixin\.qq\.com\/s.*$/)
12
-
13
- def tld
14
- @tld || @@matcher.match(@url)["tld"]
15
- end
16
-
17
- def http_params
18
- {
19
- 'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3',
20
- 'Accept-Encoding' => 'plain'
21
- }
22
- end
23
-
24
- private
25
-
26
- def extract_script_value(var_name)
27
- if (script_elem = raw.css("script").select { |script| script.inner_text.include? "var #{var_name} = " }) && script_elem.any?
28
- e = Nokogiri::HTML(script_elem[0].inner_text.match(/var\s+#{Regexp.quote(var_name)}\s+=\s+"(.*?)";/)[1])
29
- CGI::unescapeHTML(e.text.scan(/(?:\\x([a-f0-9]{2}))|(.)/i).map { |x| x[0] ? [x[0].to_i(16)].pack('U') : x[1] }.join)
30
- end
31
- end
32
-
33
- # TODO need to handle hotlink protection from wechat
34
- def image
35
- if banner_image = extract_script_value("msg_cdn_url")
36
- return banner_image
37
- end
38
-
39
- if (main_image = raw.css("img").select { |img| not img['class'] }) && main_image.any?
40
- attributes = main_image.first.attributes
41
-
42
- return attributes["data-src"].to_s if attributes["data-src"]
43
- end
44
- end
45
-
46
- def data
47
- title = CGI.unescapeHTML(raw.css("title").inner_text)
48
- by_info = CGI.unescapeHTML(raw.css("span.rich_media_meta_text.rich_media_meta_nickname").inner_text)
49
-
50
- result = {
51
- link: extract_script_value("msg_link") || link,
52
- title: title,
53
- image: image,
54
- description: extract_script_value("msg_desc"),
55
- by_info: by_info
56
- }
57
-
58
- result
59
- end
60
- end
61
- end
62
- end
@@ -1,4 +0,0 @@
1
- <h3><a href='{{link}}' target='_blank' rel='noopener'>{{title}}</a></h3>
2
- {{#by_info}}<b>{{by_info}}</b>{{/by_info}}
3
- <p>{{description}}</p>
4
-