onebox 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 373f78c55bcd96d80865329cabb0b3f2e091cfbac87f71a9b913b0512649e4c5
4
- data.tar.gz: 3c21d762657d8f96109ea5db3048daca0948544c0d25625f4242ff35680e094c
3
+ metadata.gz: 9b0d5f22e692ec775f7a30215772f6c13f41a4a17c6faf2c25eb902cb7ab8915
4
+ data.tar.gz: d07753cc17aa1f3d656e59d4600f1d966ed21a60f10261563a8eca908fd560a7
5
5
  SHA512:
6
- metadata.gz: ae768140c4c42b634a10e9c3f8ab716adbb2bbda5f74a4c1c6205abaee4cbff557d3e87ae25b2f81039caed5c330e9dccadbce87b6e8af3ba691a35114ecffa8
7
- data.tar.gz: 3102dc52f8bd7a9246ae1854289ad1138283b2dbf92f8a7bc53c99c650176d7b9c38afd57549305baf34e622fcde335fd13bee075b4f803470ea50c3f784407d
6
+ metadata.gz: 04dd906ddead063b6b787d17bb709cac2fb3c1a9dc4d3932354c374218a8dac4f0710f741a0388b5a013919bdb9e77d0e0216b1f9606de8c1822dc52938c6070
7
+ data.tar.gz: b3a782c32fb3499e3610560eb67cd35623ca85987bedb1932f2cba759f397a7b8fe9af17e2f87cf54ff246551685ebbdadd5322478ea7a0a0944319ba7f55fed
@@ -12,6 +12,22 @@ module Onebox
12
12
  end.map(&method(:const_get))
13
13
  end
14
14
 
15
+ def self.all_iframe_origins
16
+ engines.flat_map { |e| e.iframe_origins }.uniq.compact
17
+ end
18
+
19
+ def self.origins_to_regexes(origins)
20
+ return /.*/ if origins.include?("*")
21
+ origins.map do |origin|
22
+ escaped_origin = Regexp.escape(origin)
23
+ if origin.start_with?("*.", "https://*.", "http://*.")
24
+ escaped_origin = escaped_origin.sub("\\*", '\S*')
25
+ end
26
+
27
+ Regexp.new("\\A#{escaped_origin}", 'i')
28
+ end
29
+ end
30
+
15
31
  attr_reader :url, :uri
16
32
  attr_reader :timeout
17
33
 
@@ -100,6 +116,14 @@ module Onebox
100
116
  class_variable_set :@@matcher, r
101
117
  end
102
118
 
119
+ def requires_iframe_origins(*origins)
120
+ class_variable_set :@@iframe_origins, origins
121
+ end
122
+
123
+ def iframe_origins
124
+ class_variable_defined?(:@@iframe_origins) ? class_variable_get(:@@iframe_origins) : []
125
+ end
126
+
103
127
  # calculates a name for onebox using the class name of engine
104
128
  def onebox_name
105
129
  name.split("::").last.downcase.gsub(/onebox/, "")
@@ -281,7 +281,9 @@ module Onebox
281
281
  end
282
282
 
283
283
  def is_card?
284
- data[:card] == 'player' && data[:player] =~ URI::regexp
284
+ data[:card] == 'player' &&
285
+ data[:player] =~ URI::regexp &&
286
+ options[:allowed_iframe_regexes]&.any? { |r| data[:player] =~ r }
285
287
  end
286
288
 
287
289
  def is_article?
@@ -305,16 +307,19 @@ module Onebox
305
307
  end
306
308
 
307
309
  def is_video?
308
- data[:type] =~ /^video[\/\.]/ && !Onebox::Helpers.blank?(data[:video])
310
+ data[:type] =~ /^video[\/\.]/ &&
311
+ data[:video_type] == "video/mp4" && # Many sites include 'videos' with text/html types (i.e. iframes)
312
+ !Onebox::Helpers.blank?(data[:video])
309
313
  end
310
314
 
311
315
  def is_embedded?
312
- data[:html] &&
313
- data[:height] &&
314
- (
315
- data[:html]["iframe"] ||
316
- AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
317
- )
316
+ return false unless data[:html] && data[:height]
317
+ return true if AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
318
+ return false unless data[:html]["iframe"]
319
+
320
+ fragment = Nokogiri::HTML::fragment(data[:html])
321
+ src = fragment.at_css('iframe')&.[]("src")
322
+ options[:allowed_iframe_regexes]&.any? { |r| src =~ r }
318
323
  end
319
324
 
320
325
  def card_html
@@ -8,6 +8,7 @@ module Onebox
8
8
 
9
9
  matches_regexp(/^https?:\/\/.*\.bandcamp\.com\/(album|track)\//)
10
10
  always_https
11
+ requires_iframe_origins "https://bandcamp.com"
11
12
 
12
13
  def placeholder_html
13
14
  og = get_opengraph
@@ -8,6 +8,7 @@ module Onebox
8
8
 
9
9
  matches_regexp(/^https?:\/\/.*\.facebook\.com\/(\w+)\/(videos|\?).*/)
10
10
  always_https
11
+ requires_iframe_origins "https://www.facebook.com"
11
12
 
12
13
  def to_html
13
14
  metadata = get_twitter
@@ -7,6 +7,7 @@ module Onebox
7
7
 
8
8
  matches_regexp /^(https?:)?\/\/((www|calendar)\.google\.[\w.]{2,}|goo\.gl)\/calendar\/.+$/
9
9
  always_https
10
+ requires_iframe_origins "https://calendar.google.com"
10
11
 
11
12
  def to_html
12
13
  url = @url.split('&').first
@@ -23,6 +23,8 @@ module Onebox
23
23
 
24
24
  always_https
25
25
 
26
+ requires_iframe_origins("https://maps.google.com", "https://google.com")
27
+
26
28
  # Matches shortened Google Maps URLs
27
29
  matches_regexp :short, %r"^(https?:)?//goo\.gl/maps/"
28
30
 
@@ -8,6 +8,7 @@ module Onebox
8
8
 
9
9
  always_https
10
10
  matches_regexp(/^https?:\/\/[a-z0-9]+\.kaltura\.com\/id\/[a-zA-Z0-9]+/)
11
+ requires_iframe_origins "https://*.kaltura.com"
11
12
 
12
13
  def preview_html
13
14
  og = get_opengraph
@@ -8,6 +8,7 @@ module Onebox
8
8
 
9
9
  matches_regexp(/^https?:\/\/sketchfab\.com\/(?:models\/|3d-models\/(?:[^\/\s]+-)?)([a-z0-9]{32})/)
10
10
  always_https
11
+ requires_iframe_origins("https://sketchfab.com")
11
12
 
12
13
  def to_html
13
14
  og = get_opengraph
@@ -7,10 +7,11 @@ module Onebox
7
7
  include StandardEmbed
8
8
 
9
9
  matches_regexp(/^https?:\/\/slides\.com\/[\p{Alnum}_\-]+\/[\p{Alnum}_\-]+$/)
10
+ requires_iframe_origins "https://slides.com"
10
11
 
11
12
  def to_html
12
13
  <<-HTML
13
- <iframe src="//slides.com#{uri.path}/embed?style=light"
14
+ <iframe src="https://slides.com#{uri.path}/embed?style=light"
14
15
  width="576"
15
16
  height="420"
16
17
  scrolling="no"
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "cgi"
4
4
  require "onebox/open_graph"
5
+ require 'onebox/oembed'
5
6
 
6
7
  module Onebox
7
8
  module Engine
@@ -8,6 +8,7 @@ module Onebox
8
8
 
9
9
  always_https
10
10
  matches_regexp(/^https?:\/\/store\.steampowered\.com\/app\/\d+/)
11
+ requires_iframe_origins "https://store.steampowered.com"
11
12
 
12
13
  def placeholder_html
13
14
  og = get_opengraph
@@ -7,6 +7,7 @@ module Onebox
7
7
  include StandardEmbed
8
8
 
9
9
  matches_regexp(/^https:\/\/trello\.com\/[bc]\/\W*/)
10
+ requires_iframe_origins "https://trello.com"
10
11
  always_https
11
12
 
12
13
  def to_html
@@ -9,6 +9,8 @@ class Onebox::Engine::TwitchClipsOnebox
9
9
  end
10
10
  include Onebox::Mixins::TwitchOnebox
11
11
 
12
+ requires_iframe_origins "https://clips.twitch.tv"
13
+
12
14
  def query_params
13
15
  "clip=#{twitch_id}"
14
16
  end
@@ -6,6 +6,7 @@ module Onebox
6
6
  include Engine
7
7
 
8
8
  matches_regexp(/^https?:\/\/[a-z0-9\-_]+\.typeform\.com\/to\/[a-zA-Z0-9]+/)
9
+ requires_iframe_origins "https://*.typeform.com"
9
10
  always_https
10
11
 
11
12
  def to_html
@@ -7,6 +7,7 @@ module Onebox
7
7
  include StandardEmbed
8
8
 
9
9
  matches_regexp(/^https?:\/\/(www\.)?vimeo\.com\/\d+/)
10
+ requires_iframe_origins "https://player.vimeo.com"
10
11
  always_https
11
12
 
12
13
  WIDTH ||= 640
@@ -7,6 +7,7 @@ module Onebox
7
7
  include StandardEmbed
8
8
 
9
9
  matches_regexp(/https?:\/\/(.+)?(wistia.com|wi.st)\/(medias|embed)\/.*/)
10
+ requires_iframe_origins "https://fast.wistia.com"
10
11
  always_https
11
12
 
12
13
  def to_html
@@ -7,6 +7,7 @@ module Onebox
7
7
  include HTML
8
8
 
9
9
  matches_regexp(/^(https?:\/\/)?([\da-z\.-]+)(youku.com\/)(.)+\/?$/)
10
+ requires_iframe_origins "https://player.youku.com"
10
11
 
11
12
  # Try to get the video ID. Works for URLs of the form:
12
13
  # * http://v.youku.com/v_show/id_XNjM3MzAxNzc2.html
@@ -19,7 +20,14 @@ module Onebox
19
20
  end
20
21
 
21
22
  def to_html
22
- "<embed width='570' height='360' src='https://players.youku.com/player.php/sid/#{video_id}/v.swf' type='application/x-shockwave-flash'></embed>"
23
+ <<~HTML
24
+ <iframe src="https://player.youku.com/embed/#{video_id}"
25
+ width="640"
26
+ height="430"
27
+ frameborder='0'
28
+ allowfullscreen>
29
+ </iframe>
30
+ HTML
23
31
  end
24
32
 
25
33
  private
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'onebox/oembed'
4
-
5
3
  module Onebox
6
4
  module Engine
7
5
  class YoutubeOnebox
@@ -9,16 +7,17 @@ module Onebox
9
7
  include StandardEmbed
10
8
 
11
9
  matches_regexp(/^https?:\/\/(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)\/.+$/)
10
+ requires_iframe_origins "https://www.youtube.com"
12
11
  always_https
13
12
 
14
13
  WIDTH ||= 480
15
14
  HEIGHT ||= 360
16
15
 
17
16
  def placeholder_html
18
- if video_id
19
- "<img src='https://i.ytimg.com/vi/#{video_id}/hqdefault.jpg' width='#{WIDTH}' height='#{HEIGHT}' #{video_oembed_data.title_attr}>"
20
- elsif list_id
21
- "<img src='#{list_thumbnail_url}' width='#{WIDTH}' height='#{HEIGHT}' #{list_oembed_data.title_attr}>"
17
+ og = get_opengraph.data
18
+
19
+ if video_id || list_id
20
+ "<img src='#{og[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{og[:title]}'>"
22
21
  else
23
22
  to_html
24
23
  end
@@ -53,7 +52,7 @@ module Onebox
53
52
  end
54
53
 
55
54
  def video_title
56
- @video_title ||= video_oembed_data.title
55
+ @video_title ||= get_opengraph.data[:title]
57
56
  end
58
57
 
59
58
  private
@@ -81,29 +80,6 @@ module Onebox
81
80
  @list_id ||= params['list']
82
81
  end
83
82
 
84
- def list_thumbnail_url
85
- @list_thumbnail_url ||= begin
86
- url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
87
- response = Onebox::Helpers.fetch_response(url) rescue "{}"
88
- data = Onebox::Oembed.new(response)
89
- data.thumbnail_url
90
- rescue
91
- nil
92
- end
93
- end
94
-
95
- def video_oembed_data
96
- url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=#{video_id}"
97
- response = Onebox::Helpers.fetch_response(url) rescue "{}"
98
- Onebox::Oembed.new(response)
99
- end
100
-
101
- def list_oembed_data
102
- url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
103
- response = Onebox::Helpers.fetch_response(url) rescue "{}"
104
- Onebox::Oembed.new(response)
105
- end
106
-
107
83
  def embed_params
108
84
  p = { 'feature' => 'oembed', 'wmode' => 'opaque' }
109
85
 
@@ -2,8 +2,9 @@
2
2
 
3
3
  module Onebox
4
4
  class Matcher
5
- def initialize(link)
5
+ def initialize(link, options = {})
6
6
  @url = link
7
+ @options = options
7
8
  end
8
9
 
9
10
  def ordered_engines
@@ -16,9 +17,14 @@ module Onebox
16
17
  uri = URI(@url)
17
18
  return unless uri.port.nil? || Onebox.options.allowed_ports.include?(uri.port)
18
19
  return unless uri.scheme.nil? || Onebox.options.allowed_schemes.include?(uri.scheme)
19
- ordered_engines.find { |engine| engine === uri }
20
+ ordered_engines.find { |engine| engine === uri && has_allowed_iframe_origins?(engine) }
20
21
  rescue URI::InvalidURIError
21
22
  nil
22
23
  end
24
+
25
+ def has_allowed_iframe_origins?(engine)
26
+ allowed_regexes = @options[:allowed_iframe_regexes] || []
27
+ engine.iframe_origins.all? { |o| allowed_regexes.any? { |r| o =~ r } }
28
+ end
23
29
  end
24
30
  end
@@ -7,6 +7,7 @@ module Onebox
7
7
  def self.included(klass)
8
8
  klass.include(Onebox::Engine)
9
9
  klass.matches_regexp(klass.twitch_regexp)
10
+ klass.requires_iframe_origins "https://player.twitch.tv"
10
11
  klass.include(InstanceMethods)
11
12
  end
12
13
 
@@ -25,7 +26,7 @@ module Onebox
25
26
 
26
27
  def to_html
27
28
  <<~HTML
28
- <iframe src="//#{base_url}#{query_params}&parent=#{options[:hostname]}&autoplay=false" width="620" height="378" frameborder="0" style="overflow: hidden;" scrolling="no" allowfullscreen="allowfullscreen"></iframe>
29
+ <iframe src="https://#{base_url}#{query_params}&parent=#{options[:hostname]}&autoplay=false" width="620" height="378" frameborder="0" style="overflow: hidden;" scrolling="no" allowfullscreen="allowfullscreen"></iframe>
29
30
  HTML
30
31
  end
31
32
  end
@@ -7,10 +7,14 @@ module Onebox
7
7
  client_exception = defined?(Net::HTTPClientException) ? Net::HTTPClientException : Net::HTTPServerException
8
8
  WEB_EXCEPTIONS ||= [client_exception, OpenURI::HTTPError, Timeout::Error, Net::HTTPError, Errno::ECONNREFUSED]
9
9
 
10
- def initialize(link, parameters = Onebox.options)
10
+ def initialize(link, options = Onebox.options)
11
11
  @url = link
12
- @options = parameters
13
- @engine_class = Matcher.new(@url).oneboxed
12
+ @options = options.dup
13
+
14
+ allowed_origins = @options[:allowed_iframe_origins] || Onebox::Engine.all_iframe_origins
15
+ @options[:allowed_iframe_regexes] = Engine.origins_to_regexes(allowed_origins)
16
+
17
+ @engine_class = Matcher.new(@url, @options).oneboxed
14
18
  end
15
19
 
16
20
  def to_s
@@ -63,7 +67,10 @@ module Onebox
63
67
  end
64
68
 
65
69
  def sanitize(html)
66
- Sanitize.fragment(html, @options[:sanitize_config] || Sanitize::Config::ONEBOX)
70
+ config = @options[:sanitize_config] || Sanitize::Config::ONEBOX
71
+ config = config.merge(allowed_iframe_regexes: @options[:allowed_iframe_regexes])
72
+
73
+ Sanitize.fragment(html, config)
67
74
  end
68
75
 
69
76
  def engine
@@ -12,7 +12,7 @@ class Sanitize
12
12
  'a' => RELAXED[:attributes]['a'] + %w(target),
13
13
  'audio' => %w[controls],
14
14
  'embed' => %w[height src type width],
15
- 'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href],
15
+ 'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href data-unsanitized-src],
16
16
  'source' => %w[src type],
17
17
  'video' => %w[controls height loop width autoplay muted poster controlslist playsinline],
18
18
  'path' => %w[d],
@@ -39,6 +39,22 @@ class Sanitize
39
39
  else
40
40
  a_tag.remove_attribute('target')
41
41
  end
42
+ end,
43
+
44
+ lambda do |env|
45
+ next unless env[:node_name] == 'iframe'
46
+
47
+ iframe = env[:node]
48
+ allowed_regexes = env[:config][:allowed_iframe_regexes] || [/.*/]
49
+
50
+ allowed = allowed_regexes.any? { |r| iframe["src"] =~ r }
51
+
52
+ if !allowed
53
+ # add a data attribute with the blocked src. This is not required
54
+ # but makes it much easier to troubleshoot onebox issues
55
+ iframe["data-unsanitized-src"] = iframe["src"]
56
+ iframe.remove_attribute("src")
57
+ end
42
58
  end
43
59
  ],
44
60
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.0.2"
4
+ VERSION = "2.1.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2020-08-18 00:00:00.000000000 Z
13
+ date: 2020-08-27 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable