onebox 1.9.28.4 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f0921bd8d91072c930eccee436ef05e362fbe20b0d4ec8d39cecb74580723c9
4
- data.tar.gz: 58ff915a7d3a7dcbf2a7a0bcaf66366ad6210ba17b84695695be9a067c54be92
3
+ metadata.gz: 373f78c55bcd96d80865329cabb0b3f2e091cfbac87f71a9b913b0512649e4c5
4
+ data.tar.gz: 3c21d762657d8f96109ea5db3048daca0948544c0d25625f4242ff35680e094c
5
5
  SHA512:
6
- metadata.gz: 5eb93e33a7c8c4919d47d03975545d08be18038d83e6b763ab5e7fabb1fe9b9a1ddc35a688656234e37231dff9b131900e87d00d0363e5e6f3454d9dad2988ed
7
- data.tar.gz: 7097f982b53eb11d057f601af63e928941f086534a03ec7d92eb7794bf2981a9138d6891add442f52345b91a70f611397a86e4647a7302a12991d8560c5c96ba
6
+ metadata.gz: ae768140c4c42b634a10e9c3f8ab716adbb2bbda5f74a4c1c6205abaee4cbff557d3e87ae25b2f81039caed5c330e9dccadbce87b6e8af3ba691a35114ecffa8
7
+ data.tar.gz: 3102dc52f8bd7a9246ae1854289ad1138283b2dbf92f8a7bc53c99c650176d7b9c38afd57549305baf34e622fcde335fd13bee075b4f803470ea50c3f784407d
data/README.md CHANGED
@@ -46,6 +46,14 @@ preview = Onebox.preview(url)
46
46
  "#{preview}" == preview.to_s #=> true
47
47
  ```
48
48
 
49
+ ### Twitch Onebox
50
+
51
+ To be able to embed Twitch video and clips, pass `hostname` in the options to `Onebox.preview`
52
+
53
+ ```ruby
54
+ preview = Onebox.preview(url, hostname: 'www.example.com')
55
+ ```
56
+
49
57
  Ruby Support
50
58
  ------------
51
59
 
@@ -63,12 +71,11 @@ out the project. You can then try out URLs.
63
71
  The server doesn't reload code changes automatically (PRs accepted!) so
64
72
  make sure to hit CTRL-C and restart the server to try a code change out.
65
73
 
66
-
67
74
  Adding Support for a new URL
68
75
  ----------------------------
69
76
 
70
77
  1. Check if the site supports [oEmbed](http://oembed.com/) or [Open Graph](https://developers.facebook.com/docs/opengraph/).
71
- If it does, you can probably get away with just whitelisting the URL in `Onebox::Engine::WhitelistedGenericOnebox` (see: [Whitelisted Generic Onebox caveats](#user-content-whitelisted-generic-onebox-caveats)).
78
+ If it does, you can probably get away with just allowing the URL in `Onebox::Engine::AllowlistedGenericOnebox` (see: [Allowlisted Generic Onebox caveats](#user-content-allowlisted-generic-onebox-caveats)).
72
79
  If the site does not support open standards, you can create a new engine.
73
80
 
74
81
  2. Create new onebox engine
@@ -156,16 +163,23 @@ Adding Support for a new URL
156
163
  require_relative "engine/name_onebox"
157
164
  ```
158
165
 
159
-
160
- Whitelisted Generic Onebox caveats
166
+ Allowlisted Generic Onebox caveats
161
167
  ----------------------------------
162
168
 
163
- The Whitelisted Generic Onebox has some caveats for its use, beyond simply whitelisting the domain.
169
+ The Allowlisted Generic Onebox has some caveats for its use, beyond simply allowlisting the domain.
164
170
 
165
- 1. The domain must be whitelisted
171
+ 1. The domain must be allowlisted
166
172
  2. The URL you're oneboxing cannot be a root url (e.g. `http://example.com` won't work, but `http://example.com/page` will)
167
173
  3. If the oneboxed URL responds with oEmbed and has a `rich` type: the `html` content must contain an `<iframe>`. Responses without an iframe will not be oneboxed.
168
174
 
175
+ Ignoring Canonical URLs
176
+ -----------------------
177
+
178
+ Onebox prefers to use canonical URLs instead of the raw inputted URL when searching for Open Graph metadata. If your site's canonical URL does not have opengraph metadata, use the `og:ignore_canonical` property to have Onebox ignore the canonical URL.
179
+
180
+ ```html
181
+ <meta property="og:ignore_canonical" content="true" />
182
+ ```
169
183
 
170
184
  Installing
171
185
  ----------
@@ -141,7 +141,7 @@ require_relative "engine/wikimedia_onebox"
141
141
  require_relative "engine/wikipedia_onebox"
142
142
  require_relative "engine/youtube_onebox"
143
143
  require_relative "engine/youku_onebox"
144
- require_relative "engine/whitelisted_generic_onebox"
144
+ require_relative "engine/allowlisted_generic_onebox"
145
145
  require_relative "engine/pubmed_onebox"
146
146
  require_relative "engine/soundcloud_onebox"
147
147
  require_relative "engine/imgur_onebox"
@@ -168,7 +168,6 @@ require_relative "engine/twitch_clips_onebox"
168
168
  require_relative "engine/twitch_stream_onebox"
169
169
  require_relative "engine/twitch_video_onebox"
170
170
  require_relative "engine/trello_onebox"
171
- require_relative "engine/wechat_mp_onebox"
172
171
  require_relative "engine/cloudapp_onebox"
173
172
  require_relative "engine/wistia_onebox"
174
173
  require_relative "engine/simplecast_onebox"
@@ -4,20 +4,24 @@ require 'htmlentities'
4
4
 
5
5
  module Onebox
6
6
  module Engine
7
- class WhitelistedGenericOnebox
7
+ class AllowlistedGenericOnebox
8
8
  include Engine
9
9
  include StandardEmbed
10
10
  include LayoutSupport
11
11
 
12
- def self.whitelist=(list)
13
- @whitelist = list
12
+ def self.priority
13
+ 200
14
14
  end
15
15
 
16
- def self.whitelist
17
- @whitelist ||= default_whitelist.dup
16
+ def self.allowed_domains=(list)
17
+ @allowed_domains = list
18
18
  end
19
19
 
20
- def self.default_whitelist
20
+ def self.allowed_domains
21
+ @allowed_domains ||= default_allowed_domains.dup
22
+ end
23
+
24
+ def self.default_allowed_domains
21
25
  %w(
22
26
  23hq.com
23
27
  500px.com
@@ -176,13 +180,13 @@ module Onebox
176
180
  !!(uri.path =~ /\d{4}\/\d{2}\//)
177
181
  end
178
182
 
179
- def self.twitter_label_whitelist
183
+ def self.allowed_twitter_labels
180
184
  ['brand', 'price', 'usd', 'cad', 'reading time', 'likes']
181
185
  end
182
186
 
183
187
  def self.===(other)
184
188
  other.kind_of?(URI) ?
185
- host_matches(other, whitelist) || probable_wordpress(other) || probable_discourse(other) :
189
+ host_matches(other, allowed_domains) || probable_wordpress(other) || probable_discourse(other) :
186
190
  super
187
191
  end
188
192
 
@@ -233,11 +237,11 @@ module Onebox
233
237
  end
234
238
 
235
239
  # Twitter labels
236
- if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!WhitelistedGenericOnebox.twitter_label_whitelist.find { |l| d[:label1] =~ /#{l}/i }
240
+ if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i }
237
241
  d[:label_1] = Onebox::Helpers.truncate(d[:label1])
238
242
  d[:data_1] = Onebox::Helpers.truncate(d[:data1])
239
243
  end
240
- if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!WhitelistedGenericOnebox.twitter_label_whitelist.find { |l| d[:label2] =~ /#{l}/i }
244
+ if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i }
241
245
  unless Onebox::Helpers.blank?(d[:label_1])
242
246
  d[:label_2] = Onebox::Helpers.truncate(d[:label2])
243
247
  d[:data_2] = Onebox::Helpers.truncate(d[:data2])
@@ -261,7 +265,7 @@ module Onebox
261
265
  def rewrite_https(html)
262
266
  return unless html
263
267
  uri = URI(@url)
264
- if WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.rewrites)
268
+ if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
265
269
  html = html.gsub("http://", "https://")
266
270
  end
267
271
  html
@@ -309,7 +313,7 @@ module Onebox
309
313
  data[:height] &&
310
314
  (
311
315
  data[:html]["iframe"] ||
312
- WhitelistedGenericOnebox.html_providers.include?(data[:provider_name])
316
+ AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
313
317
  )
314
318
  end
315
319
 
@@ -8,7 +8,7 @@ module Onebox
8
8
  matches_regexp(/^(https?:)?\/\/.*\.(mp3|ogg|opus|wav|m4a)(\?.*)?$/i)
9
9
 
10
10
  def always_https?
11
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts)
11
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
12
12
  end
13
13
 
14
14
  def to_html
@@ -22,7 +22,7 @@ module Onebox
22
22
  </iframe>
23
23
  HTML
24
24
  else
25
- html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @timeout).to_html
25
+ html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
26
26
  return if Onebox::Helpers.blank?(html)
27
27
  html
28
28
  end
@@ -10,7 +10,7 @@ module Onebox
10
10
  always_https
11
11
 
12
12
  def self.priority
13
- # This engine should have priority over WhitelistedGenericOnebox.
13
+ # This engine should have priority over AllowlistedGenericOnebox.
14
14
  1
15
15
  end
16
16
 
@@ -20,11 +20,25 @@ module Onebox
20
20
  private
21
21
 
22
22
  def video_html(og)
23
+ escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
24
+
23
25
  <<-HTML
24
- <video width='#{og.video_width}' height='#{og.video_height}' #{og.title_attr} poster="#{og.get_secure_image}" controls loop>
25
- <source src='#{og.video_secure_url}' type='video/mp4'>
26
- </video>
27
- HTML
26
+ <aside class="onebox google-photos">
27
+ <header class="source">
28
+ <img src="#{raw[:favicon]}" class="site-icon" width="16" height="16">
29
+ <a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">#{raw[:site_name]}</a>
30
+ </header>
31
+ <article class="onebox-body">
32
+ <h3><a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">#{og.title}</a></h3>
33
+ <div class="aspect-image-full-size">
34
+ <a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">
35
+ <img src="#{og.get_secure_image}" class="scale-image"/>
36
+ <span class="instagram-video-icon"></span>
37
+ </a>
38
+ </div>
39
+ </article>
40
+ </aside>
41
+ HTML
28
42
  end
29
43
 
30
44
  def album_html(og)
@@ -8,7 +8,7 @@ module Onebox
8
8
  matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)
9
9
 
10
10
  def always_https?
11
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts)
11
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
12
12
  end
13
13
 
14
14
  def to_html
@@ -45,7 +45,7 @@ module Onebox
45
45
  </aside>
46
46
  HTML
47
47
  else
48
- html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @timeout).to_html
48
+ html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
49
49
  return if Onebox::Helpers.blank?(html)
50
50
  html
51
51
  end
@@ -32,7 +32,7 @@ module Onebox
32
32
  add_oembed_provider(/nytimes\.com\//, 'https://www.nytimes.com/svc/oembed/json/')
33
33
 
34
34
  def always_https?
35
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts) || super
35
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
36
36
  end
37
37
 
38
38
  def raw
@@ -8,7 +8,7 @@ module Onebox
8
8
  matches_regexp(/^(https?:)?\/\/.*\.(mov|mp4|webm|ogv)(\?.*)?$/i)
9
9
 
10
10
  def always_https?
11
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts)
11
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
12
12
  end
13
13
 
14
14
  def to_html
@@ -45,7 +45,7 @@ module Onebox
45
45
  HTML
46
46
  else
47
47
  # for channel pages
48
- html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @timeout).to_html
48
+ html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
49
49
  return if Onebox::Helpers.blank?(html)
50
50
  html.gsub!(/['"]\/\//, "https://")
51
51
  html
@@ -51,17 +51,14 @@ module Onebox
51
51
 
52
52
  raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
53
53
 
54
- uri = URI(location)
55
- uri = URI("#{domain}#{location}") if !uri.host
54
+ uri = Addressable::URI.parse(location)
55
+ uri = Addressable::URI.join(domain, uri) if !uri.host
56
56
 
57
57
  result = StringIO.new
58
- Net::HTTP.start(uri.host, uri.port, use_ssl: uri.is_a?(URI::HTTPS)) do |http|
58
+ Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
59
59
  http.open_timeout = Onebox.options.connect_timeout
60
60
  http.read_timeout = Onebox.options.timeout
61
- if uri.is_a?(URI::HTTPS)
62
- http.use_ssl = true
63
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
64
- end
61
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE # Work around path building bugs
65
62
 
66
63
  headers ||= {}
67
64
 
@@ -76,10 +73,12 @@ module Onebox
76
73
  http.request(request) do |response|
77
74
 
78
75
  if cookie = response.get_fields('set-cookie')
79
- header = { 'Cookie' => cookie.join }
76
+ # HACK: If this breaks again in the future, use HTTP::CookieJar from gem 'http-cookie'
77
+ # See test: it "does not send cookies to the wrong domain"
78
+ redir_header = { 'Cookie' => cookie.join('; ') }
80
79
  end
81
80
 
82
- header = nil unless header.is_a? Hash
81
+ redir_header = nil unless redir_header.is_a? Hash
83
82
 
84
83
  code = response.code.to_i
85
84
  unless code === 200
@@ -88,7 +87,7 @@ module Onebox
88
87
  response['location'],
89
88
  limit - 1,
90
89
  "#{uri.scheme}://#{uri.host}",
91
- header
90
+ redir_header
92
91
  )
93
92
  end
94
93
 
@@ -24,7 +24,9 @@ module Onebox
24
24
  end
25
25
 
26
26
  def to_html
27
- "<iframe src=\"//#{base_url}#{query_params}&autoplay=false\" width=\"620\" height=\"378\" frameborder=\"0\" style=\"overflow: hidden;\" scrolling=\"no\" allowfullscreen=\"allowfullscreen\"></iframe>"
27
+ <<~HTML
28
+ <iframe src="//#{base_url}#{query_params}&parent=#{options[:hostname]}&autoplay=false" width="620" height="378" frameborder="0" style="overflow: hidden;" scrolling="no" allowfullscreen="allowfullscreen"></iframe>
29
+ HTML
28
30
  end
29
31
  end
30
32
  end
@@ -14,7 +14,7 @@ class Sanitize
14
14
  'embed' => %w[height src type width],
15
15
  'iframe' => %w[allowfullscreen frameborder height scrolling src width data-original-href],
16
16
  'source' => %w[src type],
17
- 'video' => %w[controls height loop width autoplay muted poster],
17
+ 'video' => %w[controls height loop width autoplay muted poster controlslist playsinline],
18
18
  'path' => %w[d],
19
19
  'svg' => ['aria-hidden', 'width', 'height', 'viewbox'],
20
20
  'div' => [:data], # any data-* attributes,
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "1.9.28.4"
4
+ VERSION = "2.0.2"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.28.4
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2020-06-11 00:00:00.000000000 Z
13
+ date: 2020-08-18 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable
@@ -300,6 +300,7 @@ files:
300
300
  - Rakefile
301
301
  - lib/onebox.rb
302
302
  - lib/onebox/engine.rb
303
+ - lib/onebox/engine/allowlisted_generic_onebox.rb
303
304
  - lib/onebox/engine/amazon_onebox.rb
304
305
  - lib/onebox/engine/asciinema_onebox.rb
305
306
  - lib/onebox/engine/audio_onebox.rb
@@ -353,8 +354,6 @@ files:
353
354
  - lib/onebox/engine/typeform_onebox.rb
354
355
  - lib/onebox/engine/video_onebox.rb
355
356
  - lib/onebox/engine/vimeo_onebox.rb
356
- - lib/onebox/engine/wechat_mp_onebox.rb
357
- - lib/onebox/engine/whitelisted_generic_onebox.rb
358
357
  - lib/onebox/engine/wikimedia_onebox.rb
359
358
  - lib/onebox/engine/wikipedia_onebox.rb
360
359
  - lib/onebox/engine/wistia_onebox.rb
@@ -380,6 +379,7 @@ files:
380
379
  - lib/onebox/web_helpers.rb
381
380
  - onebox.gemspec
382
381
  - templates/_layout.mustache
382
+ - templates/allowlistedgeneric.mustache
383
383
  - templates/amazon.mustache
384
384
  - templates/githubblob.mustache
385
385
  - templates/githubcommit.mustache
@@ -396,8 +396,6 @@ files:
396
396
  - templates/pubmed.mustache
397
397
  - templates/stackexchange.mustache
398
398
  - templates/twitterstatus.mustache
399
- - templates/wechatmp.mustache
400
- - templates/whitelistedgeneric.mustache
401
399
  - templates/wikimedia.mustache
402
400
  - templates/wikipedia.mustache
403
401
  - templates/xkcd.mustache
@@ -1,62 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Onebox
4
- module Engine
5
- class WechatMpOnebox
6
- include Engine
7
- include LayoutSupport
8
- include HTML
9
-
10
- always_https
11
- matches_regexp(/^https?:\/\/mp\.weixin\.qq\.com\/s.*$/)
12
-
13
- def tld
14
- @tld || @@matcher.match(@url)["tld"]
15
- end
16
-
17
- def http_params
18
- {
19
- 'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3',
20
- 'Accept-Encoding' => 'plain'
21
- }
22
- end
23
-
24
- private
25
-
26
- def extract_script_value(var_name)
27
- if (script_elem = raw.css("script").select { |script| script.inner_text.include? "var #{var_name} = " }) && script_elem.any?
28
- e = Nokogiri::HTML(script_elem[0].inner_text.match(/var\s+#{Regexp.quote(var_name)}\s+=\s+"(.*?)";/)[1])
29
- CGI::unescapeHTML(e.text.scan(/(?:\\x([a-f0-9]{2}))|(.)/i).map { |x| x[0] ? [x[0].to_i(16)].pack('U') : x[1] }.join)
30
- end
31
- end
32
-
33
- # TODO need to handle hotlink protection from wechat
34
- def image
35
- if banner_image = extract_script_value("msg_cdn_url")
36
- return banner_image
37
- end
38
-
39
- if (main_image = raw.css("img").select { |img| not img['class'] }) && main_image.any?
40
- attributes = main_image.first.attributes
41
-
42
- return attributes["data-src"].to_s if attributes["data-src"]
43
- end
44
- end
45
-
46
- def data
47
- title = CGI.unescapeHTML(raw.css("title").inner_text)
48
- by_info = CGI.unescapeHTML(raw.css("span.rich_media_meta_text.rich_media_meta_nickname").inner_text)
49
-
50
- result = {
51
- link: extract_script_value("msg_link") || link,
52
- title: title,
53
- image: image,
54
- description: extract_script_value("msg_desc"),
55
- by_info: by_info
56
- }
57
-
58
- result
59
- end
60
- end
61
- end
62
- end
@@ -1,4 +0,0 @@
1
- <h3><a href='{{link}}' target='_blank' rel='noopener'>{{title}}</a></h3>
2
- {{#by_info}}<b>{{by_info}}</b>{{/by_info}}
3
- <p>{{description}}</p>
4
-