onebox 1.9.30 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 72df1362582e006323722a42dc0787537c297df11f0c0e950b243759b10f74d7
4
- data.tar.gz: d0e483271248e8efc72207ec28dae1836e3469376fc7849383201906d4c522e2
3
+ metadata.gz: 2a487ded9a3811b625e50e77656011a13996919443570a91d4a409625388d9f1
4
+ data.tar.gz: e3638c662cd35ecc7bb043c56a5d049931c17b250232dada802a75c37fe19dec
5
5
  SHA512:
6
- metadata.gz: 2277c8b9be42305155c66fb4d97e5987868e34a997f87827a3189ab8073a538ae3afd0b9c89b0271e0671c334d096e87813360ef140298fcdfac2fa58948239a
7
- data.tar.gz: ae8306987d4a6ac15fb95a76a9ff44816fc9b84bd73f3ad4af63963fe40834b17de9d7d125f3e92a50ea523ab2cc84cf6b90eea1e8100abe62d991444dd8b946
6
+ metadata.gz: 0bbc8c191618dbc0639eebc0c7c427bb461f565d14c85ea933837ed5765916a1d75ca948684de8a754bd8f4181363ad8da134f292966d841377024c952ddef51
7
+ data.tar.gz: c31848cdf2735a4a3f5fa9873e90d5f89b13829c01381c7e78d5298f587fbba1035505bd33dba8b17cce4ad3a75d41a8ceea54bc4ba20f8fa7f63236fce41e51
data/README.md CHANGED
@@ -71,12 +71,11 @@ out the project. You can then try out URLs.
71
71
  The server doesn't reload code changes automatically (PRs accepted!) so
72
72
  make sure to hit CTRL-C and restart the server to try a code change out.
73
73
 
74
-
75
74
  Adding Support for a new URL
76
75
  ----------------------------
77
76
 
78
77
  1. Check if the site supports [oEmbed](http://oembed.com/) or [Open Graph](https://developers.facebook.com/docs/opengraph/).
79
- If it does, you can probably get away with just whitelisting the URL in `Onebox::Engine::WhitelistedGenericOnebox` (see: [Whitelisted Generic Onebox caveats](#user-content-whitelisted-generic-onebox-caveats)).
78
+ If it does, you can probably get away with just adding the site's domain to the allowed domains of `Onebox::Engine::AllowlistedGenericOnebox` (see: [Allowlisted Generic Onebox caveats](#user-content-allowlisted-generic-onebox-caveats)).
80
79
  If the site does not support open standards, you can create a new engine.
81
80
 
82
81
  2. Create new onebox engine
@@ -164,16 +163,23 @@ Adding Support for a new URL
164
163
  require_relative "engine/name_onebox"
165
164
  ```
166
165
 
167
-
168
- Whitelisted Generic Onebox caveats
166
+ Allowlisted Generic Onebox caveats
169
167
  ----------------------------------
170
168
 
171
- The Whitelisted Generic Onebox has some caveats for its use, beyond simply whitelisting the domain.
169
+ The Allowlisted Generic Onebox has some caveats for its use, beyond simply allowlisting the domain.
172
170
 
173
- 1. The domain must be whitelisted
171
+ 1. The domain must be allowlisted
174
172
  2. The URL you're oneboxing cannot be a root url (e.g. `http://example.com` won't work, but `http://example.com/page` will)
175
173
  3. If the oneboxed URL responds with oEmbed and has a `rich` type: the `html` content must contain an `<iframe>`. Responses without an iframe will not be oneboxed.
176
174
 
175
+ Ignoring Canonical URLs
176
+ -----------------------
177
+
178
+ Onebox prefers to use the canonical URL instead of the raw input URL when searching for Open Graph metadata. If your site's canonical URL does not have Open Graph metadata, use the `og:ignore_canonical` property to have Onebox ignore the canonical URL.
179
+
180
+ ```html
181
+ <meta property="og:ignore_canonical" content="true" />
182
+ ```
177
183
 
178
184
  Installing
179
185
  ----------
@@ -141,7 +141,7 @@ require_relative "engine/wikimedia_onebox"
141
141
  require_relative "engine/wikipedia_onebox"
142
142
  require_relative "engine/youtube_onebox"
143
143
  require_relative "engine/youku_onebox"
144
- require_relative "engine/whitelisted_generic_onebox"
144
+ require_relative "engine/allowlisted_generic_onebox"
145
145
  require_relative "engine/pubmed_onebox"
146
146
  require_relative "engine/soundcloud_onebox"
147
147
  require_relative "engine/imgur_onebox"
@@ -168,7 +168,6 @@ require_relative "engine/twitch_clips_onebox"
168
168
  require_relative "engine/twitch_stream_onebox"
169
169
  require_relative "engine/twitch_video_onebox"
170
170
  require_relative "engine/trello_onebox"
171
- require_relative "engine/wechat_mp_onebox"
172
171
  require_relative "engine/cloudapp_onebox"
173
172
  require_relative "engine/wistia_onebox"
174
173
  require_relative "engine/simplecast_onebox"
@@ -4,20 +4,20 @@ require 'htmlentities'
4
4
 
5
5
  module Onebox
6
6
  module Engine
7
- class WhitelistedGenericOnebox
7
+ class AllowlistedGenericOnebox
8
8
  include Engine
9
9
  include StandardEmbed
10
10
  include LayoutSupport
11
11
 
12
- def self.whitelist=(list)
13
- @whitelist = list
12
+ def self.allowed_domains=(list)
13
+ @allowed_domains = list
14
14
  end
15
15
 
16
- def self.whitelist
17
- @whitelist ||= default_whitelist.dup
16
+ def self.allowed_domains
17
+ @allowed_domains ||= default_allowed_domains.dup
18
18
  end
19
19
 
20
- def self.default_whitelist
20
+ def self.default_allowed_domains
21
21
  %w(
22
22
  23hq.com
23
23
  500px.com
@@ -176,13 +176,13 @@ module Onebox
176
176
  !!(uri.path =~ /\d{4}\/\d{2}\//)
177
177
  end
178
178
 
179
- def self.twitter_label_whitelist
179
+ def self.allowed_twitter_labels
180
180
  ['brand', 'price', 'usd', 'cad', 'reading time', 'likes']
181
181
  end
182
182
 
183
183
  def self.===(other)
184
184
  other.kind_of?(URI) ?
185
- host_matches(other, whitelist) || probable_wordpress(other) || probable_discourse(other) :
185
+ host_matches(other, allowed_domains) || probable_wordpress(other) || probable_discourse(other) :
186
186
  super
187
187
  end
188
188
 
@@ -233,11 +233,11 @@ module Onebox
233
233
  end
234
234
 
235
235
  # Twitter labels
236
- if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!WhitelistedGenericOnebox.twitter_label_whitelist.find { |l| d[:label1] =~ /#{l}/i }
236
+ if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i }
237
237
  d[:label_1] = Onebox::Helpers.truncate(d[:label1])
238
238
  d[:data_1] = Onebox::Helpers.truncate(d[:data1])
239
239
  end
240
- if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!WhitelistedGenericOnebox.twitter_label_whitelist.find { |l| d[:label2] =~ /#{l}/i }
240
+ if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i }
241
241
  unless Onebox::Helpers.blank?(d[:label_1])
242
242
  d[:label_2] = Onebox::Helpers.truncate(d[:label2])
243
243
  d[:data_2] = Onebox::Helpers.truncate(d[:data2])
@@ -261,7 +261,7 @@ module Onebox
261
261
  def rewrite_https(html)
262
262
  return unless html
263
263
  uri = URI(@url)
264
- if WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.rewrites)
264
+ if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
265
265
  html = html.gsub("http://", "https://")
266
266
  end
267
267
  html
@@ -309,7 +309,7 @@ module Onebox
309
309
  data[:height] &&
310
310
  (
311
311
  data[:html]["iframe"] ||
312
- WhitelistedGenericOnebox.html_providers.include?(data[:provider_name])
312
+ AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
313
313
  )
314
314
  end
315
315
 
@@ -8,7 +8,7 @@ module Onebox
8
8
  matches_regexp(/^(https?:)?\/\/.*\.(mp3|ogg|opus|wav|m4a)(\?.*)?$/i)
9
9
 
10
10
  def always_https?
11
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts)
11
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
12
12
  end
13
13
 
14
14
  def to_html
@@ -22,7 +22,7 @@ module Onebox
22
22
  </iframe>
23
23
  HTML
24
24
  else
25
- html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @timeout).to_html
25
+ html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
26
26
  return if Onebox::Helpers.blank?(html)
27
27
  html
28
28
  end
@@ -10,7 +10,7 @@ module Onebox
10
10
  always_https
11
11
 
12
12
  def self.priority
13
- # This engine should have priority over WhitelistedGenericOnebox.
13
+ # This engine should have priority over AllowlistedGenericOnebox.
14
14
  1
15
15
  end
16
16
 
@@ -8,7 +8,7 @@ module Onebox
8
8
  matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)
9
9
 
10
10
  def always_https?
11
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts)
11
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
12
12
  end
13
13
 
14
14
  def to_html
@@ -45,7 +45,7 @@ module Onebox
45
45
  </aside>
46
46
  HTML
47
47
  else
48
- html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @timeout).to_html
48
+ html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
49
49
  return if Onebox::Helpers.blank?(html)
50
50
  html
51
51
  end
@@ -32,7 +32,7 @@ module Onebox
32
32
  add_oembed_provider(/nytimes\.com\//, 'https://www.nytimes.com/svc/oembed/json/')
33
33
 
34
34
  def always_https?
35
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts) || super
35
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
36
36
  end
37
37
 
38
38
  def raw
@@ -8,7 +8,7 @@ module Onebox
8
8
  matches_regexp(/^(https?:)?\/\/.*\.(mov|mp4|webm|ogv)(\?.*)?$/i)
9
9
 
10
10
  def always_https?
11
- WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.https_hosts)
11
+ AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
12
12
  end
13
13
 
14
14
  def to_html
@@ -45,7 +45,7 @@ module Onebox
45
45
  HTML
46
46
  else
47
47
  # for channel pages
48
- html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @timeout).to_html
48
+ html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
49
49
  return if Onebox::Helpers.blank?(html)
50
50
  html.gsub!(/['"]\/\//, "https://")
51
51
  html
@@ -16,7 +16,10 @@ module Onebox
16
16
  uri = URI(@url)
17
17
  return unless uri.port.nil? || Onebox.options.allowed_ports.include?(uri.port)
18
18
  return unless uri.scheme.nil? || Onebox.options.allowed_schemes.include?(uri.scheme)
19
- ordered_engines.find { |engine| engine === uri }
19
+ ordered_engines
20
+ .select { |engine| engine === uri }
21
+ .sort_by { |engine| engine.to_s }
22
+ .last
20
23
  rescue URI::InvalidURIError
21
24
  nil
22
25
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "1.9.30"
4
+ VERSION = "2.0.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.30
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2020-07-07 00:00:00.000000000 Z
13
+ date: 2020-07-14 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable
@@ -300,6 +300,7 @@ files:
300
300
  - Rakefile
301
301
  - lib/onebox.rb
302
302
  - lib/onebox/engine.rb
303
+ - lib/onebox/engine/allowlisted_generic_onebox.rb
303
304
  - lib/onebox/engine/amazon_onebox.rb
304
305
  - lib/onebox/engine/asciinema_onebox.rb
305
306
  - lib/onebox/engine/audio_onebox.rb
@@ -353,8 +354,6 @@ files:
353
354
  - lib/onebox/engine/typeform_onebox.rb
354
355
  - lib/onebox/engine/video_onebox.rb
355
356
  - lib/onebox/engine/vimeo_onebox.rb
356
- - lib/onebox/engine/wechat_mp_onebox.rb
357
- - lib/onebox/engine/whitelisted_generic_onebox.rb
358
357
  - lib/onebox/engine/wikimedia_onebox.rb
359
358
  - lib/onebox/engine/wikipedia_onebox.rb
360
359
  - lib/onebox/engine/wistia_onebox.rb
@@ -380,6 +379,7 @@ files:
380
379
  - lib/onebox/web_helpers.rb
381
380
  - onebox.gemspec
382
381
  - templates/_layout.mustache
382
+ - templates/allowlistedgeneric.mustache
383
383
  - templates/amazon.mustache
384
384
  - templates/githubblob.mustache
385
385
  - templates/githubcommit.mustache
@@ -396,8 +396,6 @@ files:
396
396
  - templates/pubmed.mustache
397
397
  - templates/stackexchange.mustache
398
398
  - templates/twitterstatus.mustache
399
- - templates/wechatmp.mustache
400
- - templates/whitelistedgeneric.mustache
401
399
  - templates/wikimedia.mustache
402
400
  - templates/wikipedia.mustache
403
401
  - templates/xkcd.mustache
@@ -1,62 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Onebox
4
- module Engine
5
- class WechatMpOnebox
6
- include Engine
7
- include LayoutSupport
8
- include HTML
9
-
10
- always_https
11
- matches_regexp(/^https?:\/\/mp\.weixin\.qq\.com\/s.*$/)
12
-
13
- def tld
14
- @tld || @@matcher.match(@url)["tld"]
15
- end
16
-
17
- def http_params
18
- {
19
- 'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3',
20
- 'Accept-Encoding' => 'plain'
21
- }
22
- end
23
-
24
- private
25
-
26
- def extract_script_value(var_name)
27
- if (script_elem = raw.css("script").select { |script| script.inner_text.include? "var #{var_name} = " }) && script_elem.any?
28
- e = Nokogiri::HTML(script_elem[0].inner_text.match(/var\s+#{Regexp.quote(var_name)}\s+=\s+"(.*?)";/)[1])
29
- CGI::unescapeHTML(e.text.scan(/(?:\\x([a-f0-9]{2}))|(.)/i).map { |x| x[0] ? [x[0].to_i(16)].pack('U') : x[1] }.join)
30
- end
31
- end
32
-
33
- # TODO need to handle hotlink protection from wechat
34
- def image
35
- if banner_image = extract_script_value("msg_cdn_url")
36
- return banner_image
37
- end
38
-
39
- if (main_image = raw.css("img").select { |img| not img['class'] }) && main_image.any?
40
- attributes = main_image.first.attributes
41
-
42
- return attributes["data-src"].to_s if attributes["data-src"]
43
- end
44
- end
45
-
46
- def data
47
- title = CGI.unescapeHTML(raw.css("title").inner_text)
48
- by_info = CGI.unescapeHTML(raw.css("span.rich_media_meta_text.rich_media_meta_nickname").inner_text)
49
-
50
- result = {
51
- link: extract_script_value("msg_link") || link,
52
- title: title,
53
- image: image,
54
- description: extract_script_value("msg_desc"),
55
- by_info: by_info
56
- }
57
-
58
- result
59
- end
60
- end
61
- end
62
- end
@@ -1,4 +0,0 @@
1
- <h3><a href='{{link}}' target='_blank' rel='noopener'>{{title}}</a></h3>
2
- {{#by_info}}<b>{{by_info}}</b>{{/by_info}}
3
- <p>{{description}}</p>
4
-