onebox 1.9.30 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -6
- data/lib/onebox/engine.rb +1 -2
- data/lib/onebox/engine/{whitelisted_generic_onebox.rb → allowlisted_generic_onebox.rb} +12 -12
- data/lib/onebox/engine/audio_onebox.rb +1 -1
- data/lib/onebox/engine/facebook_media_onebox.rb +1 -1
- data/lib/onebox/engine/gfycat_onebox.rb +1 -1
- data/lib/onebox/engine/image_onebox.rb +1 -1
- data/lib/onebox/engine/reddit_media_onebox.rb +1 -1
- data/lib/onebox/engine/standard_embed.rb +1 -1
- data/lib/onebox/engine/video_onebox.rb +1 -1
- data/lib/onebox/engine/youtube_onebox.rb +1 -1
- data/lib/onebox/matcher.rb +4 -1
- data/lib/onebox/version.rb +1 -1
- data/templates/{whitelistedgeneric.mustache → allowlistedgeneric.mustache} +0 -0
- metadata +4 -6
- data/lib/onebox/engine/wechat_mp_onebox.rb +0 -62
- data/templates/wechatmp.mustache +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2a487ded9a3811b625e50e77656011a13996919443570a91d4a409625388d9f1
|
4
|
+
data.tar.gz: e3638c662cd35ecc7bb043c56a5d049931c17b250232dada802a75c37fe19dec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0bbc8c191618dbc0639eebc0c7c427bb461f565d14c85ea933837ed5765916a1d75ca948684de8a754bd8f4181363ad8da134f292966d841377024c952ddef51
|
7
|
+
data.tar.gz: c31848cdf2735a4a3f5fa9873e90d5f89b13829c01381c7e78d5298f587fbba1035505bd33dba8b17cce4ad3a75d41a8ceea54bc4ba20f8fa7f63236fce41e51
|
data/README.md
CHANGED
@@ -71,12 +71,11 @@ out the project. You can then try out URLs.
|
|
71
71
|
The server doesn't reload code changes automatically (PRs accepted!) so
|
72
72
|
make sure to hit CTRL-C and restart the server to try a code change out.
|
73
73
|
|
74
|
-
|
75
74
|
Adding Support for a new URL
|
76
75
|
----------------------------
|
77
76
|
|
78
77
|
1. Check if the site supports [oEmbed](http://oembed.com/) or [Open Graph](https://developers.facebook.com/docs/opengraph/).
|
79
|
-
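If it does, you can probably get away with just whitelisting the URL in `Onebox::Engine::WhitelistedGenericOnebox` (see: [Whitelisted Generic Onebox caveats](#user-content-whitelisted-generic-onebox-caveats)).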
|
78
|
+
If it does, you can probably get away with just allowing the URL in `Onebox::Engine::AllowlistedGenericOnebox` (see: [Allowlisted Generic Onebox caveats](#user-content-allowlisted-generic-onebox-caveats)).
|
80
79
|
If the site does not support open standards, you can create a new engine.
|
81
80
|
|
82
81
|
2. Create new onebox engine
|
@@ -164,16 +163,23 @@ Adding Support for a new URL
|
|
164
163
|
require_relative "engine/name_onebox"
|
165
164
|
```
|
166
165
|
|
167
|
-
|
168
|
-
Whitelisted Generic Onebox caveats
|
166
|
+
Allowlisted Generic Onebox caveats
|
169
167
|
----------------------------------
|
170
168
|
|
171
|
-
The Whitelisted Generic Onebox has some caveats for its use, beyond simply whitelisting the domain.
|
169
|
+
The Allowlisted Generic Onebox has some caveats for its use, beyond simply allowlisting the domain.
|
172
170
|
|
173
|
-
1. The domain must be whitelisted
|
171
|
+
1. The domain must be allowlisted
|
174
172
|
2. The URL you're oneboxing cannot be a root url (e.g. `http://example.com` won't work, but `http://example.com/page` will)
|
175
173
|
3. If the oneboxed URL responds with oEmbed and has a `rich` type: the `html` content must contain an `<iframe>`. Responses without an iframe will not be oneboxed.
|
176
174
|
|
175
|
+
Ignoring Canonical URLs
|
176
|
+
-----------------------
|
177
|
+
|
178
|
+
Onebox prefers to use canonical URLs instead of the raw inputted URL when searching for Open Graph metadata. If your site's canonical URL does not have opengraph metadata, use the `og:ignore_canonical` property to have Onebox ignore the canonical URL.
|
179
|
+
|
180
|
+
```html
|
181
|
+
<meta property="og:ignore_canonical" content="true" />
|
182
|
+
```
|
177
183
|
|
178
184
|
Installing
|
179
185
|
----------
|
data/lib/onebox/engine.rb
CHANGED
@@ -141,7 +141,7 @@ require_relative "engine/wikimedia_onebox"
|
|
141
141
|
require_relative "engine/wikipedia_onebox"
|
142
142
|
require_relative "engine/youtube_onebox"
|
143
143
|
require_relative "engine/youku_onebox"
|
144
|
-
require_relative "engine/whitelisted_generic_onebox"
|
144
|
+
require_relative "engine/allowlisted_generic_onebox"
|
145
145
|
require_relative "engine/pubmed_onebox"
|
146
146
|
require_relative "engine/soundcloud_onebox"
|
147
147
|
require_relative "engine/imgur_onebox"
|
@@ -168,7 +168,6 @@ require_relative "engine/twitch_clips_onebox"
|
|
168
168
|
require_relative "engine/twitch_stream_onebox"
|
169
169
|
require_relative "engine/twitch_video_onebox"
|
170
170
|
require_relative "engine/trello_onebox"
|
171
|
-
require_relative "engine/wechat_mp_onebox"
|
172
171
|
require_relative "engine/cloudapp_onebox"
|
173
172
|
require_relative "engine/wistia_onebox"
|
174
173
|
require_relative "engine/simplecast_onebox"
|
@@ -4,20 +4,20 @@ require 'htmlentities'
|
|
4
4
|
|
5
5
|
module Onebox
|
6
6
|
module Engine
|
7
|
-
class WhitelistedGenericOnebox
|
7
|
+
class AllowlistedGenericOnebox
|
8
8
|
include Engine
|
9
9
|
include StandardEmbed
|
10
10
|
include LayoutSupport
|
11
11
|
|
12
|
-
def self.
|
13
|
-
@
|
12
|
+
def self.allowed_domains=(list)
|
13
|
+
@allowed_domains = list
|
14
14
|
end
|
15
15
|
|
16
|
-
def self.
|
17
|
-
@
|
16
|
+
def self.allowed_domains
|
17
|
+
@allowed_domains ||= default_allowed_domains.dup
|
18
18
|
end
|
19
19
|
|
20
|
-
def self.
|
20
|
+
def self.default_allowed_domains
|
21
21
|
%w(
|
22
22
|
23hq.com
|
23
23
|
500px.com
|
@@ -176,13 +176,13 @@ module Onebox
|
|
176
176
|
!!(uri.path =~ /\d{4}\/\d{2}\//)
|
177
177
|
end
|
178
178
|
|
179
|
-
def self.
|
179
|
+
def self.allowed_twitter_labels
|
180
180
|
['brand', 'price', 'usd', 'cad', 'reading time', 'likes']
|
181
181
|
end
|
182
182
|
|
183
183
|
def self.===(other)
|
184
184
|
other.kind_of?(URI) ?
|
185
|
-
host_matches(other,
|
185
|
+
host_matches(other, allowed_domains) || probable_wordpress(other) || probable_discourse(other) :
|
186
186
|
super
|
187
187
|
end
|
188
188
|
|
@@ -233,11 +233,11 @@ module Onebox
|
|
233
233
|
end
|
234
234
|
|
235
235
|
# Twitter labels
|
236
|
-
if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!
|
236
|
+
if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i }
|
237
237
|
d[:label_1] = Onebox::Helpers.truncate(d[:label1])
|
238
238
|
d[:data_1] = Onebox::Helpers.truncate(d[:data1])
|
239
239
|
end
|
240
|
-
if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!
|
240
|
+
if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i }
|
241
241
|
unless Onebox::Helpers.blank?(d[:label_1])
|
242
242
|
d[:label_2] = Onebox::Helpers.truncate(d[:label2])
|
243
243
|
d[:data_2] = Onebox::Helpers.truncate(d[:data2])
|
@@ -261,7 +261,7 @@ module Onebox
|
|
261
261
|
def rewrite_https(html)
|
262
262
|
return unless html
|
263
263
|
uri = URI(@url)
|
264
|
-
if WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.rewrites)
|
264
|
+
if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
|
265
265
|
html = html.gsub("http://", "https://")
|
266
266
|
end
|
267
267
|
html
|
@@ -309,7 +309,7 @@ module Onebox
|
|
309
309
|
data[:height] &&
|
310
310
|
(
|
311
311
|
data[:html]["iframe"] ||
|
312
|
-
|
312
|
+
AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
|
313
313
|
)
|
314
314
|
end
|
315
315
|
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.*\.(mp3|ogg|opus|wav|m4a)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -32,7 +32,7 @@ module Onebox
|
|
32
32
|
add_oembed_provider(/nytimes\.com\//, 'https://www.nytimes.com/svc/oembed/json/')
|
33
33
|
|
34
34
|
def always_https?
|
35
|
-
|
35
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
|
36
36
|
end
|
37
37
|
|
38
38
|
def raw
|
@@ -8,7 +8,7 @@ module Onebox
|
|
8
8
|
matches_regexp(/^(https?:)?\/\/.*\.(mov|mp4|webm|ogv)(\?.*)?$/i)
|
9
9
|
|
10
10
|
def always_https?
|
11
|
-
|
11
|
+
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
|
12
12
|
end
|
13
13
|
|
14
14
|
def to_html
|
@@ -45,7 +45,7 @@ module Onebox
|
|
45
45
|
HTML
|
46
46
|
else
|
47
47
|
# for channel pages
|
48
|
-
html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @timeout).to_html
|
48
|
+
html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
|
49
49
|
return if Onebox::Helpers.blank?(html)
|
50
50
|
html.gsub!(/['"]\/\//, "https://")
|
51
51
|
html
|
data/lib/onebox/matcher.rb
CHANGED
@@ -16,7 +16,10 @@ module Onebox
|
|
16
16
|
uri = URI(@url)
|
17
17
|
return unless uri.port.nil? || Onebox.options.allowed_ports.include?(uri.port)
|
18
18
|
return unless uri.scheme.nil? || Onebox.options.allowed_schemes.include?(uri.scheme)
|
19
|
-
ordered_engines
|
19
|
+
ordered_engines
|
20
|
+
.select { |engine| engine === uri }
|
21
|
+
.sort_by { |engine| engine.to_s }
|
22
|
+
.last
|
20
23
|
rescue URI::InvalidURIError
|
21
24
|
nil
|
22
25
|
end
|
data/lib/onebox/version.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.30
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joanna Zeta
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2020-07-
|
13
|
+
date: 2020-07-14 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: addressable
|
@@ -300,6 +300,7 @@ files:
|
|
300
300
|
- Rakefile
|
301
301
|
- lib/onebox.rb
|
302
302
|
- lib/onebox/engine.rb
|
303
|
+
- lib/onebox/engine/allowlisted_generic_onebox.rb
|
303
304
|
- lib/onebox/engine/amazon_onebox.rb
|
304
305
|
- lib/onebox/engine/asciinema_onebox.rb
|
305
306
|
- lib/onebox/engine/audio_onebox.rb
|
@@ -353,8 +354,6 @@ files:
|
|
353
354
|
- lib/onebox/engine/typeform_onebox.rb
|
354
355
|
- lib/onebox/engine/video_onebox.rb
|
355
356
|
- lib/onebox/engine/vimeo_onebox.rb
|
356
|
-
- lib/onebox/engine/wechat_mp_onebox.rb
|
357
|
-
- lib/onebox/engine/whitelisted_generic_onebox.rb
|
358
357
|
- lib/onebox/engine/wikimedia_onebox.rb
|
359
358
|
- lib/onebox/engine/wikipedia_onebox.rb
|
360
359
|
- lib/onebox/engine/wistia_onebox.rb
|
@@ -380,6 +379,7 @@ files:
|
|
380
379
|
- lib/onebox/web_helpers.rb
|
381
380
|
- onebox.gemspec
|
382
381
|
- templates/_layout.mustache
|
382
|
+
- templates/allowlistedgeneric.mustache
|
383
383
|
- templates/amazon.mustache
|
384
384
|
- templates/githubblob.mustache
|
385
385
|
- templates/githubcommit.mustache
|
@@ -396,8 +396,6 @@ files:
|
|
396
396
|
- templates/pubmed.mustache
|
397
397
|
- templates/stackexchange.mustache
|
398
398
|
- templates/twitterstatus.mustache
|
399
|
-
- templates/wechatmp.mustache
|
400
|
-
- templates/whitelistedgeneric.mustache
|
401
399
|
- templates/wikimedia.mustache
|
402
400
|
- templates/wikipedia.mustache
|
403
401
|
- templates/xkcd.mustache
|
@@ -1,62 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Onebox
|
4
|
-
module Engine
|
5
|
-
class WechatMpOnebox
|
6
|
-
include Engine
|
7
|
-
include LayoutSupport
|
8
|
-
include HTML
|
9
|
-
|
10
|
-
always_https
|
11
|
-
matches_regexp(/^https?:\/\/mp\.weixin\.qq\.com\/s.*$/)
|
12
|
-
|
13
|
-
def tld
|
14
|
-
@tld || @@matcher.match(@url)["tld"]
|
15
|
-
end
|
16
|
-
|
17
|
-
def http_params
|
18
|
-
{
|
19
|
-
'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3',
|
20
|
-
'Accept-Encoding' => 'plain'
|
21
|
-
}
|
22
|
-
end
|
23
|
-
|
24
|
-
private
|
25
|
-
|
26
|
-
def extract_script_value(var_name)
|
27
|
-
if (script_elem = raw.css("script").select { |script| script.inner_text.include? "var #{var_name} = " }) && script_elem.any?
|
28
|
-
e = Nokogiri::HTML(script_elem[0].inner_text.match(/var\s+#{Regexp.quote(var_name)}\s+=\s+"(.*?)";/)[1])
|
29
|
-
CGI::unescapeHTML(e.text.scan(/(?:\\x([a-f0-9]{2}))|(.)/i).map { |x| x[0] ? [x[0].to_i(16)].pack('U') : x[1] }.join)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
# TODO need to handle hotlink protection from wechat
|
34
|
-
def image
|
35
|
-
if banner_image = extract_script_value("msg_cdn_url")
|
36
|
-
return banner_image
|
37
|
-
end
|
38
|
-
|
39
|
-
if (main_image = raw.css("img").select { |img| not img['class'] }) && main_image.any?
|
40
|
-
attributes = main_image.first.attributes
|
41
|
-
|
42
|
-
return attributes["data-src"].to_s if attributes["data-src"]
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def data
|
47
|
-
title = CGI.unescapeHTML(raw.css("title").inner_text)
|
48
|
-
by_info = CGI.unescapeHTML(raw.css("span.rich_media_meta_text.rich_media_meta_nickname").inner_text)
|
49
|
-
|
50
|
-
result = {
|
51
|
-
link: extract_script_value("msg_link") || link,
|
52
|
-
title: title,
|
53
|
-
image: image,
|
54
|
-
description: extract_script_value("msg_desc"),
|
55
|
-
by_info: by_info
|
56
|
-
}
|
57
|
-
|
58
|
-
result
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
data/templates/wechatmp.mustache
DELETED