onebox 1.8.81 → 1.8.82
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -0
- data/.rspec +0 -0
- data/.rubocop.yml +0 -0
- data/.ruby-gemset +0 -0
- data/.travis.yml +0 -0
- data/CHANGELOG.md +26 -26
- data/Gemfile +0 -0
- data/Gemfile.lock +154 -154
- data/Guardfile +0 -0
- data/LICENSE.txt +0 -0
- data/README.md +223 -223
- data/Rakefile +0 -0
- data/lib/onebox.rb +0 -0
- data/lib/onebox/engine.rb +188 -188
- data/lib/onebox/engine/amazon_onebox.rb +167 -167
- data/lib/onebox/engine/asciinema_onebox.rb +0 -0
- data/lib/onebox/engine/audio_onebox.rb +0 -0
- data/lib/onebox/engine/audioboom_onebox.rb +24 -24
- data/lib/onebox/engine/bandcamp_onebox.rb +32 -32
- data/lib/onebox/engine/cloudapp_onebox.rb +51 -51
- data/lib/onebox/engine/coub_onebox.rb +21 -21
- data/lib/onebox/engine/douban_onebox.rb +0 -0
- data/lib/onebox/engine/five_hundred_px_onebox.rb +17 -17
- data/lib/onebox/engine/flickr_onebox.rb +0 -0
- data/lib/onebox/engine/flickr_shortened_onebox.rb +0 -0
- data/lib/onebox/engine/gfycat_onebox.rb +0 -0
- data/lib/onebox/engine/giphy_onebox.rb +22 -22
- data/lib/onebox/engine/github_blob_onebox.rb +0 -0
- data/lib/onebox/engine/github_commit_onebox.rb +0 -0
- data/lib/onebox/engine/github_gist_onebox.rb +0 -0
- data/lib/onebox/engine/github_issue_onebox.rb +0 -0
- data/lib/onebox/engine/github_pullrequest_onebox.rb +0 -0
- data/lib/onebox/engine/gitlab_blob_onebox.rb +0 -0
- data/lib/onebox/engine/google_calendar_onebox.rb +0 -0
- data/lib/onebox/engine/google_docs_onebox.rb +0 -0
- data/lib/onebox/engine/google_maps_onebox.rb +0 -0
- data/lib/onebox/engine/google_photos_onebox.rb +57 -57
- data/lib/onebox/engine/google_play_app_onebox.rb +0 -0
- data/lib/onebox/engine/html.rb +0 -0
- data/lib/onebox/engine/image_onebox.rb +0 -0
- data/lib/onebox/engine/imgur_onebox.rb +65 -65
- data/lib/onebox/engine/instagram_onebox.rb +32 -32
- data/lib/onebox/engine/json.rb +0 -0
- data/lib/onebox/engine/kaltura_onebox.rb +31 -31
- data/lib/onebox/engine/mixcloud_onebox.rb +20 -20
- data/lib/onebox/engine/opengraph_image.rb +12 -12
- data/lib/onebox/engine/pastebin_onebox.rb +0 -0
- data/lib/onebox/engine/pdf_onebox.rb +0 -0
- data/lib/onebox/engine/pubmed_onebox.rb +0 -0
- data/lib/onebox/engine/replit_onebox.rb +24 -24
- data/lib/onebox/engine/sketchfab_onebox.rb +31 -31
- data/lib/onebox/engine/slides_onebox.rb +0 -0
- data/lib/onebox/engine/soundcloud_onebox.rb +31 -31
- data/lib/onebox/engine/stack_exchange_onebox.rb +0 -0
- data/lib/onebox/engine/standard_embed.rb +145 -145
- data/lib/onebox/engine/steam_store_onebox.rb +37 -37
- data/lib/onebox/engine/trello_onebox.rb +0 -0
- data/lib/onebox/engine/twitch_clips_onebox.rb +0 -0
- data/lib/onebox/engine/twitch_stream_onebox.rb +0 -0
- data/lib/onebox/engine/twitch_video_onebox.rb +0 -0
- data/lib/onebox/engine/twitter_status_onebox.rb +0 -0
- data/lib/onebox/engine/typeform_onebox.rb +41 -41
- data/lib/onebox/engine/video_onebox.rb +0 -0
- data/lib/onebox/engine/vimeo_onebox.rb +20 -20
- data/lib/onebox/engine/wechat_mp_onebox.rb +0 -0
- data/lib/onebox/engine/whitelisted_generic_onebox.rb +366 -366
- data/lib/onebox/engine/wikimedia_onebox.rb +0 -0
- data/lib/onebox/engine/wikipedia_onebox.rb +0 -0
- data/lib/onebox/engine/wistia_onebox.rb +27 -27
- data/lib/onebox/engine/xkcd_onebox.rb +0 -0
- data/lib/onebox/engine/youku_onebox.rb +0 -0
- data/lib/onebox/engine/youtube_onebox.rb +163 -163
- data/lib/onebox/file_type_finder.rb +0 -0
- data/lib/onebox/helpers.rb +188 -188
- data/lib/onebox/layout.rb +0 -0
- data/lib/onebox/layout_support.rb +0 -0
- data/lib/onebox/matcher.rb +0 -0
- data/lib/onebox/mixins/git_blob_onebox.rb +1 -1
- data/lib/onebox/mixins/twitch_onebox.rb +0 -0
- data/lib/onebox/oembed.rb +15 -15
- data/lib/onebox/open_graph.rb +90 -90
- data/lib/onebox/preview.rb +0 -0
- data/lib/onebox/sanitize_config.rb +0 -0
- data/lib/onebox/status_check.rb +0 -0
- data/lib/onebox/template_support.rb +0 -0
- data/lib/onebox/version.rb +5 -5
- data/lib/onebox/view.rb +0 -0
- data/lib/onebox/web.rb +0 -0
- data/lib/onebox/web_helpers.rb +0 -0
- data/onebox.gemspec +0 -0
- data/templates/_layout.mustache +0 -0
- data/templates/amazon.mustache +0 -0
- data/templates/douban.mustache +0 -0
- data/templates/githubblob.mustache +1 -1
- data/templates/githubcommit.mustache +0 -0
- data/templates/githubgist.mustache +0 -0
- data/templates/githubissue.mustache +0 -0
- data/templates/githubpullrequest.mustache +0 -0
- data/templates/gitlabblob.mustache +0 -0
- data/templates/googledocs.mustache +0 -0
- data/templates/googleplayapp.mustache +0 -0
- data/templates/instagram.mustache +0 -0
- data/templates/pastebin.mustache +0 -0
- data/templates/pdf.mustache +0 -0
- data/templates/pubmed.mustache +0 -0
- data/templates/stackexchange.mustache +0 -0
- data/templates/twitterstatus.mustache +0 -0
- data/templates/wechatmp.mustache +0 -0
- data/templates/whitelistedgeneric.mustache +0 -0
- data/templates/wikimedia.mustache +0 -0
- data/templates/wikipedia.mustache +0 -0
- data/templates/xkcd.mustache +0 -0
- metadata +3 -4
File without changes
|
File without changes
|
@@ -1,27 +1,27 @@
|
|
1
|
-
module Onebox
|
2
|
-
module Engine
|
3
|
-
class WistiaOnebox
|
4
|
-
include Engine
|
5
|
-
include StandardEmbed
|
6
|
-
|
7
|
-
matches_regexp(/https?:\/\/(.+)?(wistia.com|wi.st)\/(medias|embed)\/.*/)
|
8
|
-
always_https
|
9
|
-
|
10
|
-
def to_html
|
11
|
-
get_oembed.html
|
12
|
-
end
|
13
|
-
|
14
|
-
def placeholder_html
|
15
|
-
oembed = get_oembed
|
16
|
-
return if Onebox::Helpers.blank?(oembed.thumbnail_url)
|
17
|
-
"<img src='#{oembed.thumbnail_url}' #{oembed.title_attr}>"
|
18
|
-
end
|
19
|
-
|
20
|
-
private
|
21
|
-
|
22
|
-
def get_oembed_url
|
23
|
-
"https://fast.wistia.com/oembed?embedType=iframe&url=#{url}"
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
1
|
+
module Onebox
|
2
|
+
module Engine
|
3
|
+
class WistiaOnebox
|
4
|
+
include Engine
|
5
|
+
include StandardEmbed
|
6
|
+
|
7
|
+
matches_regexp(/https?:\/\/(.+)?(wistia.com|wi.st)\/(medias|embed)\/.*/)
|
8
|
+
always_https
|
9
|
+
|
10
|
+
def to_html
|
11
|
+
get_oembed.html
|
12
|
+
end
|
13
|
+
|
14
|
+
def placeholder_html
|
15
|
+
oembed = get_oembed
|
16
|
+
return if Onebox::Helpers.blank?(oembed.thumbnail_url)
|
17
|
+
"<img src='#{oembed.thumbnail_url}' #{oembed.title_attr}>"
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def get_oembed_url
|
23
|
+
"https://fast.wistia.com/oembed?embedType=iframe&url=#{url}"
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
File without changes
|
File without changes
|
@@ -1,163 +1,163 @@
|
|
1
|
-
require 'onebox/oembed'
|
2
|
-
|
3
|
-
module Onebox
|
4
|
-
module Engine
|
5
|
-
class YoutubeOnebox
|
6
|
-
include Engine
|
7
|
-
include StandardEmbed
|
8
|
-
|
9
|
-
matches_regexp(/^https?:\/\/(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)\/.+$/)
|
10
|
-
always_https
|
11
|
-
|
12
|
-
WIDTH ||= 480
|
13
|
-
HEIGHT ||= 360
|
14
|
-
|
15
|
-
def placeholder_html
|
16
|
-
if video_id
|
17
|
-
"<img src='https://i.ytimg.com/vi/#{video_id}/hqdefault.jpg' width='#{WIDTH}' height='#{HEIGHT}' #{video_oembed_data.title_attr}>"
|
18
|
-
elsif list_id
|
19
|
-
"<img src='#{list_thumbnail_url}' width='#{WIDTH}' height='#{HEIGHT}' #{list_oembed_data.title_attr}>"
|
20
|
-
else
|
21
|
-
to_html
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def to_html
|
26
|
-
if video_id
|
27
|
-
<<-HTML
|
28
|
-
<iframe width="#{WIDTH}"
|
29
|
-
height="#{HEIGHT}"
|
30
|
-
src="https://www.youtube.com/embed/#{video_id}?#{embed_params}"
|
31
|
-
frameborder="0"
|
32
|
-
allowfullscreen>
|
33
|
-
</iframe>
|
34
|
-
HTML
|
35
|
-
elsif list_id
|
36
|
-
<<-HTML
|
37
|
-
<iframe width="#{WIDTH}"
|
38
|
-
height="#{HEIGHT}"
|
39
|
-
src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
|
40
|
-
frameborder="0"
|
41
|
-
allowfullscreen>
|
42
|
-
</iframe>
|
43
|
-
HTML
|
44
|
-
else
|
45
|
-
# for channel pages
|
46
|
-
html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @cache, @timeout).to_html
|
47
|
-
return if Onebox::Helpers.blank?(html)
|
48
|
-
html.gsub!(/['"]\/\//, "https://")
|
49
|
-
html
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
def video_title
|
54
|
-
@video_title ||= video_oembed_data.title
|
55
|
-
end
|
56
|
-
|
57
|
-
private
|
58
|
-
|
59
|
-
def video_id
|
60
|
-
@video_id ||= begin
|
61
|
-
# http://youtu.be/afyK1HSFfgw
|
62
|
-
if uri.host["youtu.be"]
|
63
|
-
id = uri.path[/\/([\w\-]+)/, 1]
|
64
|
-
return id if id
|
65
|
-
end
|
66
|
-
|
67
|
-
# https://www.youtube.com/embed/vsF0K3Ou1v0
|
68
|
-
if uri.path["/embed/"]
|
69
|
-
id = uri.path[/\/embed\/([\w\-]+)/, 1]
|
70
|
-
return id if id
|
71
|
-
end
|
72
|
-
|
73
|
-
# https://www.youtube.com/watch?v=Z0UISCEe52Y
|
74
|
-
params['v']
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def list_id
|
79
|
-
@list_id ||= params['list']
|
80
|
-
end
|
81
|
-
|
82
|
-
def list_thumbnail_url
|
83
|
-
@list_thumbnail_url ||= begin
|
84
|
-
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
|
85
|
-
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
86
|
-
data = Onebox::Oembed.new(response)
|
87
|
-
data.thumbnail_url
|
88
|
-
rescue
|
89
|
-
nil
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
def video_oembed_data
|
94
|
-
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=#{video_id}"
|
95
|
-
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
96
|
-
Onebox::Oembed.new(response)
|
97
|
-
end
|
98
|
-
|
99
|
-
def list_oembed_data
|
100
|
-
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
|
101
|
-
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
102
|
-
Onebox::Oembed.new(response)
|
103
|
-
end
|
104
|
-
|
105
|
-
def embed_params
|
106
|
-
p = { 'feature' => 'oembed', 'wmode' => 'opaque' }
|
107
|
-
|
108
|
-
p['list'] = list_id if list_id
|
109
|
-
|
110
|
-
# Parse timestrings, and assign the result as a start= parameter
|
111
|
-
start = if params['start']
|
112
|
-
params['start']
|
113
|
-
elsif params['t']
|
114
|
-
params['t']
|
115
|
-
elsif uri.fragment && uri.fragment.start_with?('t=')
|
116
|
-
# referencing uri is safe here because any throws were already caught by video_id returning nil
|
117
|
-
# remove the t= from the start
|
118
|
-
uri.fragment[2..-1]
|
119
|
-
end
|
120
|
-
|
121
|
-
p['start'] = parse_timestring(start) if start
|
122
|
-
p['end'] = parse_timestring params['end'] if params['end']
|
123
|
-
|
124
|
-
# Official workaround for looping videos
|
125
|
-
# https://developers.google.com/youtube/player_parameters#loop
|
126
|
-
# use params.include? so that you can just add "&loop"
|
127
|
-
if params.include?('loop')
|
128
|
-
p['loop'] = 1
|
129
|
-
p['playlist'] = video_id
|
130
|
-
end
|
131
|
-
|
132
|
-
# https://developers.google.com/youtube/player_parameters#rel
|
133
|
-
p['rel'] = 0 if params.include?('rel')
|
134
|
-
|
135
|
-
URI.encode_www_form(p)
|
136
|
-
end
|
137
|
-
|
138
|
-
def parse_timestring(string)
|
139
|
-
if string =~ /(\d+h)?(\d+m)?(\d+s?)?/
|
140
|
-
($1.to_i * 3600) + ($2.to_i * 60) + $3.to_i
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
def params
|
145
|
-
return {} unless uri.query
|
146
|
-
# This mapping is necessary because CGI.parse returns a hash of keys to arrays.
|
147
|
-
# And *that* is necessary because querystrings support arrays, so they
|
148
|
-
# force you to deal with it to avoid security issues that would pop up
|
149
|
-
# if one day it suddenly gave you an array.
|
150
|
-
#
|
151
|
-
# However, we aren't interested. Just take the first one.
|
152
|
-
@params ||= begin
|
153
|
-
p = {}
|
154
|
-
CGI.parse(uri.query).each { |k, v| p[k] = v.first }
|
155
|
-
p
|
156
|
-
end
|
157
|
-
rescue
|
158
|
-
{}
|
159
|
-
end
|
160
|
-
|
161
|
-
end
|
162
|
-
end
|
163
|
-
end
|
1
|
+
require 'onebox/oembed'
|
2
|
+
|
3
|
+
module Onebox
|
4
|
+
module Engine
|
5
|
+
class YoutubeOnebox
|
6
|
+
include Engine
|
7
|
+
include StandardEmbed
|
8
|
+
|
9
|
+
matches_regexp(/^https?:\/\/(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)\/.+$/)
|
10
|
+
always_https
|
11
|
+
|
12
|
+
WIDTH ||= 480
|
13
|
+
HEIGHT ||= 360
|
14
|
+
|
15
|
+
def placeholder_html
|
16
|
+
if video_id
|
17
|
+
"<img src='https://i.ytimg.com/vi/#{video_id}/hqdefault.jpg' width='#{WIDTH}' height='#{HEIGHT}' #{video_oembed_data.title_attr}>"
|
18
|
+
elsif list_id
|
19
|
+
"<img src='#{list_thumbnail_url}' width='#{WIDTH}' height='#{HEIGHT}' #{list_oembed_data.title_attr}>"
|
20
|
+
else
|
21
|
+
to_html
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def to_html
|
26
|
+
if video_id
|
27
|
+
<<-HTML
|
28
|
+
<iframe width="#{WIDTH}"
|
29
|
+
height="#{HEIGHT}"
|
30
|
+
src="https://www.youtube.com/embed/#{video_id}?#{embed_params}"
|
31
|
+
frameborder="0"
|
32
|
+
allowfullscreen>
|
33
|
+
</iframe>
|
34
|
+
HTML
|
35
|
+
elsif list_id
|
36
|
+
<<-HTML
|
37
|
+
<iframe width="#{WIDTH}"
|
38
|
+
height="#{HEIGHT}"
|
39
|
+
src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
|
40
|
+
frameborder="0"
|
41
|
+
allowfullscreen>
|
42
|
+
</iframe>
|
43
|
+
HTML
|
44
|
+
else
|
45
|
+
# for channel pages
|
46
|
+
html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @cache, @timeout).to_html
|
47
|
+
return if Onebox::Helpers.blank?(html)
|
48
|
+
html.gsub!(/['"]\/\//, "https://")
|
49
|
+
html
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def video_title
|
54
|
+
@video_title ||= video_oembed_data.title
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def video_id
|
60
|
+
@video_id ||= begin
|
61
|
+
# http://youtu.be/afyK1HSFfgw
|
62
|
+
if uri.host["youtu.be"]
|
63
|
+
id = uri.path[/\/([\w\-]+)/, 1]
|
64
|
+
return id if id
|
65
|
+
end
|
66
|
+
|
67
|
+
# https://www.youtube.com/embed/vsF0K3Ou1v0
|
68
|
+
if uri.path["/embed/"]
|
69
|
+
id = uri.path[/\/embed\/([\w\-]+)/, 1]
|
70
|
+
return id if id
|
71
|
+
end
|
72
|
+
|
73
|
+
# https://www.youtube.com/watch?v=Z0UISCEe52Y
|
74
|
+
params['v']
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def list_id
|
79
|
+
@list_id ||= params['list']
|
80
|
+
end
|
81
|
+
|
82
|
+
def list_thumbnail_url
|
83
|
+
@list_thumbnail_url ||= begin
|
84
|
+
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
|
85
|
+
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
86
|
+
data = Onebox::Oembed.new(response)
|
87
|
+
data.thumbnail_url
|
88
|
+
rescue
|
89
|
+
nil
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def video_oembed_data
|
94
|
+
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=#{video_id}"
|
95
|
+
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
96
|
+
Onebox::Oembed.new(response)
|
97
|
+
end
|
98
|
+
|
99
|
+
def list_oembed_data
|
100
|
+
url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
|
101
|
+
response = Onebox::Helpers.fetch_response(url) rescue "{}"
|
102
|
+
Onebox::Oembed.new(response)
|
103
|
+
end
|
104
|
+
|
105
|
+
def embed_params
|
106
|
+
p = { 'feature' => 'oembed', 'wmode' => 'opaque' }
|
107
|
+
|
108
|
+
p['list'] = list_id if list_id
|
109
|
+
|
110
|
+
# Parse timestrings, and assign the result as a start= parameter
|
111
|
+
start = if params['start']
|
112
|
+
params['start']
|
113
|
+
elsif params['t']
|
114
|
+
params['t']
|
115
|
+
elsif uri.fragment && uri.fragment.start_with?('t=')
|
116
|
+
# referencing uri is safe here because any throws were already caught by video_id returning nil
|
117
|
+
# remove the t= from the start
|
118
|
+
uri.fragment[2..-1]
|
119
|
+
end
|
120
|
+
|
121
|
+
p['start'] = parse_timestring(start) if start
|
122
|
+
p['end'] = parse_timestring params['end'] if params['end']
|
123
|
+
|
124
|
+
# Official workaround for looping videos
|
125
|
+
# https://developers.google.com/youtube/player_parameters#loop
|
126
|
+
# use params.include? so that you can just add "&loop"
|
127
|
+
if params.include?('loop')
|
128
|
+
p['loop'] = 1
|
129
|
+
p['playlist'] = video_id
|
130
|
+
end
|
131
|
+
|
132
|
+
# https://developers.google.com/youtube/player_parameters#rel
|
133
|
+
p['rel'] = 0 if params.include?('rel')
|
134
|
+
|
135
|
+
URI.encode_www_form(p)
|
136
|
+
end
|
137
|
+
|
138
|
+
def parse_timestring(string)
|
139
|
+
if string =~ /(\d+h)?(\d+m)?(\d+s?)?/
|
140
|
+
($1.to_i * 3600) + ($2.to_i * 60) + $3.to_i
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def params
|
145
|
+
return {} unless uri.query
|
146
|
+
# This mapping is necessary because CGI.parse returns a hash of keys to arrays.
|
147
|
+
# And *that* is necessary because querystrings support arrays, so they
|
148
|
+
# force you to deal with it to avoid security issues that would pop up
|
149
|
+
# if one day it suddenly gave you an array.
|
150
|
+
#
|
151
|
+
# However, we aren't interested. Just take the first one.
|
152
|
+
@params ||= begin
|
153
|
+
p = {}
|
154
|
+
CGI.parse(uri.query).each { |k, v| p[k] = v.first }
|
155
|
+
p
|
156
|
+
end
|
157
|
+
rescue
|
158
|
+
{}
|
159
|
+
end
|
160
|
+
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
File without changes
|
data/lib/onebox/helpers.rb
CHANGED
@@ -1,188 +1,188 @@
|
|
1
|
-
module Onebox
|
2
|
-
module Helpers
|
3
|
-
|
4
|
-
class DownloadTooLarge < StandardError; end
|
5
|
-
|
6
|
-
def self.symbolize_keys(hash)
|
7
|
-
return {} if hash.nil?
|
8
|
-
|
9
|
-
hash.inject({}) do |result, (key, value)|
|
10
|
-
new_key = key.is_a?(String) ? key.to_sym : key
|
11
|
-
new_value = value.is_a?(Hash) ? symbolize_keys(value) : value
|
12
|
-
result[new_key] = new_value
|
13
|
-
result
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.clean(html)
|
18
|
-
html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
|
19
|
-
end
|
20
|
-
|
21
|
-
def self.fetch_html_doc(url, headers = nil)
|
22
|
-
response = (fetch_response(url, nil, nil, headers) rescue nil)
|
23
|
-
doc = Nokogiri::HTML(response)
|
24
|
-
|
25
|
-
ignore_canonical = doc.at('meta[property="og:ignore_canonical"]')
|
26
|
-
unless ignore_canonical && ignore_canonical['content'].to_s == 'true'
|
27
|
-
# prefer canonical link
|
28
|
-
canonical_link = doc.at('//link[@rel="canonical"]/@href')
|
29
|
-
if canonical_link && "#{URI(canonical_link).host}#{URI(canonical_link).path}" != "#{URI(url).host}#{URI(url).path}"
|
30
|
-
response = (fetch_response(canonical_link, nil, nil, headers) rescue nil)
|
31
|
-
doc = Nokogiri::HTML(response) if response
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
doc
|
36
|
-
end
|
37
|
-
|
38
|
-
def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
|
39
|
-
|
40
|
-
limit ||= 5
|
41
|
-
limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
|
42
|
-
|
43
|
-
raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
|
44
|
-
|
45
|
-
uri = URI(location)
|
46
|
-
uri = URI("#{domain}#{location}") if !uri.host
|
47
|
-
|
48
|
-
result = StringIO.new
|
49
|
-
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.is_a?(URI::HTTPS)) do |http|
|
50
|
-
http.open_timeout = Onebox.options.connect_timeout
|
51
|
-
http.read_timeout = Onebox.options.timeout
|
52
|
-
if uri.is_a?(URI::HTTPS)
|
53
|
-
http.use_ssl = true
|
54
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
55
|
-
end
|
56
|
-
|
57
|
-
headers ||= {}
|
58
|
-
|
59
|
-
if Onebox.options.user_agent && !headers['User-Agent']
|
60
|
-
headers['User-Agent'] = Onebox.options.user_agent
|
61
|
-
end
|
62
|
-
|
63
|
-
request = Net::HTTP::Get.new(uri.request_uri, headers)
|
64
|
-
start_time = Time.now
|
65
|
-
|
66
|
-
size_bytes = Onebox.options.max_download_kb * 1024
|
67
|
-
http.request(request) do |response|
|
68
|
-
|
69
|
-
if cookie = response.get_fields('set-cookie')
|
70
|
-
header = { 'Cookie' => cookie.join }
|
71
|
-
end
|
72
|
-
|
73
|
-
header = nil unless header.is_a? Hash
|
74
|
-
|
75
|
-
code = response.code.to_i
|
76
|
-
unless code === 200
|
77
|
-
response.error! unless [301, 302].include?(code)
|
78
|
-
return fetch_response(
|
79
|
-
response['location'],
|
80
|
-
limit - 1,
|
81
|
-
"#{uri.scheme}://#{uri.host}",
|
82
|
-
header
|
83
|
-
)
|
84
|
-
end
|
85
|
-
|
86
|
-
response.read_body do |chunk|
|
87
|
-
result.write(chunk)
|
88
|
-
raise DownloadTooLarge.new if result.size > size_bytes
|
89
|
-
raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
|
90
|
-
end
|
91
|
-
|
92
|
-
return result.string
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
def self.fetch_content_length(location)
|
98
|
-
uri = URI(location)
|
99
|
-
|
100
|
-
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.is_a?(URI::HTTPS)) do |http|
|
101
|
-
http.open_timeout = Onebox.options.connect_timeout
|
102
|
-
http.read_timeout = Onebox.options.timeout
|
103
|
-
if uri.is_a?(URI::HTTPS)
|
104
|
-
http.use_ssl = true
|
105
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
106
|
-
end
|
107
|
-
|
108
|
-
http.request_head([uri.path, uri.query].join("?")) do |response|
|
109
|
-
code = response.code.to_i
|
110
|
-
unless code === 200 || Onebox::Helpers.blank?(response.header['content-length'])
|
111
|
-
return nil
|
112
|
-
end
|
113
|
-
return response.header['content-length']
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
def self.pretty_filesize(size)
|
119
|
-
conv = [ 'B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB' ]
|
120
|
-
scale = 1024
|
121
|
-
|
122
|
-
ndx = 1
|
123
|
-
if (size < 2 * (scale**ndx)) then
|
124
|
-
return "#{(size)} #{conv[ndx - 1]}"
|
125
|
-
end
|
126
|
-
size = size.to_f
|
127
|
-
[2, 3, 4, 5, 6, 7].each do |i|
|
128
|
-
if (size < 2 * (scale**i)) then
|
129
|
-
return "#{'%.2f' % (size / (scale**(i - 1)))} #{conv[i - 1]}"
|
130
|
-
end
|
131
|
-
end
|
132
|
-
ndx = 7
|
133
|
-
return "#{'%.2f' % (size / (scale**(ndx - 1)))} #{conv[ndx - 1]}"
|
134
|
-
end
|
135
|
-
|
136
|
-
def self.click_to_scroll_div(width = 690, height = 400)
|
137
|
-
"<div style=\"background:transparent;position:relative;width:#{width}px;height:#{height}px;top:#{height}px;margin-top:-#{height}px;\" onClick=\"style.pointerEvents='none'\"></div>"
|
138
|
-
end
|
139
|
-
|
140
|
-
def self.blank?(value)
|
141
|
-
if value.nil?
|
142
|
-
true
|
143
|
-
elsif String === value
|
144
|
-
value.empty? || !(/[[:^space:]]/ === value)
|
145
|
-
else
|
146
|
-
value.respond_to?(:empty?) ? !!value.empty? : !value
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
def self.truncate(string, length = 50)
|
151
|
-
string.size > length ? string[0...(string.rindex(" ", length) || length)] + "..." : string
|
152
|
-
end
|
153
|
-
|
154
|
-
def self.get(meta, attr)
|
155
|
-
(meta && !blank?(meta[attr])) ? sanitize(meta[attr]) : nil
|
156
|
-
end
|
157
|
-
|
158
|
-
def self.sanitize(value, length = 50)
|
159
|
-
return nil if blank?(value)
|
160
|
-
Sanitize.fragment(value).strip
|
161
|
-
end
|
162
|
-
|
163
|
-
def self.normalize_url_for_output(url)
|
164
|
-
return "" unless url
|
165
|
-
url = url.dup
|
166
|
-
# expect properly encoded url, remove any unsafe chars
|
167
|
-
url.gsub!("'", "&apos;")
|
168
|
-
url.gsub!('"', "&quot;")
|
169
|
-
url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%]/, "")
|
170
|
-
url
|
171
|
-
end
|
172
|
-
|
173
|
-
def self.get_absolute_image_url(src, url)
|
174
|
-
if src && !!(src =~ /^\/\//)
|
175
|
-
uri = URI(url)
|
176
|
-
src = "#{uri.scheme}:#{src}"
|
177
|
-
elsif src && src.match(/^https?:\/\//i).nil?
|
178
|
-
uri = URI(url)
|
179
|
-
src = if !src.start_with?("/") && uri.path.present?
|
180
|
-
"#{uri.scheme}://#{uri.host.sub(/\/$/, '')}#{uri.path.sub(/\/$/, '')}/#{src.sub(/^\//, '')}"
|
181
|
-
else
|
182
|
-
"#{uri.scheme}://#{uri.host.sub(/\/$/, '')}/#{src.sub(/^\//, '')}"
|
183
|
-
end
|
184
|
-
end
|
185
|
-
src
|
186
|
-
end
|
187
|
-
end
|
188
|
-
end
|
1
|
+
module Onebox
|
2
|
+
module Helpers
|
3
|
+
|
4
|
+
class DownloadTooLarge < StandardError; end
|
5
|
+
|
6
|
+
def self.symbolize_keys(hash)
|
7
|
+
return {} if hash.nil?
|
8
|
+
|
9
|
+
hash.inject({}) do |result, (key, value)|
|
10
|
+
new_key = key.is_a?(String) ? key.to_sym : key
|
11
|
+
new_value = value.is_a?(Hash) ? symbolize_keys(value) : value
|
12
|
+
result[new_key] = new_value
|
13
|
+
result
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.clean(html)
|
18
|
+
html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.fetch_html_doc(url, headers = nil)
|
22
|
+
response = (fetch_response(url, nil, nil, headers) rescue nil)
|
23
|
+
doc = Nokogiri::HTML(response)
|
24
|
+
|
25
|
+
ignore_canonical = doc.at('meta[property="og:ignore_canonical"]')
|
26
|
+
unless ignore_canonical && ignore_canonical['content'].to_s == 'true'
|
27
|
+
# prefer canonical link
|
28
|
+
canonical_link = doc.at('//link[@rel="canonical"]/@href')
|
29
|
+
if canonical_link && "#{URI(canonical_link).host}#{URI(canonical_link).path}" != "#{URI(url).host}#{URI(url).path}"
|
30
|
+
response = (fetch_response(canonical_link, nil, nil, headers) rescue nil)
|
31
|
+
doc = Nokogiri::HTML(response) if response
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
doc
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
|
39
|
+
|
40
|
+
limit ||= 5
|
41
|
+
limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
|
42
|
+
|
43
|
+
raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
|
44
|
+
|
45
|
+
uri = URI(location)
|
46
|
+
uri = URI("#{domain}#{location}") if !uri.host
|
47
|
+
|
48
|
+
result = StringIO.new
|
49
|
+
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.is_a?(URI::HTTPS)) do |http|
|
50
|
+
http.open_timeout = Onebox.options.connect_timeout
|
51
|
+
http.read_timeout = Onebox.options.timeout
|
52
|
+
if uri.is_a?(URI::HTTPS)
|
53
|
+
http.use_ssl = true
|
54
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
55
|
+
end
|
56
|
+
|
57
|
+
headers ||= {}
|
58
|
+
|
59
|
+
if Onebox.options.user_agent && !headers['User-Agent']
|
60
|
+
headers['User-Agent'] = Onebox.options.user_agent
|
61
|
+
end
|
62
|
+
|
63
|
+
request = Net::HTTP::Get.new(uri.request_uri, headers)
|
64
|
+
start_time = Time.now
|
65
|
+
|
66
|
+
size_bytes = Onebox.options.max_download_kb * 1024
|
67
|
+
http.request(request) do |response|
|
68
|
+
|
69
|
+
if cookie = response.get_fields('set-cookie')
|
70
|
+
header = { 'Cookie' => cookie.join }
|
71
|
+
end
|
72
|
+
|
73
|
+
header = nil unless header.is_a? Hash
|
74
|
+
|
75
|
+
code = response.code.to_i
|
76
|
+
unless code === 200
|
77
|
+
response.error! unless [301, 302].include?(code)
|
78
|
+
return fetch_response(
|
79
|
+
response['location'],
|
80
|
+
limit - 1,
|
81
|
+
"#{uri.scheme}://#{uri.host}",
|
82
|
+
header
|
83
|
+
)
|
84
|
+
end
|
85
|
+
|
86
|
+
response.read_body do |chunk|
|
87
|
+
result.write(chunk)
|
88
|
+
raise DownloadTooLarge.new if result.size > size_bytes
|
89
|
+
raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
|
90
|
+
end
|
91
|
+
|
92
|
+
return result.string
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def self.fetch_content_length(location)
|
98
|
+
uri = URI(location)
|
99
|
+
|
100
|
+
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.is_a?(URI::HTTPS)) do |http|
|
101
|
+
http.open_timeout = Onebox.options.connect_timeout
|
102
|
+
http.read_timeout = Onebox.options.timeout
|
103
|
+
if uri.is_a?(URI::HTTPS)
|
104
|
+
http.use_ssl = true
|
105
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
106
|
+
end
|
107
|
+
|
108
|
+
http.request_head([uri.path, uri.query].join("?")) do |response|
|
109
|
+
code = response.code.to_i
|
110
|
+
unless code === 200 || Onebox::Helpers.blank?(response.header['content-length'])
|
111
|
+
return nil
|
112
|
+
end
|
113
|
+
return response.header['content-length']
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
def self.pretty_filesize(size)
|
119
|
+
conv = [ 'B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB' ]
|
120
|
+
scale = 1024
|
121
|
+
|
122
|
+
ndx = 1
|
123
|
+
if (size < 2 * (scale**ndx)) then
|
124
|
+
return "#{(size)} #{conv[ndx - 1]}"
|
125
|
+
end
|
126
|
+
size = size.to_f
|
127
|
+
[2, 3, 4, 5, 6, 7].each do |i|
|
128
|
+
if (size < 2 * (scale**i)) then
|
129
|
+
return "#{'%.2f' % (size / (scale**(i - 1)))} #{conv[i - 1]}"
|
130
|
+
end
|
131
|
+
end
|
132
|
+
ndx = 7
|
133
|
+
return "#{'%.2f' % (size / (scale**(ndx - 1)))} #{conv[ndx - 1]}"
|
134
|
+
end
|
135
|
+
|
136
|
+
def self.click_to_scroll_div(width = 690, height = 400)
|
137
|
+
"<div style=\"background:transparent;position:relative;width:#{width}px;height:#{height}px;top:#{height}px;margin-top:-#{height}px;\" onClick=\"style.pointerEvents='none'\"></div>"
|
138
|
+
end
|
139
|
+
|
140
|
+
def self.blank?(value)
|
141
|
+
if value.nil?
|
142
|
+
true
|
143
|
+
elsif String === value
|
144
|
+
value.empty? || !(/[[:^space:]]/ === value)
|
145
|
+
else
|
146
|
+
value.respond_to?(:empty?) ? !!value.empty? : !value
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
def self.truncate(string, length = 50)
|
151
|
+
string.size > length ? string[0...(string.rindex(" ", length) || length)] + "..." : string
|
152
|
+
end
|
153
|
+
|
154
|
+
def self.get(meta, attr)
|
155
|
+
(meta && !blank?(meta[attr])) ? sanitize(meta[attr]) : nil
|
156
|
+
end
|
157
|
+
|
158
|
+
def self.sanitize(value, length = 50)
|
159
|
+
return nil if blank?(value)
|
160
|
+
Sanitize.fragment(value).strip
|
161
|
+
end
|
162
|
+
|
163
|
+
def self.normalize_url_for_output(url)
|
164
|
+
return "" unless url
|
165
|
+
url = url.dup
|
166
|
+
# expect properly encoded url, remove any unsafe chars
|
167
|
+
url.gsub!("'", "&apos;")
|
168
|
+
url.gsub!('"', "&quot;")
|
169
|
+
url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%]/, "")
|
170
|
+
url
|
171
|
+
end
|
172
|
+
|
173
|
+
def self.get_absolute_image_url(src, url)
|
174
|
+
if src && !!(src =~ /^\/\//)
|
175
|
+
uri = URI(url)
|
176
|
+
src = "#{uri.scheme}:#{src}"
|
177
|
+
elsif src && src.match(/^https?:\/\//i).nil?
|
178
|
+
uri = URI(url)
|
179
|
+
src = if !src.start_with?("/") && uri.path.present?
|
180
|
+
"#{uri.scheme}://#{uri.host.sub(/\/$/, '')}#{uri.path.sub(/\/$/, '')}/#{src.sub(/^\//, '')}"
|
181
|
+
else
|
182
|
+
"#{uri.scheme}://#{uri.host.sub(/\/$/, '')}/#{src.sub(/^\//, '')}"
|
183
|
+
end
|
184
|
+
end
|
185
|
+
src
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|