onebox 1.8.81 → 1.8.82

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -0
  3. data/.rspec +0 -0
  4. data/.rubocop.yml +0 -0
  5. data/.ruby-gemset +0 -0
  6. data/.travis.yml +0 -0
  7. data/CHANGELOG.md +26 -26
  8. data/Gemfile +0 -0
  9. data/Gemfile.lock +154 -154
  10. data/Guardfile +0 -0
  11. data/LICENSE.txt +0 -0
  12. data/README.md +223 -223
  13. data/Rakefile +0 -0
  14. data/lib/onebox.rb +0 -0
  15. data/lib/onebox/engine.rb +188 -188
  16. data/lib/onebox/engine/amazon_onebox.rb +167 -167
  17. data/lib/onebox/engine/asciinema_onebox.rb +0 -0
  18. data/lib/onebox/engine/audio_onebox.rb +0 -0
  19. data/lib/onebox/engine/audioboom_onebox.rb +24 -24
  20. data/lib/onebox/engine/bandcamp_onebox.rb +32 -32
  21. data/lib/onebox/engine/cloudapp_onebox.rb +51 -51
  22. data/lib/onebox/engine/coub_onebox.rb +21 -21
  23. data/lib/onebox/engine/douban_onebox.rb +0 -0
  24. data/lib/onebox/engine/five_hundred_px_onebox.rb +17 -17
  25. data/lib/onebox/engine/flickr_onebox.rb +0 -0
  26. data/lib/onebox/engine/flickr_shortened_onebox.rb +0 -0
  27. data/lib/onebox/engine/gfycat_onebox.rb +0 -0
  28. data/lib/onebox/engine/giphy_onebox.rb +22 -22
  29. data/lib/onebox/engine/github_blob_onebox.rb +0 -0
  30. data/lib/onebox/engine/github_commit_onebox.rb +0 -0
  31. data/lib/onebox/engine/github_gist_onebox.rb +0 -0
  32. data/lib/onebox/engine/github_issue_onebox.rb +0 -0
  33. data/lib/onebox/engine/github_pullrequest_onebox.rb +0 -0
  34. data/lib/onebox/engine/gitlab_blob_onebox.rb +0 -0
  35. data/lib/onebox/engine/google_calendar_onebox.rb +0 -0
  36. data/lib/onebox/engine/google_docs_onebox.rb +0 -0
  37. data/lib/onebox/engine/google_maps_onebox.rb +0 -0
  38. data/lib/onebox/engine/google_photos_onebox.rb +57 -57
  39. data/lib/onebox/engine/google_play_app_onebox.rb +0 -0
  40. data/lib/onebox/engine/html.rb +0 -0
  41. data/lib/onebox/engine/image_onebox.rb +0 -0
  42. data/lib/onebox/engine/imgur_onebox.rb +65 -65
  43. data/lib/onebox/engine/instagram_onebox.rb +32 -32
  44. data/lib/onebox/engine/json.rb +0 -0
  45. data/lib/onebox/engine/kaltura_onebox.rb +31 -31
  46. data/lib/onebox/engine/mixcloud_onebox.rb +20 -20
  47. data/lib/onebox/engine/opengraph_image.rb +12 -12
  48. data/lib/onebox/engine/pastebin_onebox.rb +0 -0
  49. data/lib/onebox/engine/pdf_onebox.rb +0 -0
  50. data/lib/onebox/engine/pubmed_onebox.rb +0 -0
  51. data/lib/onebox/engine/replit_onebox.rb +24 -24
  52. data/lib/onebox/engine/sketchfab_onebox.rb +31 -31
  53. data/lib/onebox/engine/slides_onebox.rb +0 -0
  54. data/lib/onebox/engine/soundcloud_onebox.rb +31 -31
  55. data/lib/onebox/engine/stack_exchange_onebox.rb +0 -0
  56. data/lib/onebox/engine/standard_embed.rb +145 -145
  57. data/lib/onebox/engine/steam_store_onebox.rb +37 -37
  58. data/lib/onebox/engine/trello_onebox.rb +0 -0
  59. data/lib/onebox/engine/twitch_clips_onebox.rb +0 -0
  60. data/lib/onebox/engine/twitch_stream_onebox.rb +0 -0
  61. data/lib/onebox/engine/twitch_video_onebox.rb +0 -0
  62. data/lib/onebox/engine/twitter_status_onebox.rb +0 -0
  63. data/lib/onebox/engine/typeform_onebox.rb +41 -41
  64. data/lib/onebox/engine/video_onebox.rb +0 -0
  65. data/lib/onebox/engine/vimeo_onebox.rb +20 -20
  66. data/lib/onebox/engine/wechat_mp_onebox.rb +0 -0
  67. data/lib/onebox/engine/whitelisted_generic_onebox.rb +366 -366
  68. data/lib/onebox/engine/wikimedia_onebox.rb +0 -0
  69. data/lib/onebox/engine/wikipedia_onebox.rb +0 -0
  70. data/lib/onebox/engine/wistia_onebox.rb +27 -27
  71. data/lib/onebox/engine/xkcd_onebox.rb +0 -0
  72. data/lib/onebox/engine/youku_onebox.rb +0 -0
  73. data/lib/onebox/engine/youtube_onebox.rb +163 -163
  74. data/lib/onebox/file_type_finder.rb +0 -0
  75. data/lib/onebox/helpers.rb +188 -188
  76. data/lib/onebox/layout.rb +0 -0
  77. data/lib/onebox/layout_support.rb +0 -0
  78. data/lib/onebox/matcher.rb +0 -0
  79. data/lib/onebox/mixins/git_blob_onebox.rb +1 -1
  80. data/lib/onebox/mixins/twitch_onebox.rb +0 -0
  81. data/lib/onebox/oembed.rb +15 -15
  82. data/lib/onebox/open_graph.rb +90 -90
  83. data/lib/onebox/preview.rb +0 -0
  84. data/lib/onebox/sanitize_config.rb +0 -0
  85. data/lib/onebox/status_check.rb +0 -0
  86. data/lib/onebox/template_support.rb +0 -0
  87. data/lib/onebox/version.rb +5 -5
  88. data/lib/onebox/view.rb +0 -0
  89. data/lib/onebox/web.rb +0 -0
  90. data/lib/onebox/web_helpers.rb +0 -0
  91. data/onebox.gemspec +0 -0
  92. data/templates/_layout.mustache +0 -0
  93. data/templates/amazon.mustache +0 -0
  94. data/templates/douban.mustache +0 -0
  95. data/templates/githubblob.mustache +1 -1
  96. data/templates/githubcommit.mustache +0 -0
  97. data/templates/githubgist.mustache +0 -0
  98. data/templates/githubissue.mustache +0 -0
  99. data/templates/githubpullrequest.mustache +0 -0
  100. data/templates/gitlabblob.mustache +0 -0
  101. data/templates/googledocs.mustache +0 -0
  102. data/templates/googleplayapp.mustache +0 -0
  103. data/templates/instagram.mustache +0 -0
  104. data/templates/pastebin.mustache +0 -0
  105. data/templates/pdf.mustache +0 -0
  106. data/templates/pubmed.mustache +0 -0
  107. data/templates/stackexchange.mustache +0 -0
  108. data/templates/twitterstatus.mustache +0 -0
  109. data/templates/wechatmp.mustache +0 -0
  110. data/templates/whitelistedgeneric.mustache +0 -0
  111. data/templates/wikimedia.mustache +0 -0
  112. data/templates/wikipedia.mustache +0 -0
  113. data/templates/xkcd.mustache +0 -0
  114. metadata +3 -4
File without changes
File without changes
@@ -1,27 +1,27 @@
1
- module Onebox
2
- module Engine
3
- class WistiaOnebox
4
- include Engine
5
- include StandardEmbed
6
-
7
- matches_regexp(/https?:\/\/(.+)?(wistia.com|wi.st)\/(medias|embed)\/.*/)
8
- always_https
9
-
10
- def to_html
11
- get_oembed.html
12
- end
13
-
14
- def placeholder_html
15
- oembed = get_oembed
16
- return if Onebox::Helpers.blank?(oembed.thumbnail_url)
17
- "<img src='#{oembed.thumbnail_url}' #{oembed.title_attr}>"
18
- end
19
-
20
- private
21
-
22
- def get_oembed_url
23
- "https://fast.wistia.com/oembed?embedType=iframe&url=#{url}"
24
- end
25
- end
26
- end
27
- end
1
+ module Onebox
2
+ module Engine
3
+ class WistiaOnebox
4
+ include Engine
5
+ include StandardEmbed
6
+
7
+ matches_regexp(/https?:\/\/(.+)?(wistia.com|wi.st)\/(medias|embed)\/.*/)
8
+ always_https
9
+
10
+ def to_html
11
+ get_oembed.html
12
+ end
13
+
14
+ def placeholder_html
15
+ oembed = get_oembed
16
+ return if Onebox::Helpers.blank?(oembed.thumbnail_url)
17
+ "<img src='#{oembed.thumbnail_url}' #{oembed.title_attr}>"
18
+ end
19
+
20
+ private
21
+
22
+ def get_oembed_url
23
+ "https://fast.wistia.com/oembed?embedType=iframe&url=#{url}"
24
+ end
25
+ end
26
+ end
27
+ end
File without changes
File without changes
@@ -1,163 +1,163 @@
1
- require 'onebox/oembed'
2
-
3
- module Onebox
4
- module Engine
5
- class YoutubeOnebox
6
- include Engine
7
- include StandardEmbed
8
-
9
- matches_regexp(/^https?:\/\/(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)\/.+$/)
10
- always_https
11
-
12
- WIDTH ||= 480
13
- HEIGHT ||= 360
14
-
15
- def placeholder_html
16
- if video_id
17
- "<img src='https://i.ytimg.com/vi/#{video_id}/hqdefault.jpg' width='#{WIDTH}' height='#{HEIGHT}' #{video_oembed_data.title_attr}>"
18
- elsif list_id
19
- "<img src='#{list_thumbnail_url}' width='#{WIDTH}' height='#{HEIGHT}' #{list_oembed_data.title_attr}>"
20
- else
21
- to_html
22
- end
23
- end
24
-
25
- def to_html
26
- if video_id
27
- <<-HTML
28
- <iframe width="#{WIDTH}"
29
- height="#{HEIGHT}"
30
- src="https://www.youtube.com/embed/#{video_id}?#{embed_params}"
31
- frameborder="0"
32
- allowfullscreen>
33
- </iframe>
34
- HTML
35
- elsif list_id
36
- <<-HTML
37
- <iframe width="#{WIDTH}"
38
- height="#{HEIGHT}"
39
- src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
40
- frameborder="0"
41
- allowfullscreen>
42
- </iframe>
43
- HTML
44
- else
45
- # for channel pages
46
- html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @cache, @timeout).to_html
47
- return if Onebox::Helpers.blank?(html)
48
- html.gsub!(/['"]\/\//, "https://")
49
- html
50
- end
51
- end
52
-
53
- def video_title
54
- @video_title ||= video_oembed_data.title
55
- end
56
-
57
- private
58
-
59
- def video_id
60
- @video_id ||= begin
61
- # http://youtu.be/afyK1HSFfgw
62
- if uri.host["youtu.be"]
63
- id = uri.path[/\/([\w\-]+)/, 1]
64
- return id if id
65
- end
66
-
67
- # https://www.youtube.com/embed/vsF0K3Ou1v0
68
- if uri.path["/embed/"]
69
- id = uri.path[/\/embed\/([\w\-]+)/, 1]
70
- return id if id
71
- end
72
-
73
- # https://www.youtube.com/watch?v=Z0UISCEe52Y
74
- params['v']
75
- end
76
- end
77
-
78
- def list_id
79
- @list_id ||= params['list']
80
- end
81
-
82
- def list_thumbnail_url
83
- @list_thumbnail_url ||= begin
84
- url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
85
- response = Onebox::Helpers.fetch_response(url) rescue "{}"
86
- data = Onebox::Oembed.new(response)
87
- data.thumbnail_url
88
- rescue
89
- nil
90
- end
91
- end
92
-
93
- def video_oembed_data
94
- url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=#{video_id}"
95
- response = Onebox::Helpers.fetch_response(url) rescue "{}"
96
- Onebox::Oembed.new(response)
97
- end
98
-
99
- def list_oembed_data
100
- url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
101
- response = Onebox::Helpers.fetch_response(url) rescue "{}"
102
- Onebox::Oembed.new(response)
103
- end
104
-
105
- def embed_params
106
- p = { 'feature' => 'oembed', 'wmode' => 'opaque' }
107
-
108
- p['list'] = list_id if list_id
109
-
110
- # Parse timestrings, and assign the result as a start= parameter
111
- start = if params['start']
112
- params['start']
113
- elsif params['t']
114
- params['t']
115
- elsif uri.fragment && uri.fragment.start_with?('t=')
116
- # referencing uri is safe here because any throws were already caught by video_id returning nil
117
- # remove the t= from the start
118
- uri.fragment[2..-1]
119
- end
120
-
121
- p['start'] = parse_timestring(start) if start
122
- p['end'] = parse_timestring params['end'] if params['end']
123
-
124
- # Official workaround for looping videos
125
- # https://developers.google.com/youtube/player_parameters#loop
126
- # use params.include? so that you can just add "&loop"
127
- if params.include?('loop')
128
- p['loop'] = 1
129
- p['playlist'] = video_id
130
- end
131
-
132
- # https://developers.google.com/youtube/player_parameters#rel
133
- p['rel'] = 0 if params.include?('rel')
134
-
135
- URI.encode_www_form(p)
136
- end
137
-
138
- def parse_timestring(string)
139
- if string =~ /(\d+h)?(\d+m)?(\d+s?)?/
140
- ($1.to_i * 3600) + ($2.to_i * 60) + $3.to_i
141
- end
142
- end
143
-
144
- def params
145
- return {} unless uri.query
146
- # This mapping is necessary because CGI.parse returns a hash of keys to arrays.
147
- # And *that* is necessary because querystrings support arrays, so they
148
- # force you to deal with it to avoid security issues that would pop up
149
- # if one day it suddenly gave you an array.
150
- #
151
- # However, we aren't interested. Just take the first one.
152
- @params ||= begin
153
- p = {}
154
- CGI.parse(uri.query).each { |k, v| p[k] = v.first }
155
- p
156
- end
157
- rescue
158
- {}
159
- end
160
-
161
- end
162
- end
163
- end
1
+ require 'onebox/oembed'
2
+
3
+ module Onebox
4
+ module Engine
5
+ class YoutubeOnebox
6
+ include Engine
7
+ include StandardEmbed
8
+
9
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)\/.+$/)
10
+ always_https
11
+
12
+ WIDTH ||= 480
13
+ HEIGHT ||= 360
14
+
15
+ def placeholder_html
16
+ if video_id
17
+ "<img src='https://i.ytimg.com/vi/#{video_id}/hqdefault.jpg' width='#{WIDTH}' height='#{HEIGHT}' #{video_oembed_data.title_attr}>"
18
+ elsif list_id
19
+ "<img src='#{list_thumbnail_url}' width='#{WIDTH}' height='#{HEIGHT}' #{list_oembed_data.title_attr}>"
20
+ else
21
+ to_html
22
+ end
23
+ end
24
+
25
+ def to_html
26
+ if video_id
27
+ <<-HTML
28
+ <iframe width="#{WIDTH}"
29
+ height="#{HEIGHT}"
30
+ src="https://www.youtube.com/embed/#{video_id}?#{embed_params}"
31
+ frameborder="0"
32
+ allowfullscreen>
33
+ </iframe>
34
+ HTML
35
+ elsif list_id
36
+ <<-HTML
37
+ <iframe width="#{WIDTH}"
38
+ height="#{HEIGHT}"
39
+ src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
40
+ frameborder="0"
41
+ allowfullscreen>
42
+ </iframe>
43
+ HTML
44
+ else
45
+ # for channel pages
46
+ html = Onebox::Engine::WhitelistedGenericOnebox.new(@url, @cache, @timeout).to_html
47
+ return if Onebox::Helpers.blank?(html)
48
+ html.gsub!(/['"]\/\//, "https://")
49
+ html
50
+ end
51
+ end
52
+
53
+ def video_title
54
+ @video_title ||= video_oembed_data.title
55
+ end
56
+
57
+ private
58
+
59
+ def video_id
60
+ @video_id ||= begin
61
+ # http://youtu.be/afyK1HSFfgw
62
+ if uri.host["youtu.be"]
63
+ id = uri.path[/\/([\w\-]+)/, 1]
64
+ return id if id
65
+ end
66
+
67
+ # https://www.youtube.com/embed/vsF0K3Ou1v0
68
+ if uri.path["/embed/"]
69
+ id = uri.path[/\/embed\/([\w\-]+)/, 1]
70
+ return id if id
71
+ end
72
+
73
+ # https://www.youtube.com/watch?v=Z0UISCEe52Y
74
+ params['v']
75
+ end
76
+ end
77
+
78
+ def list_id
79
+ @list_id ||= params['list']
80
+ end
81
+
82
+ def list_thumbnail_url
83
+ @list_thumbnail_url ||= begin
84
+ url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
85
+ response = Onebox::Helpers.fetch_response(url) rescue "{}"
86
+ data = Onebox::Oembed.new(response)
87
+ data.thumbnail_url
88
+ rescue
89
+ nil
90
+ end
91
+ end
92
+
93
+ def video_oembed_data
94
+ url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=#{video_id}"
95
+ response = Onebox::Helpers.fetch_response(url) rescue "{}"
96
+ Onebox::Oembed.new(response)
97
+ end
98
+
99
+ def list_oembed_data
100
+ url = "https://www.youtube.com/oembed?format=json&url=https://www.youtube.com/playlist?list=#{list_id}"
101
+ response = Onebox::Helpers.fetch_response(url) rescue "{}"
102
+ Onebox::Oembed.new(response)
103
+ end
104
+
105
+ def embed_params
106
+ p = { 'feature' => 'oembed', 'wmode' => 'opaque' }
107
+
108
+ p['list'] = list_id if list_id
109
+
110
+ # Parse timestrings, and assign the result as a start= parameter
111
+ start = if params['start']
112
+ params['start']
113
+ elsif params['t']
114
+ params['t']
115
+ elsif uri.fragment && uri.fragment.start_with?('t=')
116
+ # referencing uri is safe here because any throws were already caught by video_id returning nil
117
+ # remove the t= from the start
118
+ uri.fragment[2..-1]
119
+ end
120
+
121
+ p['start'] = parse_timestring(start) if start
122
+ p['end'] = parse_timestring params['end'] if params['end']
123
+
124
+ # Official workaround for looping videos
125
+ # https://developers.google.com/youtube/player_parameters#loop
126
+ # use params.include? so that you can just add "&loop"
127
+ if params.include?('loop')
128
+ p['loop'] = 1
129
+ p['playlist'] = video_id
130
+ end
131
+
132
+ # https://developers.google.com/youtube/player_parameters#rel
133
+ p['rel'] = 0 if params.include?('rel')
134
+
135
+ URI.encode_www_form(p)
136
+ end
137
+
138
+ def parse_timestring(string)
139
+ if string =~ /(\d+h)?(\d+m)?(\d+s?)?/
140
+ ($1.to_i * 3600) + ($2.to_i * 60) + $3.to_i
141
+ end
142
+ end
143
+
144
+ def params
145
+ return {} unless uri.query
146
+ # This mapping is necessary because CGI.parse returns a hash of keys to arrays.
147
+ # And *that* is necessary because querystrings support arrays, so they
148
+ # force you to deal with it to avoid security issues that would pop up
149
+ # if one day it suddenly gave you an array.
150
+ #
151
+ # However, we aren't interested. Just take the first one.
152
+ @params ||= begin
153
+ p = {}
154
+ CGI.parse(uri.query).each { |k, v| p[k] = v.first }
155
+ p
156
+ end
157
+ rescue
158
+ {}
159
+ end
160
+
161
+ end
162
+ end
163
+ end
File without changes
@@ -1,188 +1,188 @@
1
- module Onebox
2
- module Helpers
3
-
4
- class DownloadTooLarge < StandardError; end
5
-
6
- def self.symbolize_keys(hash)
7
- return {} if hash.nil?
8
-
9
- hash.inject({}) do |result, (key, value)|
10
- new_key = key.is_a?(String) ? key.to_sym : key
11
- new_value = value.is_a?(Hash) ? symbolize_keys(value) : value
12
- result[new_key] = new_value
13
- result
14
- end
15
- end
16
-
17
- def self.clean(html)
18
- html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
19
- end
20
-
21
- def self.fetch_html_doc(url, headers = nil)
22
- response = (fetch_response(url, nil, nil, headers) rescue nil)
23
- doc = Nokogiri::HTML(response)
24
-
25
- ignore_canonical = doc.at('meta[property="og:ignore_canonical"]')
26
- unless ignore_canonical && ignore_canonical['content'].to_s == 'true'
27
- # prefer canonical link
28
- canonical_link = doc.at('//link[@rel="canonical"]/@href')
29
- if canonical_link && "#{URI(canonical_link).host}#{URI(canonical_link).path}" != "#{URI(url).host}#{URI(url).path}"
30
- response = (fetch_response(canonical_link, nil, nil, headers) rescue nil)
31
- doc = Nokogiri::HTML(response) if response
32
- end
33
- end
34
-
35
- doc
36
- end
37
-
38
- def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
39
-
40
- limit ||= 5
41
- limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
42
-
43
- raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
44
-
45
- uri = URI(location)
46
- uri = URI("#{domain}#{location}") if !uri.host
47
-
48
- result = StringIO.new
49
- Net::HTTP.start(uri.host, uri.port, use_ssl: uri.is_a?(URI::HTTPS)) do |http|
50
- http.open_timeout = Onebox.options.connect_timeout
51
- http.read_timeout = Onebox.options.timeout
52
- if uri.is_a?(URI::HTTPS)
53
- http.use_ssl = true
54
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
55
- end
56
-
57
- headers ||= {}
58
-
59
- if Onebox.options.user_agent && !headers['User-Agent']
60
- headers['User-Agent'] = Onebox.options.user_agent
61
- end
62
-
63
- request = Net::HTTP::Get.new(uri.request_uri, headers)
64
- start_time = Time.now
65
-
66
- size_bytes = Onebox.options.max_download_kb * 1024
67
- http.request(request) do |response|
68
-
69
- if cookie = response.get_fields('set-cookie')
70
- header = { 'Cookie' => cookie.join }
71
- end
72
-
73
- header = nil unless header.is_a? Hash
74
-
75
- code = response.code.to_i
76
- unless code === 200
77
- response.error! unless [301, 302].include?(code)
78
- return fetch_response(
79
- response['location'],
80
- limit - 1,
81
- "#{uri.scheme}://#{uri.host}",
82
- header
83
- )
84
- end
85
-
86
- response.read_body do |chunk|
87
- result.write(chunk)
88
- raise DownloadTooLarge.new if result.size > size_bytes
89
- raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
90
- end
91
-
92
- return result.string
93
- end
94
- end
95
- end
96
-
97
- def self.fetch_content_length(location)
98
- uri = URI(location)
99
-
100
- Net::HTTP.start(uri.host, uri.port, use_ssl: uri.is_a?(URI::HTTPS)) do |http|
101
- http.open_timeout = Onebox.options.connect_timeout
102
- http.read_timeout = Onebox.options.timeout
103
- if uri.is_a?(URI::HTTPS)
104
- http.use_ssl = true
105
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
106
- end
107
-
108
- http.request_head([uri.path, uri.query].join("?")) do |response|
109
- code = response.code.to_i
110
- unless code === 200 || Onebox::Helpers.blank?(response.header['content-length'])
111
- return nil
112
- end
113
- return response.header['content-length']
114
- end
115
- end
116
- end
117
-
118
- def self.pretty_filesize(size)
119
- conv = [ 'B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB' ]
120
- scale = 1024
121
-
122
- ndx = 1
123
- if (size < 2 * (scale**ndx)) then
124
- return "#{(size)} #{conv[ndx - 1]}"
125
- end
126
- size = size.to_f
127
- [2, 3, 4, 5, 6, 7].each do |i|
128
- if (size < 2 * (scale**i)) then
129
- return "#{'%.2f' % (size / (scale**(i - 1)))} #{conv[i - 1]}"
130
- end
131
- end
132
- ndx = 7
133
- return "#{'%.2f' % (size / (scale**(ndx - 1)))} #{conv[ndx - 1]}"
134
- end
135
-
136
- def self.click_to_scroll_div(width = 690, height = 400)
137
- "<div style=\"background:transparent;position:relative;width:#{width}px;height:#{height}px;top:#{height}px;margin-top:-#{height}px;\" onClick=\"style.pointerEvents='none'\"></div>"
138
- end
139
-
140
- def self.blank?(value)
141
- if value.nil?
142
- true
143
- elsif String === value
144
- value.empty? || !(/[[:^space:]]/ === value)
145
- else
146
- value.respond_to?(:empty?) ? !!value.empty? : !value
147
- end
148
- end
149
-
150
- def self.truncate(string, length = 50)
151
- string.size > length ? string[0...(string.rindex(" ", length) || length)] + "..." : string
152
- end
153
-
154
- def self.get(meta, attr)
155
- (meta && !blank?(meta[attr])) ? sanitize(meta[attr]) : nil
156
- end
157
-
158
- def self.sanitize(value, length = 50)
159
- return nil if blank?(value)
160
- Sanitize.fragment(value).strip
161
- end
162
-
163
- def self.normalize_url_for_output(url)
164
- return "" unless url
165
- url = url.dup
166
- # expect properly encoded url, remove any unsafe chars
167
- url.gsub!("'", "&apos;")
168
- url.gsub!('"', "&quot;")
169
- url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%]/, "")
170
- url
171
- end
172
-
173
- def self.get_absolute_image_url(src, url)
174
- if src && !!(src =~ /^\/\//)
175
- uri = URI(url)
176
- src = "#{uri.scheme}:#{src}"
177
- elsif src && src.match(/^https?:\/\//i).nil?
178
- uri = URI(url)
179
- src = if !src.start_with?("/") && uri.path.present?
180
- "#{uri.scheme}://#{uri.host.sub(/\/$/, '')}#{uri.path.sub(/\/$/, '')}/#{src.sub(/^\//, '')}"
181
- else
182
- "#{uri.scheme}://#{uri.host.sub(/\/$/, '')}/#{src.sub(/^\//, '')}"
183
- end
184
- end
185
- src
186
- end
187
- end
188
- end
1
+ module Onebox
2
+ module Helpers
3
+
4
+ class DownloadTooLarge < StandardError; end
5
+
6
+ def self.symbolize_keys(hash)
7
+ return {} if hash.nil?
8
+
9
+ hash.inject({}) do |result, (key, value)|
10
+ new_key = key.is_a?(String) ? key.to_sym : key
11
+ new_value = value.is_a?(Hash) ? symbolize_keys(value) : value
12
+ result[new_key] = new_value
13
+ result
14
+ end
15
+ end
16
+
17
+ def self.clean(html)
18
+ html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
19
+ end
20
+
21
+ def self.fetch_html_doc(url, headers = nil)
22
+ response = (fetch_response(url, nil, nil, headers) rescue nil)
23
+ doc = Nokogiri::HTML(response)
24
+
25
+ ignore_canonical = doc.at('meta[property="og:ignore_canonical"]')
26
+ unless ignore_canonical && ignore_canonical['content'].to_s == 'true'
27
+ # prefer canonical link
28
+ canonical_link = doc.at('//link[@rel="canonical"]/@href')
29
+ if canonical_link && "#{URI(canonical_link).host}#{URI(canonical_link).path}" != "#{URI(url).host}#{URI(url).path}"
30
+ response = (fetch_response(canonical_link, nil, nil, headers) rescue nil)
31
+ doc = Nokogiri::HTML(response) if response
32
+ end
33
+ end
34
+
35
+ doc
36
+ end
37
+
38
+ def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
39
+
40
+ limit ||= 5
41
+ limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
42
+
43
+ raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
44
+
45
+ uri = URI(location)
46
+ uri = URI("#{domain}#{location}") if !uri.host
47
+
48
+ result = StringIO.new
49
+ Net::HTTP.start(uri.host, uri.port, use_ssl: uri.is_a?(URI::HTTPS)) do |http|
50
+ http.open_timeout = Onebox.options.connect_timeout
51
+ http.read_timeout = Onebox.options.timeout
52
+ if uri.is_a?(URI::HTTPS)
53
+ http.use_ssl = true
54
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
55
+ end
56
+
57
+ headers ||= {}
58
+
59
+ if Onebox.options.user_agent && !headers['User-Agent']
60
+ headers['User-Agent'] = Onebox.options.user_agent
61
+ end
62
+
63
+ request = Net::HTTP::Get.new(uri.request_uri, headers)
64
+ start_time = Time.now
65
+
66
+ size_bytes = Onebox.options.max_download_kb * 1024
67
+ http.request(request) do |response|
68
+
69
+ if cookie = response.get_fields('set-cookie')
70
+ header = { 'Cookie' => cookie.join }
71
+ end
72
+
73
+ header = nil unless header.is_a? Hash
74
+
75
+ code = response.code.to_i
76
+ unless code === 200
77
+ response.error! unless [301, 302].include?(code)
78
+ return fetch_response(
79
+ response['location'],
80
+ limit - 1,
81
+ "#{uri.scheme}://#{uri.host}",
82
+ header
83
+ )
84
+ end
85
+
86
+ response.read_body do |chunk|
87
+ result.write(chunk)
88
+ raise DownloadTooLarge.new if result.size > size_bytes
89
+ raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
90
+ end
91
+
92
+ return result.string
93
+ end
94
+ end
95
+ end
96
+
97
+ def self.fetch_content_length(location)
98
+ uri = URI(location)
99
+
100
+ Net::HTTP.start(uri.host, uri.port, use_ssl: uri.is_a?(URI::HTTPS)) do |http|
101
+ http.open_timeout = Onebox.options.connect_timeout
102
+ http.read_timeout = Onebox.options.timeout
103
+ if uri.is_a?(URI::HTTPS)
104
+ http.use_ssl = true
105
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
106
+ end
107
+
108
+ http.request_head([uri.path, uri.query].join("?")) do |response|
109
+ code = response.code.to_i
110
+ unless code === 200 || Onebox::Helpers.blank?(response.header['content-length'])
111
+ return nil
112
+ end
113
+ return response.header['content-length']
114
+ end
115
+ end
116
+ end
117
+
118
+ def self.pretty_filesize(size)
119
+ conv = [ 'B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB' ]
120
+ scale = 1024
121
+
122
+ ndx = 1
123
+ if (size < 2 * (scale**ndx)) then
124
+ return "#{(size)} #{conv[ndx - 1]}"
125
+ end
126
+ size = size.to_f
127
+ [2, 3, 4, 5, 6, 7].each do |i|
128
+ if (size < 2 * (scale**i)) then
129
+ return "#{'%.2f' % (size / (scale**(i - 1)))} #{conv[i - 1]}"
130
+ end
131
+ end
132
+ ndx = 7
133
+ return "#{'%.2f' % (size / (scale**(ndx - 1)))} #{conv[ndx - 1]}"
134
+ end
135
+
136
+ def self.click_to_scroll_div(width = 690, height = 400)
137
+ "<div style=\"background:transparent;position:relative;width:#{width}px;height:#{height}px;top:#{height}px;margin-top:-#{height}px;\" onClick=\"style.pointerEvents='none'\"></div>"
138
+ end
139
+
140
+ def self.blank?(value)
141
+ if value.nil?
142
+ true
143
+ elsif String === value
144
+ value.empty? || !(/[[:^space:]]/ === value)
145
+ else
146
+ value.respond_to?(:empty?) ? !!value.empty? : !value
147
+ end
148
+ end
149
+
150
+ def self.truncate(string, length = 50)
151
+ string.size > length ? string[0...(string.rindex(" ", length) || length)] + "..." : string
152
+ end
153
+
154
+ def self.get(meta, attr)
155
+ (meta && !blank?(meta[attr])) ? sanitize(meta[attr]) : nil
156
+ end
157
+
158
+ def self.sanitize(value, length = 50)
159
+ return nil if blank?(value)
160
+ Sanitize.fragment(value).strip
161
+ end
162
+
163
+ def self.normalize_url_for_output(url)
164
+ return "" unless url
165
+ url = url.dup
166
+ # expect properly encoded url, remove any unsafe chars
167
+ url.gsub!("'", "&apos;")
168
+ url.gsub!('"', "&quot;")
169
+ url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%]/, "")
170
+ url
171
+ end
172
+
173
+ def self.get_absolute_image_url(src, url)
174
+ if src && !!(src =~ /^\/\//)
175
+ uri = URI(url)
176
+ src = "#{uri.scheme}:#{src}"
177
+ elsif src && src.match(/^https?:\/\//i).nil?
178
+ uri = URI(url)
179
+ src = if !src.start_with?("/") && uri.path.present?
180
+ "#{uri.scheme}://#{uri.host.sub(/\/$/, '')}#{uri.path.sub(/\/$/, '')}/#{src.sub(/^\//, '')}"
181
+ else
182
+ "#{uri.scheme}://#{uri.host.sub(/\/$/, '')}/#{src.sub(/^\//, '')}"
183
+ end
184
+ end
185
+ src
186
+ end
187
+ end
188
+ end