onebox 1.8.80 → 1.8.81

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -0
  3. data/.rspec +0 -0
  4. data/.rubocop.yml +0 -0
  5. data/.ruby-gemset +0 -0
  6. data/.travis.yml +0 -0
  7. data/CHANGELOG.md +26 -26
  8. data/Gemfile +0 -0
  9. data/Gemfile.lock +154 -152
  10. data/Guardfile +0 -0
  11. data/LICENSE.txt +0 -0
  12. data/README.md +223 -223
  13. data/Rakefile +0 -0
  14. data/lib/onebox.rb +0 -0
  15. data/lib/onebox/engine.rb +188 -188
  16. data/lib/onebox/engine/amazon_onebox.rb +167 -167
  17. data/lib/onebox/engine/asciinema_onebox.rb +0 -0
  18. data/lib/onebox/engine/audio_onebox.rb +0 -0
  19. data/lib/onebox/engine/audioboom_onebox.rb +24 -24
  20. data/lib/onebox/engine/bandcamp_onebox.rb +32 -32
  21. data/lib/onebox/engine/cloudapp_onebox.rb +51 -51
  22. data/lib/onebox/engine/coub_onebox.rb +21 -21
  23. data/lib/onebox/engine/douban_onebox.rb +0 -0
  24. data/lib/onebox/engine/five_hundred_px_onebox.rb +17 -17
  25. data/lib/onebox/engine/flickr_onebox.rb +0 -0
  26. data/lib/onebox/engine/flickr_shortened_onebox.rb +0 -0
  27. data/lib/onebox/engine/gfycat_onebox.rb +0 -0
  28. data/lib/onebox/engine/giphy_onebox.rb +22 -22
  29. data/lib/onebox/engine/github_blob_onebox.rb +0 -0
  30. data/lib/onebox/engine/github_commit_onebox.rb +0 -0
  31. data/lib/onebox/engine/github_gist_onebox.rb +0 -0
  32. data/lib/onebox/engine/github_issue_onebox.rb +0 -0
  33. data/lib/onebox/engine/github_pullrequest_onebox.rb +0 -0
  34. data/lib/onebox/engine/gitlab_blob_onebox.rb +0 -0
  35. data/lib/onebox/engine/google_calendar_onebox.rb +0 -0
  36. data/lib/onebox/engine/google_docs_onebox.rb +0 -0
  37. data/lib/onebox/engine/google_maps_onebox.rb +0 -0
  38. data/lib/onebox/engine/google_photos_onebox.rb +57 -57
  39. data/lib/onebox/engine/google_play_app_onebox.rb +0 -0
  40. data/lib/onebox/engine/html.rb +0 -0
  41. data/lib/onebox/engine/image_onebox.rb +0 -0
  42. data/lib/onebox/engine/imgur_onebox.rb +65 -65
  43. data/lib/onebox/engine/instagram_onebox.rb +32 -32
  44. data/lib/onebox/engine/json.rb +0 -0
  45. data/lib/onebox/engine/kaltura_onebox.rb +31 -31
  46. data/lib/onebox/engine/mixcloud_onebox.rb +20 -20
  47. data/lib/onebox/engine/opengraph_image.rb +12 -12
  48. data/lib/onebox/engine/pastebin_onebox.rb +0 -0
  49. data/lib/onebox/engine/pdf_onebox.rb +0 -0
  50. data/lib/onebox/engine/pubmed_onebox.rb +0 -0
  51. data/lib/onebox/engine/replit_onebox.rb +24 -24
  52. data/lib/onebox/engine/sketchfab_onebox.rb +31 -31
  53. data/lib/onebox/engine/slides_onebox.rb +0 -0
  54. data/lib/onebox/engine/soundcloud_onebox.rb +31 -31
  55. data/lib/onebox/engine/stack_exchange_onebox.rb +0 -0
  56. data/lib/onebox/engine/standard_embed.rb +145 -145
  57. data/lib/onebox/engine/steam_store_onebox.rb +37 -37
  58. data/lib/onebox/engine/trello_onebox.rb +0 -0
  59. data/lib/onebox/engine/twitch_clips_onebox.rb +0 -0
  60. data/lib/onebox/engine/twitch_stream_onebox.rb +0 -0
  61. data/lib/onebox/engine/twitch_video_onebox.rb +0 -0
  62. data/lib/onebox/engine/twitter_status_onebox.rb +0 -0
  63. data/lib/onebox/engine/typeform_onebox.rb +41 -41
  64. data/lib/onebox/engine/video_onebox.rb +0 -0
  65. data/lib/onebox/engine/vimeo_onebox.rb +20 -20
  66. data/lib/onebox/engine/wechat_mp_onebox.rb +0 -0
  67. data/lib/onebox/engine/whitelisted_generic_onebox.rb +366 -366
  68. data/lib/onebox/engine/wikimedia_onebox.rb +0 -0
  69. data/lib/onebox/engine/wikipedia_onebox.rb +0 -0
  70. data/lib/onebox/engine/wistia_onebox.rb +27 -27
  71. data/lib/onebox/engine/xkcd_onebox.rb +0 -0
  72. data/lib/onebox/engine/youku_onebox.rb +0 -0
  73. data/lib/onebox/engine/youtube_onebox.rb +163 -163
  74. data/lib/onebox/file_type_finder.rb +0 -0
  75. data/lib/onebox/helpers.rb +188 -188
  76. data/lib/onebox/layout.rb +0 -0
  77. data/lib/onebox/layout_support.rb +0 -0
  78. data/lib/onebox/matcher.rb +0 -0
  79. data/lib/onebox/mixins/git_blob_onebox.rb +0 -0
  80. data/lib/onebox/mixins/twitch_onebox.rb +0 -0
  81. data/lib/onebox/oembed.rb +15 -12
  82. data/lib/onebox/open_graph.rb +90 -88
  83. data/lib/onebox/preview.rb +0 -0
  84. data/lib/onebox/sanitize_config.rb +0 -0
  85. data/lib/onebox/status_check.rb +0 -0
  86. data/lib/onebox/template_support.rb +0 -0
  87. data/lib/onebox/version.rb +5 -5
  88. data/lib/onebox/view.rb +0 -0
  89. data/lib/onebox/web.rb +0 -0
  90. data/lib/onebox/web_helpers.rb +0 -0
  91. data/onebox.gemspec +0 -0
  92. data/templates/_layout.mustache +0 -0
  93. data/templates/amazon.mustache +0 -0
  94. data/templates/douban.mustache +0 -0
  95. data/templates/githubblob.mustache +0 -0
  96. data/templates/githubcommit.mustache +0 -0
  97. data/templates/githubgist.mustache +0 -0
  98. data/templates/githubissue.mustache +0 -0
  99. data/templates/githubpullrequest.mustache +0 -0
  100. data/templates/gitlabblob.mustache +0 -0
  101. data/templates/googledocs.mustache +0 -0
  102. data/templates/googleplayapp.mustache +0 -0
  103. data/templates/instagram.mustache +0 -0
  104. data/templates/pastebin.mustache +0 -0
  105. data/templates/pdf.mustache +0 -0
  106. data/templates/pubmed.mustache +0 -0
  107. data/templates/stackexchange.mustache +0 -0
  108. data/templates/twitterstatus.mustache +0 -0
  109. data/templates/wechatmp.mustache +0 -0
  110. data/templates/whitelistedgeneric.mustache +0 -0
  111. data/templates/wikimedia.mustache +0 -0
  112. data/templates/wikipedia.mustache +0 -0
  113. data/templates/xkcd.mustache +0 -0
  114. metadata +4 -3
File without changes
@@ -1,20 +1,20 @@
1
- module Onebox
2
- module Engine
3
- class VimeoOnebox
4
- include Engine
5
- include StandardEmbed
6
-
7
- matches_regexp(/^https?:\/\/(www\.)?vimeo\.com\/\d+(\/[^\/]+)?$/)
8
- always_https
9
-
10
- def placeholder_html
11
- oembed = get_oembed
12
- "<img src='#{oembed.thumbnail_url}' width='#{oembed.thumbnail_width}' height='#{oembed.thumbnail_height}' #{oembed.title_attr}>"
13
- end
14
-
15
- def to_html
16
- get_oembed.html
17
- end
18
- end
19
- end
20
- end
1
+ module Onebox
2
+ module Engine
3
+ class VimeoOnebox
4
+ include Engine
5
+ include StandardEmbed
6
+
7
+ matches_regexp(/^https?:\/\/(www\.)?vimeo\.com\/\d+(\/[^\/]+)?$/)
8
+ always_https
9
+
10
+ def placeholder_html
11
+ oembed = get_oembed
12
+ "<img src='#{oembed.thumbnail_url}' width='#{oembed.thumbnail_width}' height='#{oembed.thumbnail_height}' #{oembed.title_attr}>"
13
+ end
14
+
15
+ def to_html
16
+ get_oembed.html
17
+ end
18
+ end
19
+ end
20
+ end
File without changes
@@ -1,366 +1,366 @@
1
- require 'htmlentities'
2
-
3
- module Onebox
4
- module Engine
5
- class WhitelistedGenericOnebox
6
- include Engine
7
- include StandardEmbed
8
- include LayoutSupport
9
-
10
- def self.whitelist=(list)
11
- @whitelist = list
12
- end
13
-
14
- def self.whitelist
15
- @whitelist ||= default_whitelist.dup
16
- end
17
-
18
- def self.default_whitelist
19
- %w(
20
- 23hq.com
21
- 500px.com
22
- 8tracks.com
23
- abc.net.au
24
- about.com
25
- answers.com
26
- arstechnica.com
27
- ask.com
28
- battle.net
29
- bbc.co.uk
30
- bbs.boingboing.net
31
- bestbuy.ca
32
- bestbuy.com
33
- blip.tv
34
- bloomberg.com
35
- businessinsider.com
36
- change.org
37
- clikthrough.com
38
- cnet.com
39
- cnn.com
40
- codepen.io
41
- collegehumor.com
42
- consider.it
43
- coursera.org
44
- cracked.com
45
- dailymail.co.uk
46
- dailymotion.com
47
- deadline.com
48
- dell.com
49
- deviantart.com
50
- digg.com
51
- dotsub.com
52
- ebay.ca
53
- ebay.co.uk
54
- ebay.com
55
- ehow.com
56
- espn.go.com
57
- etsy.com
58
- findery.com
59
- folksy.com
60
- forbes.com
61
- foxnews.com
62
- funnyordie.com
63
- gifs.com
64
- groupon.com
65
- howtogeek.com
66
- huffingtonpost.ca
67
- huffingtonpost.com
68
- hulu.com
69
- ign.com
70
- ikea.com
71
- imdb.com
72
- indiatimes.com
73
- itunes.apple.com
74
- khanacademy.org
75
- kickstarter.com
76
- kinomap.com
77
- lessonplanet.com
78
- liveleak.com
79
- livestream.com
80
- mashable.com
81
- medium.com
82
- meetup.com
83
- mixcloud.com
84
- mlb.com
85
- myshopify.com
86
- myspace.com
87
- nba.com
88
- npr.org
89
- nytimes.com
90
- photobucket.com
91
- pinterest.com
92
- reference.com
93
- revision3.com
94
- rottentomatoes.com
95
- samsung.com
96
- screenr.com
97
- scribd.com
98
- simplecast.com
99
- slideshare.net
100
- sourceforge.net
101
- speakerdeck.com
102
- spotify.com
103
- squidoo.com
104
- streamable.com
105
- techcrunch.com
106
- ted.com
107
- thefreedictionary.com
108
- theglobeandmail.com
109
- thenextweb.com
110
- theonion.com
111
- thestar.com
112
- thesun.co.uk
113
- thinkgeek.com
114
- tmz.com
115
- torontosun.com
116
- tumblr.com
117
- twitpic.com
118
- usatoday.com
119
- viddler.com
120
- videojug.com
121
- vine.co
122
- walmart.com
123
- washingtonpost.com
124
- wi.st
125
- wikia.com
126
- wikihow.com
127
- wired.com
128
- wistia.com
129
- wonderhowto.com
130
- wsj.com
131
- zappos.com
132
- zillow.com
133
- )
134
- end
135
-
136
- # Often using the `html` attribute is not what we want, like for some blogs that
137
- # include the entire page HTML. However for some providers like Flickr it allows us
138
- # to return gifv and galleries.
139
- def self.default_html_providers
140
- ['Flickr', 'Meetup']
141
- end
142
-
143
- def self.html_providers
144
- @html_providers ||= default_html_providers.dup
145
- end
146
-
147
- def self.html_providers=(new_provs)
148
- @html_providers = new_provs
149
- end
150
-
151
- # A re-written URL converts http:// -> https://
152
- def self.rewrites
153
- @rewrites ||= https_hosts.dup
154
- end
155
-
156
- def self.rewrites=(new_list)
157
- @rewrites = new_list
158
- end
159
-
160
- def self.https_hosts
161
- %w(slideshare.net dailymotion.com livestream.com imgur.com flickr.com)
162
- end
163
-
164
- def self.host_matches(uri, list)
165
- !!list.find { |h| %r((^|\.)#{Regexp.escape(h)}$).match(uri.host) }
166
- end
167
-
168
- def self.probable_discourse(uri)
169
- !!(uri.path =~ /\/t\/[^\/]+\/\d+(\/\d+)?(\?.*)?$/)
170
- end
171
-
172
- def self.probable_wordpress(uri)
173
- !!(uri.path =~ /\d{4}\/\d{2}\//)
174
- end
175
-
176
- def self.twitter_label_whitelist
177
- ['brand', 'price', 'usd', 'cad', 'reading time', 'likes']
178
- end
179
-
180
- def self.===(other)
181
- other.kind_of?(URI) ?
182
- host_matches(other, whitelist) || probable_wordpress(other) || probable_discourse(other) :
183
- super
184
- end
185
-
186
- def to_html
187
- rewrite_https(generic_html)
188
- end
189
-
190
- def placeholder_html
191
- return article_html if is_article?
192
- return image_html if has_image? && (is_video? || is_image?)
193
- return article_html if has_text? && is_embedded?
194
- to_html
195
- end
196
-
197
- def data
198
- @data ||= begin
199
- html_entities = HTMLEntities.new
200
- d = { link: link }.merge(raw)
201
-
202
- if !Onebox::Helpers.blank?(d[:title])
203
- d[:title] = html_entities.decode(Onebox::Helpers.truncate(d[:title], 80))
204
- end
205
-
206
- d[:description] ||= d[:summary]
207
- if !Onebox::Helpers.blank?(d[:description])
208
- d[:description] = html_entities.decode(Onebox::Helpers.truncate(d[:description], 250))
209
- end
210
-
211
- if !Onebox::Helpers.blank?(d[:site_name])
212
- d[:domain] = html_entities.decode(Onebox::Helpers.truncate(d[:site_name], 80))
213
- elsif !Onebox::Helpers.blank?(d[:domain])
214
- d[:domain] = "http://#{d[:domain]}" unless d[:domain] =~ /^https?:\/\//
215
- d[:domain] = URI(d[:domain]).host.to_s.sub(/^www\./, '') rescue nil
216
- end
217
-
218
- # prefer secure URLs
219
- d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
220
- d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
221
-
222
- d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
223
-
224
- d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
225
- if !Onebox::Helpers.blank?(d[:published_time])
226
- d[:article_published_time] = Time.parse(d[:published_time]).strftime("%-d %b %y")
227
- d[:article_published_time_title] = Time.parse(d[:published_time]).strftime("%I:%M%p - %d %B %Y")
228
- end
229
-
230
- # Twitter labels
231
- if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!WhitelistedGenericOnebox.twitter_label_whitelist.find { |l| d[:label1] =~ /#{l}/i }
232
- d[:label_1] = Onebox::Helpers.truncate(d[:label1])
233
- d[:data_1] = Onebox::Helpers.truncate(d[:data1])
234
- end
235
- if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!WhitelistedGenericOnebox.twitter_label_whitelist.find { |l| d[:label2] =~ /#{l}/i }
236
- unless Onebox::Helpers.blank?(d[:label_1])
237
- d[:label_2] = Onebox::Helpers.truncate(d[:label2])
238
- d[:data_2] = Onebox::Helpers.truncate(d[:data2])
239
- else
240
- d[:label_1] = Onebox::Helpers.truncate(d[:label2])
241
- d[:data_1] = Onebox::Helpers.truncate(d[:data2])
242
- end
243
- end
244
-
245
- if Onebox::Helpers.blank?(d[:label_1]) && !Onebox::Helpers.blank?(d[:price_amount]) && !Onebox::Helpers.blank?(d[:price_currency])
246
- d[:label_1] = "Price"
247
- d[:data_1] = Onebox::Helpers.truncate("#{d[:price_currency].strip} #{d[:price_amount].strip}")
248
- end
249
-
250
- d
251
- end
252
- end
253
-
254
- private
255
-
256
- def rewrite_https(html)
257
- return unless html
258
- uri = URI(@url)
259
- html.gsub!("http://", "https://") if WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.rewrites)
260
- html
261
- end
262
-
263
- def generic_html
264
- return card_html if is_card?
265
- return article_html if is_article?
266
- return video_html if is_video?
267
- return image_html if is_image?
268
- return embedded_html if is_embedded?
269
- return article_html if has_text?
270
- end
271
-
272
- def is_card?
273
- data[:card] == 'player' && data[:player] =~ URI::regexp
274
- end
275
-
276
- def is_article?
277
- (data[:type] =~ /article/ || data[:asset_type] =~ /article/) &&
278
- has_text?
279
- end
280
-
281
- def has_text?
282
- !Onebox::Helpers.blank?(data[:title]) &&
283
- !Onebox::Helpers.blank?(data[:description])
284
- end
285
-
286
- def is_image?
287
- data[:type] =~ /photo|image/ &&
288
- data[:type] !~ /photostream/ &&
289
- has_image?
290
- end
291
-
292
- def has_image?
293
- !Onebox::Helpers.blank?(data[:image])
294
- end
295
-
296
- def is_video?
297
- data[:type] =~ /^video[\/\.]/ && !Onebox::Helpers.blank?(data[:video])
298
- end
299
-
300
- def is_embedded?
301
- data[:html] &&
302
- data[:height] &&
303
- (
304
- data[:html]["iframe"] ||
305
- WhitelistedGenericOnebox.html_providers.include?(data[:provider_name])
306
- )
307
- end
308
-
309
- def card_html
310
- escaped_url = ::Onebox::Helpers.normalize_url_for_output(data[:player])
311
-
312
- <<~RAW
313
- <iframe src="#{escaped_url}"
314
- width="#{data[:player_width] || "100%"}"
315
- height="#{data[:player_height]}"
316
- scrolling="no"
317
- frameborder="0">
318
- </iframe>
319
- RAW
320
- end
321
-
322
- def article_html
323
- layout.to_html
324
- end
325
-
326
- def image_html
327
- return if Onebox::Helpers.blank?(data[:image])
328
-
329
- escaped_src = ::Onebox::Helpers.normalize_url_for_output(data[:image])
330
-
331
- alt = data[:description] || data[:title]
332
- width = data[:image_width] || data[:thumbnail_width] || data[:width]
333
- height = data[:image_height] || data[:thumbnail_height] || data[:height]
334
-
335
- "<img src='#{escaped_src}' alt='#{alt}' width='#{width}' height='#{height}' class='onebox'>"
336
- end
337
-
338
- def video_html
339
- escaped_src = ::Onebox::Helpers.normalize_url_for_output(data[:video])
340
-
341
- <<-HTML
342
- <video title='#{data[:title]}'
343
- width='#{data[:video_width]}'
344
- height='#{data[:video_height]}'
345
- style='max-width:100%'
346
- controls=''>
347
- <source src='#{escaped_src}'>
348
- </video>
349
- HTML
350
- end
351
-
352
- def embedded_html
353
- fragment = Nokogiri::HTML::fragment(data[:html])
354
- fragment.css("img").each { |img| img["class"] = "thumbnail" }
355
- if iframe = fragment.at_css("iframe")
356
- iframe.remove_attribute("style")
357
- iframe["width"] = data[:width] || "100%"
358
- iframe["height"] = data[:height]
359
- iframe["scrolling"] = "no"
360
- iframe["frameborder"] = "0"
361
- end
362
- fragment.to_html
363
- end
364
- end
365
- end
366
- end
1
+ require 'htmlentities'
2
+
3
+ module Onebox
4
+ module Engine
5
+ class WhitelistedGenericOnebox
6
+ include Engine
7
+ include StandardEmbed
8
+ include LayoutSupport
9
+
10
+ def self.whitelist=(list)
11
+ @whitelist = list
12
+ end
13
+
14
+ def self.whitelist
15
+ @whitelist ||= default_whitelist.dup
16
+ end
17
+
18
+ def self.default_whitelist
19
+ %w(
20
+ 23hq.com
21
+ 500px.com
22
+ 8tracks.com
23
+ abc.net.au
24
+ about.com
25
+ answers.com
26
+ arstechnica.com
27
+ ask.com
28
+ battle.net
29
+ bbc.co.uk
30
+ bbs.boingboing.net
31
+ bestbuy.ca
32
+ bestbuy.com
33
+ blip.tv
34
+ bloomberg.com
35
+ businessinsider.com
36
+ change.org
37
+ clikthrough.com
38
+ cnet.com
39
+ cnn.com
40
+ codepen.io
41
+ collegehumor.com
42
+ consider.it
43
+ coursera.org
44
+ cracked.com
45
+ dailymail.co.uk
46
+ dailymotion.com
47
+ deadline.com
48
+ dell.com
49
+ deviantart.com
50
+ digg.com
51
+ dotsub.com
52
+ ebay.ca
53
+ ebay.co.uk
54
+ ebay.com
55
+ ehow.com
56
+ espn.go.com
57
+ etsy.com
58
+ findery.com
59
+ folksy.com
60
+ forbes.com
61
+ foxnews.com
62
+ funnyordie.com
63
+ gifs.com
64
+ groupon.com
65
+ howtogeek.com
66
+ huffingtonpost.ca
67
+ huffingtonpost.com
68
+ hulu.com
69
+ ign.com
70
+ ikea.com
71
+ imdb.com
72
+ indiatimes.com
73
+ itunes.apple.com
74
+ khanacademy.org
75
+ kickstarter.com
76
+ kinomap.com
77
+ lessonplanet.com
78
+ liveleak.com
79
+ livestream.com
80
+ mashable.com
81
+ medium.com
82
+ meetup.com
83
+ mixcloud.com
84
+ mlb.com
85
+ myshopify.com
86
+ myspace.com
87
+ nba.com
88
+ npr.org
89
+ nytimes.com
90
+ photobucket.com
91
+ pinterest.com
92
+ reference.com
93
+ revision3.com
94
+ rottentomatoes.com
95
+ samsung.com
96
+ screenr.com
97
+ scribd.com
98
+ simplecast.com
99
+ slideshare.net
100
+ sourceforge.net
101
+ speakerdeck.com
102
+ spotify.com
103
+ squidoo.com
104
+ streamable.com
105
+ techcrunch.com
106
+ ted.com
107
+ thefreedictionary.com
108
+ theglobeandmail.com
109
+ thenextweb.com
110
+ theonion.com
111
+ thestar.com
112
+ thesun.co.uk
113
+ thinkgeek.com
114
+ tmz.com
115
+ torontosun.com
116
+ tumblr.com
117
+ twitpic.com
118
+ usatoday.com
119
+ viddler.com
120
+ videojug.com
121
+ vine.co
122
+ walmart.com
123
+ washingtonpost.com
124
+ wi.st
125
+ wikia.com
126
+ wikihow.com
127
+ wired.com
128
+ wistia.com
129
+ wonderhowto.com
130
+ wsj.com
131
+ zappos.com
132
+ zillow.com
133
+ )
134
+ end
135
+
136
+ # Often using the `html` attribute is not what we want, like for some blogs that
137
+ # include the entire page HTML. However for some providers like Flickr it allows us
138
+ # to return gifv and galleries.
139
+ def self.default_html_providers
140
+ ['Flickr', 'Meetup']
141
+ end
142
+
143
+ def self.html_providers
144
+ @html_providers ||= default_html_providers.dup
145
+ end
146
+
147
+ def self.html_providers=(new_provs)
148
+ @html_providers = new_provs
149
+ end
150
+
151
+ # A re-written URL converts http:// -> https://
152
+ def self.rewrites
153
+ @rewrites ||= https_hosts.dup
154
+ end
155
+
156
+ def self.rewrites=(new_list)
157
+ @rewrites = new_list
158
+ end
159
+
160
+ def self.https_hosts
161
+ %w(slideshare.net dailymotion.com livestream.com imgur.com flickr.com)
162
+ end
163
+
164
+ def self.host_matches(uri, list)
165
+ !!list.find { |h| %r((^|\.)#{Regexp.escape(h)}$).match(uri.host) }
166
+ end
167
+
168
+ def self.probable_discourse(uri)
169
+ !!(uri.path =~ /\/t\/[^\/]+\/\d+(\/\d+)?(\?.*)?$/)
170
+ end
171
+
172
+ def self.probable_wordpress(uri)
173
+ !!(uri.path =~ /\d{4}\/\d{2}\//)
174
+ end
175
+
176
+ def self.twitter_label_whitelist
177
+ ['brand', 'price', 'usd', 'cad', 'reading time', 'likes']
178
+ end
179
+
180
+ def self.===(other)
181
+ other.kind_of?(URI) ?
182
+ host_matches(other, whitelist) || probable_wordpress(other) || probable_discourse(other) :
183
+ super
184
+ end
185
+
186
+ def to_html
187
+ rewrite_https(generic_html)
188
+ end
189
+
190
+ def placeholder_html
191
+ return article_html if is_article?
192
+ return image_html if has_image? && (is_video? || is_image?)
193
+ return article_html if has_text? && is_embedded?
194
+ to_html
195
+ end
196
+
197
+ def data
198
+ @data ||= begin
199
+ html_entities = HTMLEntities.new
200
+ d = { link: link }.merge(raw)
201
+
202
+ if !Onebox::Helpers.blank?(d[:title])
203
+ d[:title] = html_entities.decode(Onebox::Helpers.truncate(d[:title], 80))
204
+ end
205
+
206
+ d[:description] ||= d[:summary]
207
+ if !Onebox::Helpers.blank?(d[:description])
208
+ d[:description] = html_entities.decode(Onebox::Helpers.truncate(d[:description], 250))
209
+ end
210
+
211
+ if !Onebox::Helpers.blank?(d[:site_name])
212
+ d[:domain] = html_entities.decode(Onebox::Helpers.truncate(d[:site_name], 80))
213
+ elsif !Onebox::Helpers.blank?(d[:domain])
214
+ d[:domain] = "http://#{d[:domain]}" unless d[:domain] =~ /^https?:\/\//
215
+ d[:domain] = URI(d[:domain]).host.to_s.sub(/^www\./, '') rescue nil
216
+ end
217
+
218
+ # prefer secure URLs
219
+ d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
220
+ d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
221
+
222
+ d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
223
+
224
+ d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
225
+ if !Onebox::Helpers.blank?(d[:published_time])
226
+ d[:article_published_time] = Time.parse(d[:published_time]).strftime("%-d %b %y")
227
+ d[:article_published_time_title] = Time.parse(d[:published_time]).strftime("%I:%M%p - %d %B %Y")
228
+ end
229
+
230
+ # Twitter labels
231
+ if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!WhitelistedGenericOnebox.twitter_label_whitelist.find { |l| d[:label1] =~ /#{l}/i }
232
+ d[:label_1] = Onebox::Helpers.truncate(d[:label1])
233
+ d[:data_1] = Onebox::Helpers.truncate(d[:data1])
234
+ end
235
+ if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!WhitelistedGenericOnebox.twitter_label_whitelist.find { |l| d[:label2] =~ /#{l}/i }
236
+ unless Onebox::Helpers.blank?(d[:label_1])
237
+ d[:label_2] = Onebox::Helpers.truncate(d[:label2])
238
+ d[:data_2] = Onebox::Helpers.truncate(d[:data2])
239
+ else
240
+ d[:label_1] = Onebox::Helpers.truncate(d[:label2])
241
+ d[:data_1] = Onebox::Helpers.truncate(d[:data2])
242
+ end
243
+ end
244
+
245
+ if Onebox::Helpers.blank?(d[:label_1]) && !Onebox::Helpers.blank?(d[:price_amount]) && !Onebox::Helpers.blank?(d[:price_currency])
246
+ d[:label_1] = "Price"
247
+ d[:data_1] = Onebox::Helpers.truncate("#{d[:price_currency].strip} #{d[:price_amount].strip}")
248
+ end
249
+
250
+ d
251
+ end
252
+ end
253
+
254
+ private
255
+
256
+ def rewrite_https(html)
257
+ return unless html
258
+ uri = URI(@url)
259
+ html.gsub!("http://", "https://") if WhitelistedGenericOnebox.host_matches(uri, WhitelistedGenericOnebox.rewrites)
260
+ html
261
+ end
262
+
263
+ def generic_html
264
+ return card_html if is_card?
265
+ return article_html if is_article?
266
+ return video_html if is_video?
267
+ return image_html if is_image?
268
+ return embedded_html if is_embedded?
269
+ return article_html if has_text?
270
+ end
271
+
272
+ def is_card?
273
+ data[:card] == 'player' && data[:player] =~ URI::regexp
274
+ end
275
+
276
+ def is_article?
277
+ (data[:type] =~ /article/ || data[:asset_type] =~ /article/) &&
278
+ has_text?
279
+ end
280
+
281
+ def has_text?
282
+ !Onebox::Helpers.blank?(data[:title]) &&
283
+ !Onebox::Helpers.blank?(data[:description])
284
+ end
285
+
286
+ def is_image?
287
+ data[:type] =~ /photo|image/ &&
288
+ data[:type] !~ /photostream/ &&
289
+ has_image?
290
+ end
291
+
292
+ def has_image?
293
+ !Onebox::Helpers.blank?(data[:image])
294
+ end
295
+
296
+ def is_video?
297
+ data[:type] =~ /^video[\/\.]/ && !Onebox::Helpers.blank?(data[:video])
298
+ end
299
+
300
+ def is_embedded?
301
+ data[:html] &&
302
+ data[:height] &&
303
+ (
304
+ data[:html]["iframe"] ||
305
+ WhitelistedGenericOnebox.html_providers.include?(data[:provider_name])
306
+ )
307
+ end
308
+
309
+ def card_html
310
+ escaped_url = ::Onebox::Helpers.normalize_url_for_output(data[:player])
311
+
312
+ <<~RAW
313
+ <iframe src="#{escaped_url}"
314
+ width="#{data[:player_width] || "100%"}"
315
+ height="#{data[:player_height]}"
316
+ scrolling="no"
317
+ frameborder="0">
318
+ </iframe>
319
+ RAW
320
+ end
321
+
322
+ def article_html
323
+ layout.to_html
324
+ end
325
+
326
+ def image_html
327
+ return if Onebox::Helpers.blank?(data[:image])
328
+
329
+ escaped_src = ::Onebox::Helpers.normalize_url_for_output(data[:image])
330
+
331
+ alt = data[:description] || data[:title]
332
+ width = data[:image_width] || data[:thumbnail_width] || data[:width]
333
+ height = data[:image_height] || data[:thumbnail_height] || data[:height]
334
+
335
+ "<img src='#{escaped_src}' alt='#{alt}' width='#{width}' height='#{height}' class='onebox'>"
336
+ end
337
+
338
+ def video_html
339
+ escaped_src = ::Onebox::Helpers.normalize_url_for_output(data[:video])
340
+
341
+ <<-HTML
342
+ <video title='#{data[:title]}'
343
+ width='#{data[:video_width]}'
344
+ height='#{data[:video_height]}'
345
+ style='max-width:100%'
346
+ controls=''>
347
+ <source src='#{escaped_src}'>
348
+ </video>
349
+ HTML
350
+ end
351
+
352
+ def embedded_html
353
+ fragment = Nokogiri::HTML::fragment(data[:html])
354
+ fragment.css("img").each { |img| img["class"] = "thumbnail" }
355
+ if iframe = fragment.at_css("iframe")
356
+ iframe.remove_attribute("style")
357
+ iframe["width"] = data[:width] || "100%"
358
+ iframe["height"] = data[:height]
359
+ iframe["scrolling"] = "no"
360
+ iframe["frameborder"] = "0"
361
+ end
362
+ fragment.to_html
363
+ end
364
+ end
365
+ end
366
+ end