onebox 2.2.14 → 2.2.15

Sign up to get free protection for your applications and access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/lib/onebox/engine.rb +6 -10
  3. data/lib/onebox/engine/allowlisted_generic_onebox.rb +0 -9
  4. data/lib/onebox/engine/amazon_onebox.rb +23 -16
  5. data/lib/onebox/engine/flickr_onebox.rb +2 -2
  6. data/lib/onebox/engine/gfycat_onebox.rb +26 -26
  7. data/lib/onebox/engine/github_commit_onebox.rb +1 -1
  8. data/lib/onebox/engine/github_folder_onebox.rb +1 -1
  9. data/lib/onebox/engine/google_docs_onebox.rb +22 -40
  10. data/lib/onebox/engine/google_maps_onebox.rb +10 -6
  11. data/lib/onebox/engine/google_photos_onebox.rb +6 -6
  12. data/lib/onebox/engine/imgur_onebox.rb +2 -2
  13. data/lib/onebox/engine/instagram_onebox.rb +2 -3
  14. data/lib/onebox/engine/pastebin_onebox.rb +11 -15
  15. data/lib/onebox/engine/pdf_onebox.rb +7 -15
  16. data/lib/onebox/engine/pubmed_onebox.rb +16 -12
  17. data/lib/onebox/engine/stack_exchange_onebox.rb +1 -1
  18. data/lib/onebox/engine/standard_embed.rb +0 -3
  19. data/lib/onebox/engine/trello_onebox.rb +3 -6
  20. data/lib/onebox/engine/youku_onebox.rb +0 -6
  21. data/lib/onebox/helpers.rb +2 -1
  22. data/lib/onebox/layout.rb +2 -14
  23. data/lib/onebox/matcher.rb +10 -8
  24. data/lib/onebox/mixins/git_blob_onebox.rb +3 -5
  25. data/lib/onebox/open_graph.rb +4 -4
  26. data/lib/onebox/preview.rb +2 -2
  27. data/lib/onebox/version.rb +1 -1
  28. data/templates/_layout.mustache +6 -2
  29. data/templates/allowlistedgeneric.mustache +8 -9
  30. data/templates/amazon.mustache +5 -2
  31. data/templates/githubblob.mustache +44 -34
  32. data/templates/githubcommit.mustache +1 -3
  33. data/templates/githubfolder.mustache +2 -2
  34. data/templates/githubgist.mustache +9 -6
  35. data/templates/githubissue.mustache +3 -3
  36. data/templates/githubpullrequest.mustache +1 -1
  37. data/templates/gitlabblob.mustache +11 -4
  38. data/templates/googledocs.mustache +2 -2
  39. data/templates/googledrive.mustache +2 -2
  40. data/templates/googleplayapp.mustache +2 -1
  41. data/templates/instagram.mustache +1 -1
  42. data/templates/pastebin.mustache +6 -2
  43. data/templates/pdf.mustache +6 -3
  44. data/templates/stackexchange.mustache +1 -0
  45. data/templates/twitterstatus.mustache +20 -5
  46. data/templates/wikimedia.mustache +2 -2
  47. data/templates/wikipedia.mustache +2 -2
  48. data/templates/xkcd.mustache +2 -2
  49. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0b9626bc9bec1d423e617a946df2e4be0334cf6f7693056dc79865b50f127e26
4
- data.tar.gz: 9fc7c684bc99a33d356cfbf0c1a085e5dc1531aa6a6a3028670afb05406c4448
3
+ metadata.gz: 5e2169ef1cfb44f0208566f5ecff7bd7c5d2bbe2f12b58791f50aa24b776e56d
4
+ data.tar.gz: 9f9d38b578e46e6ce8c8c007577ff02522eab24f3dcc42f9eabd5b838a668e72
5
5
  SHA512:
6
- metadata.gz: 2a3f999936fbfe289e07b58b424e735d13c2e3132beffb16c41a6094fa42aa0dd1018d544175eba1f1ad7552b26cade7096d72ff286268a097230835d8f9c99d
7
- data.tar.gz: 1effdda4c94dc9dbded959fc3acf0a51d6efd09629cccd2e9c2c1df74d637c9b8cb577be7fb79dc90b3ba42125af6dfde0c56e33cb8b31f6d4a72d0090a0fded
6
+ metadata.gz: e293af57162b61ad0fa3472b7d9b35709f13504308e69eba45f40e9babff53e0258186add06ed4a89bc8305b8903dc49718c274659b5fca0712bc26d281ca7d4
7
+ data.tar.gz: 3bb70c552e010149bc32fd4c9dbe416cd6ae0098b4286e5894fbc12c07628b5079c0463ad78b54ab6dc773954e055c9195aea5ac180ab8bdf7dca3847db1bc96
data/lib/onebox/engine.rb CHANGED
@@ -28,23 +28,19 @@ module Onebox
28
28
  end
29
29
  end
30
30
 
31
- attr_reader :url, :uri
32
- attr_reader :timeout
31
+ attr_reader :url, :uri, :options, :timeout
33
32
  attr :errors
34
33
 
35
34
  DEFAULT = {}
36
- def options
37
- @options
38
- end
39
35
 
40
36
  def options=(opt)
41
- return @options if opt.nil? #make sure options provided
42
- opt = opt.to_h if opt.instance_of?(OpenStruct)
37
+ return @options if opt.nil? # make sure options provided
38
+ opt = opt.to_h if opt.instance_of?(OpenStruct)
43
39
  @options.merge!(opt)
44
40
  @options
45
41
  end
46
42
 
47
- def initialize(link, timeout = nil)
43
+ def initialize(url, timeout = nil)
48
44
  @errors = {}
49
45
  @options = DEFAULT
50
46
  class_name = self.class.name.split("::").last.to_s
@@ -52,8 +48,8 @@ module Onebox
52
48
  # Set the engine options extracted from global options.
53
49
  self.options = Onebox.options[class_name] || {}
54
50
 
55
- @url = link
56
- @uri = URI(link)
51
+ @url = url
52
+ @uri = URI(url)
57
53
  if always_https?
58
54
  @uri.scheme = 'https'
59
55
  @url = @uri.to_s
@@ -27,7 +27,6 @@ module Onebox
27
27
  500px.com
28
28
  8tracks.com
29
29
  abc.net.au
30
- about.com
31
30
  answers.com
32
31
  arstechnica.com
33
32
  ask.com
@@ -36,11 +35,9 @@ module Onebox
36
35
  bbs.boingboing.net
37
36
  bestbuy.ca
38
37
  bestbuy.com
39
- blip.tv
40
38
  bloomberg.com
41
39
  businessinsider.com
42
40
  change.org
43
- clikthrough.com
44
41
  cnet.com
45
42
  cnn.com
46
43
  codepen.io
@@ -90,7 +87,6 @@ module Onebox
90
87
  meetup.com
91
88
  mixcloud.com
92
89
  mlb.com
93
- myshopify.com
94
90
  myspace.com
95
91
  nba.com
96
92
  npr.org
@@ -98,16 +94,13 @@ module Onebox
98
94
  photobucket.com
99
95
  pinterest.com
100
96
  reference.com
101
- revision3.com
102
97
  rottentomatoes.com
103
98
  samsung.com
104
- screenr.com
105
99
  scribd.com
106
100
  slideshare.net
107
101
  sourceforge.net
108
102
  speakerdeck.com
109
103
  spotify.com
110
- squidoo.com
111
104
  streamable.com
112
105
  techcrunch.com
113
106
  ted.com
@@ -124,7 +117,6 @@ module Onebox
124
117
  twitpic.com
125
118
  usatoday.com
126
119
  viddler.com
127
- videojug.com
128
120
  vine.co
129
121
  walmart.com
130
122
  washingtonpost.com
@@ -275,7 +267,6 @@ module Onebox
275
267
 
276
268
  def rewrite_https(html)
277
269
  return unless html
278
- uri = URI(@url)
279
270
  if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
280
271
  html = html.gsub("http://", "https://")
281
272
  end
@@ -19,10 +19,8 @@ module Onebox
19
19
  # If possible, fetch the cached HTML body immediately so we can
20
20
  # try to grab the canonical URL from that document,
21
21
  # rather than guess at the best URL structure to use
22
- if body_cacher&.respond_to?('cache_response_body?')
23
- if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
24
- @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
25
- end
22
+ if !@raw && has_cached_body
23
+ @raw = Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
26
24
  end
27
25
 
28
26
  if @raw
@@ -31,7 +29,8 @@ module Onebox
31
29
  end
32
30
 
33
31
  if match && match[:id]
34
- return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
32
+ id = Addressable::URI.encode_component(match[:id], Addressable::URI::CharacterClasses::PATH)
33
+ return "https://www.amazon.#{tld}/dp/#{id}"
35
34
  end
36
35
 
37
36
  @url
@@ -49,6 +48,12 @@ module Onebox
49
48
 
50
49
  private
51
50
 
51
+ def has_cached_body
52
+ body_cacher&.respond_to?('cache_response_body?') &&
53
+ body_cacher.cache_response_body?(uri.to_s) &&
54
+ body_cacher.cached_response_body_exists?(uri.to_s)
55
+ end
56
+
52
57
  def match
53
58
  @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
54
59
  end
@@ -57,9 +62,9 @@ module Onebox
57
62
  if (main_image = raw.css("#main-image")) && main_image.any?
58
63
  attributes = main_image.first.attributes
59
64
 
60
- return attributes["data-a-hires"].to_s if attributes["data-a-hires"]
61
-
62
- if attributes["data-a-dynamic-image"]
65
+ if attributes["data-a-hires"]
66
+ return attributes["data-a-hires"].to_s
67
+ elsif attributes["data-a-dynamic-image"]
63
68
  return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
64
69
  end
65
70
  end
@@ -67,9 +72,11 @@ module Onebox
67
72
  if (landing_image = raw.css("#landingImage")) && landing_image.any?
68
73
  attributes = landing_image.first.attributes
69
74
 
70
- return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
71
-
72
- landing_image.first["src"].to_s
75
+ if attributes["data-old-hires"]
76
+ return attributes["data-old-hires"].to_s
77
+ else
78
+ return landing_image.first["src"].to_s
79
+ end
73
80
  end
74
81
 
75
82
  if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
@@ -91,16 +98,16 @@ module Onebox
91
98
  end
92
99
 
93
100
  def multiple_authors(authors_xpath)
94
- author_list = raw.xpath(authors_xpath)
95
- authors = []
96
- author_list.each { |a| authors << a.inner_text.strip }
97
- authors.join(", ")
101
+ raw
102
+ .xpath(authors_xpath)
103
+ .map { |a| a.inner_text.strip }
104
+ .join(", ")
98
105
  end
99
106
 
100
107
  def data
101
108
  og = ::Onebox::OpenGraph.new(raw)
102
109
 
103
- if raw.at_css('#dp.book_mobile') #printed books
110
+ if raw.at_css('#dp.book_mobile') # printed books
104
111
  title = raw.at("h1#title")&.inner_text
105
112
  authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
106
113
  rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text
@@ -32,7 +32,7 @@ module Onebox
32
32
  <span class='album-title'>#{album_title}</span>
33
33
  </span>
34
34
  </span>
35
- <img src='#{og.get_secure_image}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
35
+ <img src='#{og.secure_image_url}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
36
36
  </a>
37
37
  </div>
38
38
  HTML
@@ -43,7 +43,7 @@ module Onebox
43
43
 
44
44
  <<-HTML
45
45
  <a href='#{escaped_url}' target='_blank' rel='noopener' class="onebox">
46
- <img src='#{og.get_secure_image}' #{og.title_attr} alt='Imgur' height='#{og.image_height}' width='#{og.image_width}'>
46
+ <img src='#{og.secure_image_url}' #{og.title_attr} alt='Imgur' height='#{og.image_height}' width='#{og.image_width}'>
47
47
  </a>
48
48
  HTML
49
49
  end
@@ -9,8 +9,8 @@ module Onebox
9
9
  matches_regexp(/^https?:\/\/gfycat\.com\//)
10
10
  always_https
11
11
 
12
+ # This engine should have priority over AllowlistedGenericOnebox.
12
13
  def self.priority
13
- # This engine should have priority over AllowlistedGenericOnebox.
14
14
  1
15
15
  end
16
16
 
@@ -21,6 +21,7 @@ module Onebox
21
21
  <img src="https://gfycat.com/static/favicons/favicon-96x96.png" class="site-icon" width="64" height="64">
22
22
  <a href="#{data[:url]}" target="_blank" rel="nofollow ugc noopener">Gfycat.com</a>
23
23
  </header>
24
+
24
25
  <article class="onebox-body">
25
26
  <h4>
26
27
  #{data[:title]} by
@@ -36,11 +37,12 @@ module Onebox
36
37
  <img title="Sorry, your browser doesn't support HTML5 video." src="#{data[:posterUrl]}">
37
38
  </video>
38
39
  </div>
40
+
39
41
  <p>
40
42
  <span class="label1">#{data[:keywords]}</span>
41
43
  </p>
42
-
43
44
  </article>
45
+
44
46
  <div style="clear: both"></div>
45
47
  </aside>
46
48
  HTML
@@ -61,52 +63,50 @@ module Onebox
61
63
  @match ||= @url.match(/^https?:\/\/gfycat\.com\/(gifs\/detail\/)?(?<name>.+)/)
62
64
  end
63
65
 
64
- def nokogiri_page
65
- @nokogiri_page ||= begin
66
- response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
67
- Nokogiri::HTML(response)
68
- end
69
- end
66
+ def og_data
67
+ return @og_data if defined?(@og_data)
70
68
 
71
- def get_og_data
72
- og_data = {}
69
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
70
+ page = Nokogiri::HTML(response)
71
+ script = page.at_css('script[type="application/ld+json"]')
73
72
 
74
- if json_string = nokogiri_page.at_css('script[type="application/ld+json"]')&.text
75
- og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
73
+ if json_string = script&.text
74
+ @og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
75
+ else
76
+ @og_data = {}
76
77
  end
77
-
78
- og_data
79
78
  end
80
79
 
81
80
  def data
82
- og_data = get_og_data
81
+ return @data if defined?(@data)
83
82
 
84
- response = {
83
+ @data = {
85
84
  name: match[:name],
86
85
  title: og_data[:headline] || 'No Title',
87
86
  author: og_data[:author],
88
- url: @url
87
+ url: @url,
89
88
  }
90
89
 
91
- keywords = og_data[:keywords]&.split(',')
92
- if keywords
93
- response[:keywords] = keywords.map { |t| "<a href='https://gfycat.com/gifs/search/#{t}'>##{t}</a>" }.join(' ')
90
+ if keywords = og_data[:keywords]&.split(',')
91
+ @data[:keywords] = keywords
92
+ .map { |keyword| "<a href='https://gfycat.com/gifs/search/#{keyword}'>##{keyword}</a>" }
93
+ .join(' ')
94
94
  end
95
95
 
96
96
  if og_data[:video]
97
97
  content_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:contentUrl])
98
98
  video_url = Pathname.new(content_url)
99
- response[:webmUrl] = video_url.sub_ext(".webm").to_s
100
- response[:mp4Url] = video_url.sub_ext(".mp4").to_s
99
+ @data[:webmUrl] = video_url.sub_ext(".webm").to_s
100
+ @data[:mp4Url] = video_url.sub_ext(".mp4").to_s
101
101
 
102
102
  thumbnail_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:thumbnailUrl])
103
- response[:posterUrl] = thumbnail_url
103
+ @data[:posterUrl] = thumbnail_url
104
104
 
105
- response[:width] = og_data[:video][:width]
106
- response[:height] = og_data[:video][:height]
105
+ @data[:width] = og_data[:video][:width]
106
+ @data[:height] = og_data[:video][:height]
107
107
  end
108
108
 
109
- response
109
+ @data
110
110
  end
111
111
  end
112
112
  end
@@ -10,7 +10,7 @@ module Onebox
10
10
  include JSON
11
11
  include Onebox::Mixins::GithubBody
12
12
 
13
- matches_regexp Regexp.new("^https?://(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:/)?(?:.)*/commit/")
13
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:\/)?(?:.)*\/commit\//)
14
14
  always_https
15
15
 
16
16
  def url
@@ -7,7 +7,7 @@ module Onebox
7
7
  include StandardEmbed
8
8
  include LayoutSupport
9
9
 
10
- matches_regexp Regexp.new(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/)
10
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/)
11
11
  always_https
12
12
 
13
13
  def self.priority
@@ -4,61 +4,43 @@ module Onebox
4
4
  module Engine
5
5
  class GoogleDocsOnebox
6
6
  include Engine
7
+ include StandardEmbed
7
8
  include LayoutSupport
8
9
 
9
- def self.supported_endpoints
10
- %w(spreadsheets document forms presentation)
11
- end
12
-
13
- def self.short_types
14
- @shorttypes ||= {
15
- spreadsheets: :sheets,
16
- document: :docs,
17
- presentation: :slides,
18
- forms: :forms,
19
- }
20
- end
10
+ SUPPORTED_ENDPOINTS = %w(spreadsheets document forms presentation)
11
+ SHORT_TYPES = {
12
+ spreadsheets: :sheets,
13
+ document: :docs,
14
+ presentation: :slides,
15
+ forms: :forms,
16
+ }
21
17
 
22
- matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?<endpoint>(#{supported_endpoints.join('|')}))\/d\/((?<key>[\w-]*)).+$/)
18
+ matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?<endpoint>(#{SUPPORTED_ENDPOINTS.join('|')}))\/d\/((?<key>[\w-]*)).+$/)
23
19
  always_https
24
20
 
25
- protected
21
+ private
26
22
 
27
23
  def data
28
- og_data = get_og_data
24
+ og_data = get_opengraph
25
+ short_type = SHORT_TYPES[match[:endpoint].to_sym]
26
+
27
+ description = if Onebox::Helpers.blank?(og_data.description)
28
+ "This #{short_type.to_s.chop.capitalize} is private"
29
+ else
30
+ Onebox::Helpers.truncate(og_data.description, 250)
31
+ end
32
+
29
33
  {
30
34
  link: link,
31
- title: og_data[:title] || "Google #{shorttype.to_s.capitalize}",
32
- description: Onebox::Helpers.truncate(og_data[:description], 250) || "This #{shorttype.to_s.chop.capitalize} is private",
33
- type: shorttype
35
+ title: og_data.title || "Google #{short_type.to_s.capitalize}",
36
+ description: description,
37
+ type: short_type
34
38
  }
35
39
  end
36
40
 
37
- def doc_type
38
- @doc_type ||= match[:endpoint].to_sym
39
- end
40
-
41
- def shorttype
42
- GoogleDocsOnebox.short_types[doc_type]
43
- end
44
-
45
41
  def match
46
42
  @match ||= @url.match(@@matcher)
47
43
  end
48
-
49
- def get_og_data
50
- response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
51
- html = Nokogiri::HTML(response)
52
- og_data = {}
53
- html.css('meta').each do |m|
54
- if m.attribute('property') && m.attribute('property').to_s.match(/^og:/i)
55
- m_content = m.attribute('content').to_s.strip
56
- m_property = m.attribute('property').to_s.gsub('og:', '')
57
- og_data[m_property.to_sym] = m_content
58
- end
59
- end
60
- og_data
61
- end
62
44
  end
63
45
  end
64
46
  end
@@ -119,8 +119,6 @@ module Onebox
119
119
  @placeholder = "https://maps.googleapis.com/maps/api/streetview?size=690x400&location=#{lon},#{lat}&pano=#{panoid}&fov=#{zoom}&heading=#{heading}&pitch=#{pitch}&sensor=false"
120
120
 
121
121
  when :canonical
122
- uri = URI(@url)
123
-
124
122
  query = URI::decode_www_form(uri.query).to_h
125
123
  if !query.has_key?("ll")
126
124
  raise ArgumentError, "canonical url lacks location argument" unless query.has_key?("sll")
@@ -163,14 +161,20 @@ module Onebox
163
161
  end
164
162
 
165
163
  def follow_redirect!
166
- uri = URI(@url)
167
164
  begin
168
- http = Net::HTTP.start(uri.host, uri.port,
169
- use_ssl: uri.scheme == 'https', open_timeout: timeout, read_timeout: timeout)
170
- response = http.head(uri.path)
165
+ http = Net::HTTP.start(
166
+ uri.host,
167
+ uri.port,
168
+ use_ssl: uri.scheme == 'https',
169
+ open_timeout: timeout,
170
+ read_timeout: timeout
171
+ )
171
172
 
173
+ response = http.head(uri.path)
172
174
  raise "unexpected response code #{response.code}" unless %w(200 301 302).include?(response.code)
175
+
173
176
  @url = response.code == "200" ? uri.to_s : response["Location"]
177
+ @uri = URI(@url)
174
178
  ensure
175
179
  http.finish rescue nil
176
180
  end