onebox 2.2.6 → 2.2.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 43221e11b40d179b5a90c7c84f6c79096ba999b06ab411d159668c775de75e99
4
- data.tar.gz: f23358a13d9644974b3e266f5acfd32eb13e113e085a4806369f2f858397e425
3
+ metadata.gz: cdbe1e67e709c8a2c8179a7118f4b51447a15a1bf1b56619fc601d3a9486123e
4
+ data.tar.gz: 8a72a7fc2152789c4103de330b7dcd69ec9a9c487fdc499a6068bdd0840e2e26
5
5
  SHA512:
6
- metadata.gz: 290c2d0fbc5b6e2f3a4a4557e82cf6f509b8756d4c56be587af86cf95bd5c9cd58ba01e5b266b29268723941efbd542d03d89afa92bf08f0a8c87b42752e03c8
7
- data.tar.gz: 5497119a20088d0b09fe69b172926ba60318cf4145a174bac66f8ef781fc7465e6cf0e9524f94156633cb605d3e9081f62697b641ce3e514c2cfc74d2a8026f3
6
+ metadata.gz: 3fe9a20e93b0aea3a3a7362bb5e9f026ddb63ab0c7edb64f5b7a1d9831fa76d001314ea0fdf07c92daefe95fc096e8068d28e3e6f5c7860689e5545646031084
7
+ data.tar.gz: 4e2e19424dab311c7a56feec82486333520f72575f95acccd1a11b8419616d2ea895e02472542d6bcdce7c5d6fbf72f887eab3941f67dd546762349bec3b66a9
@@ -44,6 +44,6 @@ jobs:
44
44
  - uses: actions/checkout@v2
45
45
 
46
46
  - name: Release Gem
47
- uses: CvX/publish-rubygems-action@master
47
+ uses: discourse/publish-rubygems-action@main
48
48
  env:
49
49
  RUBYGEMS_API_KEY: ${{secrets.RUBYGEMS_API_KEY}}
@@ -227,8 +227,10 @@ module Onebox
227
227
  d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
228
228
  d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
229
229
  d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
230
+ d[:image] = nil if Onebox::Helpers.blank?(d[:image])
230
231
 
231
232
  d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
233
+ d[:video] = nil if Onebox::Helpers.blank?(d[:video])
232
234
 
233
235
  d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
234
236
  if !Onebox::Helpers.blank?(d[:published_time])
@@ -11,11 +11,25 @@ module Onebox
11
11
  include HTML
12
12
 
13
13
  always_https
14
- matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
14
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
15
15
 
16
16
  def url
17
+ # If possible, fetch the cached HTML body immediately so we can
18
+ # try to grab the canonical URL from that document,
19
+ # rather than guess at the best URL structure to use
20
+ if body_cacher&.respond_to?('cache_response_body?')
21
+ if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
22
+ @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
23
+ end
24
+ end
25
+
26
+ if @raw
27
+ canonical_link = @raw.at('//link[@rel="canonical"]/@href')
28
+ return canonical_link.to_s if canonical_link
29
+ end
30
+
17
31
  if match && match[:id]
18
- return "https://www.amazon.#{tld}/gp/aw/d/#{Onebox::Helpers.uri_encode(match[:id])}"
32
+ return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
19
33
  end
20
34
 
21
35
  @url
@@ -26,16 +40,15 @@ module Onebox
26
40
  end
27
41
 
28
42
  def http_params
29
- {
30
- 'User-Agent' =>
31
- 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3'
32
- }
43
+ if @options && @options[:user_agent]
44
+ { 'User-Agent' => @options[:user_agent] }
45
+ end
33
46
  end
34
47
 
35
48
  private
36
49
 
37
50
  def match
38
- @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[^\/]+)(?:\/|$)/mi)
51
+ @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
39
52
  end
40
53
 
41
54
  def image
@@ -50,6 +63,10 @@ module Onebox
50
63
  end
51
64
 
52
65
  if (landing_image = raw.css("#landingImage")) && landing_image.any?
66
+ attributes = landing_image.first.attributes
67
+
68
+ return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
69
+
53
70
  landing_image.first["src"].to_s
54
71
  end
55
72
 
@@ -100,7 +117,7 @@ module Onebox
100
117
  end
101
118
 
102
119
  result = {
103
- link: link,
120
+ link: url,
104
121
  title: title,
105
122
  by_info: authors,
106
123
  image: og.image || image,
@@ -131,7 +148,7 @@ module Onebox
131
148
  end
132
149
 
133
150
  result = {
134
- link: link,
151
+ link: url,
135
152
  title: title,
136
153
  by_info: authors,
137
154
  image: og.image || image,
@@ -147,7 +164,7 @@ module Onebox
147
164
  else
148
165
  title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
149
166
  result = {
150
- link: link,
167
+ link: url,
151
168
  title: title,
152
169
  image: og.image || image,
153
170
  price: price
@@ -157,7 +174,10 @@ module Onebox
157
174
  result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
158
175
 
159
176
  summary = raw.at("#productDescription")
160
- result[:description] = og.description || (summary && summary.inner_text) || CGI.unescapeHTML(Onebox::Helpers.truncate(raw.css("meta[name=description]").first["content"], 250))
177
+
178
+ description = og.description || summary&.inner_text
179
+ description ||= raw.css("meta[name=description]").first&.[]("content")
180
+ result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
161
181
  end
162
182
 
163
183
  result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
@@ -63,7 +63,7 @@ module Onebox
63
63
 
64
64
  def nokogiri_page
65
65
  @nokogiri_page ||= begin
66
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
66
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
67
67
  Nokogiri::HTML(response)
68
68
  end
69
69
  end
@@ -20,6 +20,7 @@ module Onebox
20
20
  @match ||= @url.match(%r{github\.com/(?<owner>[^/]+)/(?<repository>[^/]+)/pull/(?<number>[^/]+)})
21
21
  end
22
22
 
23
+ GITHUB_COMMENT_REGEX = /(<!--.*?-->\r\n)/
23
24
  def data
24
25
  result = raw.clone
25
26
  result['link'] = link
@@ -31,6 +32,10 @@ module Onebox
31
32
 
32
33
  ulink = URI(link)
33
34
  result['domain'] = "#{ulink.host}/#{ulink.path.split('/')[1]}/#{ulink.path.split('/')[2]}"
35
+
36
+ body = (result['body'] || '').gsub(GITHUB_COMMENT_REGEX, '')
37
+ result['body'] = body.present? ? body : nil
38
+
34
39
  result
35
40
  end
36
41
  end
@@ -47,7 +47,7 @@ module Onebox
47
47
  end
48
48
 
49
49
  def get_og_data
50
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
50
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
51
51
  html = Nokogiri::HTML(response)
52
52
  og_data = {}
53
53
  html.css('meta').each do |m|
@@ -11,7 +11,11 @@ module Onebox
11
11
  end
12
12
 
13
13
  def raw
14
- @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
14
+ @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
15
+ end
16
+
17
+ def body_cacher
18
+ self.options&.[](:body_cacher)
15
19
  end
16
20
 
17
21
  def html?
@@ -31,7 +31,7 @@ module Onebox
31
31
 
32
32
  def lines
33
33
  return @lines if @lines
34
- response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
34
+ response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
35
35
  @lines = response.split("\n")
36
36
  end
37
37
 
@@ -17,7 +17,7 @@ module Onebox
17
17
  private
18
18
 
19
19
  def get_twitter_data
20
- response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
20
+ response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
21
21
  html = Nokogiri::HTML(response)
22
22
  twitter_data = {}
23
23
  html.css('meta').each do |m|
@@ -13,11 +13,36 @@ module Onebox
13
13
  WIDTH ||= 480
14
14
  HEIGHT ||= 360
15
15
 
16
- def placeholder_html
17
- og = get_opengraph.data
16
+ def parse_embed_response
17
+ return unless video_id
18
+ return @parse_embed_response if defined?(@parse_embed_response)
19
+
20
+ embed_url = "https://www.youtube.com/embed/#{video_id}"
21
+ @embed_doc ||= Onebox::Helpers.fetch_html_doc(embed_url)
22
+
23
+ begin
24
+ script_tag = @embed_doc.xpath('//script').find { |tag| tag.to_s.include?('ytcfg.set') }.to_s
25
+ match = script_tag.to_s.match(/ytcfg\.set\((?<json>.*)\)/)
26
+
27
+ yt_json = ::JSON.parse(match[:json])
28
+ renderer = ::JSON.parse(yt_json['PLAYER_VARS']['embedded_player_response'])['embedPreview']['thumbnailPreviewRenderer']
29
+
30
+ title = renderer['title']['runs'].first['text']
31
+
32
+ image = "https://img.youtube.com/vi/#{video_id}/hqdefault.jpg"
33
+ rescue
34
+ return
35
+ end
36
+
37
+ @parse_embed_response = { image: image, title: title }
38
+ end
18
39
 
40
+ def placeholder_html
19
41
  if video_id || list_id
20
- "<img src='#{og[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{og[:title]}'>"
42
+ result = parse_embed_response
43
+ result ||= get_opengraph.data
44
+
45
+ "<img src='#{result[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{result[:title]}'>"
21
46
  else
22
47
  to_html
23
48
  end
@@ -52,7 +77,10 @@ module Onebox
52
77
  end
53
78
 
54
79
  def video_title
55
- @video_title ||= get_opengraph.data[:title]
80
+ @video_title ||= begin
81
+ result = parse_embed_response || get_opengraph.data
82
+ result[:title]
83
+ end
56
84
  end
57
85
 
58
86
  private
@@ -7,7 +7,7 @@ module Onebox
7
7
 
8
8
  class DownloadTooLarge < StandardError; end
9
9
 
10
- IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com']
10
+ IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com', 'youtube.com']
11
11
 
12
12
  def self.symbolize_keys(hash)
13
13
  return {} if hash.nil?
@@ -24,8 +24,8 @@ module Onebox
24
24
  html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
25
25
  end
26
26
 
27
- def self.fetch_html_doc(url, headers = nil)
28
- response = (fetch_response(url, nil, nil, headers) rescue nil)
27
+ def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
28
+ response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
29
29
  doc = Nokogiri::HTML(response)
30
30
  uri = Addressable::URI.parse(url)
31
31
 
@@ -37,7 +37,7 @@ module Onebox
37
37
  canonical_link = doc.at('//link[@rel="canonical"]/@href')
38
38
  canonical_uri = Addressable::URI.parse(canonical_link)
39
39
  if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
40
- response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
40
+ response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
41
41
  doc = Nokogiri::HTML(response) if response
42
42
  end
43
43
  end
@@ -45,16 +45,23 @@ module Onebox
45
45
  doc
46
46
  end
47
47
 
48
- def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
48
+ def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
49
+ redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
49
50
 
50
- limit ||= 5
51
- limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
52
-
53
- raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
51
+ raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
54
52
 
55
53
  uri = Addressable::URI.parse(location)
56
54
  uri = Addressable::URI.join(domain, uri) if !uri.host
57
55
 
56
+ use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
57
+ if use_body_cacher
58
+ response_body = body_cacher.fetch_cached_response_body(uri.to_s)
59
+
60
+ if response_body.present?
61
+ return response_body
62
+ end
63
+ end
64
+
58
65
  result = StringIO.new
59
66
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
60
67
  http.open_timeout = Onebox.options.connect_timeout
@@ -86,9 +93,9 @@ module Onebox
86
93
  response.error! unless [301, 302].include?(code)
87
94
  return fetch_response(
88
95
  response['location'],
89
- limit - 1,
90
- "#{uri.scheme}://#{uri.host}",
91
- redir_header
96
+ redirect_limit: redirect_limit - 1,
97
+ domain: "#{uri.scheme}://#{uri.host}",
98
+ headers: redir_header
92
99
  )
93
100
  end
94
101
 
@@ -98,6 +105,10 @@ module Onebox
98
105
  raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
99
106
  end
100
107
 
108
+ if use_body_cacher && body_cacher.cache_response_body?(uri)
109
+ body_cacher.cache_response_body(uri.to_s, result.string)
110
+ end
111
+
101
112
  return result.string
102
113
  end
103
114
  end
@@ -178,6 +189,10 @@ module Onebox
178
189
  url.gsub!("'", "&apos;")
179
190
  url.gsub!('"', "&quot;")
180
191
  url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, "")
192
+
193
+ parsed = Addressable::URI.parse(url)
194
+ return "" unless parsed.host
195
+
181
196
  url
182
197
  end
183
198
 
@@ -32,7 +32,8 @@ module Onebox
32
32
  if method_name.end_with?(*integer_suffixes)
33
33
  value.to_i
34
34
  elsif method_name.end_with?(*url_suffixes)
35
- ::Onebox::Helpers.normalize_url_for_output(value)
35
+ result = Onebox::Helpers.normalize_url_for_output(value)
36
+ result unless Onebox::Helpers::blank?(result)
36
37
  else
37
38
  value
38
39
  end
@@ -6,7 +6,7 @@ class Sanitize
6
6
  HTTP_PROTOCOLS ||= ['http', 'https', :relative].freeze
7
7
 
8
8
  ONEBOX ||= freeze_config merge(RELAXED,
9
- elements: RELAXED[:elements] + %w[audio embed iframe source video svg path],
9
+ elements: RELAXED[:elements] + %w[audio details embed iframe source video svg path],
10
10
 
11
11
  attributes: {
12
12
  'a' => RELAXED[:attributes]['a'] + %w(target),
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.2.6"
4
+ VERSION = "2.2.12"
5
5
  end
@@ -4,9 +4,22 @@
4
4
  </div>
5
5
 
6
6
  <div class="github-info-container">
7
- <h4>
8
- <a href="{{html_url}}" target="_blank" rel="noopener">{{title}}</a>
9
- </h4>
7
+ {{^body}}
8
+ <h4>
9
+ <a href="{{html_url}}" target="_blank" rel="noopener">{{title}}</a>
10
+ </h4>
11
+ {{/body}}
12
+
13
+ {{#body}}
14
+ <details class="onebox-details">
15
+ <summary class="onebox-details-summary">
16
+ <h4>
17
+ <a href="{{html_url}}" target="_blank" rel="noopener">{{title}}</a>
18
+ </h4>
19
+ </summary>
20
+ <p class="onebox-details-body">{{body}}</p>
21
+ </details>
22
+ {{/body}}
10
23
 
11
24
  <div class="branches">
12
25
  <code>{{base.label}}</code> ← <code>{{head.label}}</code>
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.6
4
+ version: 2.2.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
8
8
  - Vyki Englert
9
9
  - Robin Ward
10
- autorequire:
10
+ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-02-25 00:00:00.000000000 Z
13
+ date: 2021-04-12 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable
@@ -405,7 +405,7 @@ homepage: https://github.com/discourse/onebox
405
405
  licenses:
406
406
  - MIT
407
407
  metadata: {}
408
- post_install_message:
408
+ post_install_message:
409
409
  rdoc_options: []
410
410
  require_paths:
411
411
  - lib
@@ -421,7 +421,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
421
421
  version: '0'
422
422
  requirements: []
423
423
  rubygems_version: 3.0.3
424
- signing_key:
424
+ signing_key:
425
425
  specification_version: 4
426
426
  summary: A gem for generating embeddable HTML previews from URLs.
427
427
  test_files: []