onebox 2.2.5 → 2.2.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f5124afb82cbf3d3103700c9b6e8ff935ab054f27ec9e115fa67e318554b402a
4
- data.tar.gz: 66d69a4e0fea63708a7740b021e1ef4f149b5dd0537d0fc1fd060b8fa7d6ce96
3
+ metadata.gz: 00cdd8f25e8df4b1c992886e522e6dc6671973de81194796974f527c4f18aa7c
4
+ data.tar.gz: 96f9e2de815bef82f86170e0abf97234072c824d97a3322daef992e1dbc41a91
5
5
  SHA512:
6
- metadata.gz: 75dfc88ec22ce1c633a6614068171f6f2d5f16841f6d0d800af6b4bf348eff7a46c64c85d64a701a6a791f551c635e24cb84ddc3de452c572579916029c672e5
7
- data.tar.gz: d9d5ac8c391e3f638e1e0ac9aa198ecb4259bcc1c38e626cea0b68ca527659d21f8d74f47ad6b99a94e02fda10fa4a0812b175ebef52b08d1f07dba037994210
6
+ metadata.gz: 57ff4ab92abbd8f2a5cbbedb7a88f3d7a517e4c1c779810f1259b58cc60c3591c43e9d4782984ab56a1389b00ee3acbaebc6298208813b694a5f73255bc3c0a7
7
+ data.tar.gz: 8baea1e77f32c09724a03452ba6018721681e9832d03b23bbcf1bc61609a56cabd8098d114dd49c66c468919b1d1c4cee4cc97212a6ac97fac229bc8f4eb886b
@@ -44,6 +44,6 @@ jobs:
44
44
  - uses: actions/checkout@v2
45
45
 
46
46
  - name: Release Gem
47
- uses: CvX/publish-rubygems-action@master
47
+ uses: discourse/publish-rubygems-action@main
48
48
  env:
49
49
  RUBYGEMS_API_KEY: ${{secrets.RUBYGEMS_API_KEY}}
data/lib/onebox.rb CHANGED
@@ -21,8 +21,7 @@ module Onebox
21
21
  allowed_ports: [80, 443],
22
22
  allowed_schemes: ["http", "https"],
23
23
  sanitize_config: Sanitize::Config::ONEBOX,
24
- redirect_limit: 5,
25
- disable_media_download_controls: false
24
+ redirect_limit: 5
26
25
  }
27
26
 
28
27
  @@options = DEFAULTS
@@ -227,8 +227,10 @@ module Onebox
227
227
  d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
228
228
  d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
229
229
  d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
230
+ d[:image] = nil if Onebox::Helpers.blank?(d[:image])
230
231
 
231
232
  d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
233
+ d[:video] = nil if Onebox::Helpers.blank?(d[:video])
232
234
 
233
235
  d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
234
236
  if !Onebox::Helpers.blank?(d[:published_time])
@@ -11,11 +11,25 @@ module Onebox
11
11
  include HTML
12
12
 
13
13
  always_https
14
- matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
14
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
15
15
 
16
16
  def url
17
+ # If possible, fetch the cached HTML body immediately so we can
18
+ # try to grab the canonical URL from that document,
19
+ # rather than guess at the best URL structure to use
20
+ if body_cacher&.respond_to?('cache_response_body?')
21
+ if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
22
+ @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
23
+ end
24
+ end
25
+
26
+ if @raw
27
+ canonical_link = @raw.at('//link[@rel="canonical"]/@href')
28
+ return canonical_link.to_s if canonical_link
29
+ end
30
+
17
31
  if match && match[:id]
18
- return "https://www.amazon.#{tld}/gp/aw/d/#{Onebox::Helpers.uri_encode(match[:id])}"
32
+ return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
19
33
  end
20
34
 
21
35
  @url
@@ -26,16 +40,15 @@ module Onebox
26
40
  end
27
41
 
28
42
  def http_params
29
- {
30
- 'User-Agent' =>
31
- 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3'
32
- }
43
+ if @options && @options[:user_agent]
44
+ { 'User-Agent' => @options[:user_agent] }
45
+ end
33
46
  end
34
47
 
35
48
  private
36
49
 
37
50
  def match
38
- @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[^\/]+)(?:\/|$)/mi)
51
+ @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
39
52
  end
40
53
 
41
54
  def image
@@ -50,6 +63,10 @@ module Onebox
50
63
  end
51
64
 
52
65
  if (landing_image = raw.css("#landingImage")) && landing_image.any?
66
+ attributes = landing_image.first.attributes
67
+
68
+ return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
69
+
53
70
  landing_image.first["src"].to_s
54
71
  end
55
72
 
@@ -100,7 +117,7 @@ module Onebox
100
117
  end
101
118
 
102
119
  result = {
103
- link: link,
120
+ link: url,
104
121
  title: title,
105
122
  by_info: authors,
106
123
  image: og.image || image,
@@ -131,7 +148,7 @@ module Onebox
131
148
  end
132
149
 
133
150
  result = {
134
- link: link,
151
+ link: url,
135
152
  title: title,
136
153
  by_info: authors,
137
154
  image: og.image || image,
@@ -147,7 +164,7 @@ module Onebox
147
164
  else
148
165
  title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
149
166
  result = {
150
- link: link,
167
+ link: url,
151
168
  title: title,
152
169
  image: og.image || image,
153
170
  price: price
@@ -157,7 +174,10 @@ module Onebox
157
174
  result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
158
175
 
159
176
  summary = raw.at("#productDescription")
160
- result[:description] = og.description || (summary && summary.inner_text) || CGI.unescapeHTML(Onebox::Helpers.truncate(raw.css("meta[name=description]").first["content"], 250))
177
+
178
+ description = og.description || summary&.inner_text
179
+ description ||= raw.css("meta[name=description]").first&.[]("content")
180
+ result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
161
181
  end
162
182
 
163
183
  result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
@@ -63,7 +63,7 @@ module Onebox
63
63
 
64
64
  def nokogiri_page
65
65
  @nokogiri_page ||= begin
66
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
66
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
67
67
  Nokogiri::HTML(response)
68
68
  end
69
69
  end
@@ -47,7 +47,7 @@ module Onebox
47
47
  end
48
48
 
49
49
  def get_og_data
50
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
50
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
51
51
  html = Nokogiri::HTML(response)
52
52
  og_data = {}
53
53
  html.css('meta').each do |m|
@@ -11,7 +11,11 @@ module Onebox
11
11
  end
12
12
 
13
13
  def raw
14
- @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
14
+ @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
15
+ end
16
+
17
+ def body_cacher
18
+ self.options&.[](:body_cacher)
15
19
  end
16
20
 
17
21
  def html?
@@ -31,7 +31,7 @@ module Onebox
31
31
 
32
32
  def lines
33
33
  return @lines if @lines
34
- response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
34
+ response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
35
35
  @lines = response.split("\n")
36
36
  end
37
37
 
@@ -17,7 +17,7 @@ module Onebox
17
17
  private
18
18
 
19
19
  def get_twitter_data
20
- response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
20
+ response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
21
21
  html = Nokogiri::HTML(response)
22
22
  twitter_data = {}
23
23
  html.css('meta').each do |m|
@@ -13,11 +13,36 @@ module Onebox
13
13
  WIDTH ||= 480
14
14
  HEIGHT ||= 360
15
15
 
16
- def placeholder_html
17
- og = get_opengraph.data
16
+ def parse_embed_response
17
+ return unless video_id
18
+ return @parse_embed_response if defined?(@parse_embed_response)
19
+
20
+ embed_url = "https://www.youtube.com/embed/#{video_id}"
21
+ @embed_doc ||= Onebox::Helpers.fetch_html_doc(embed_url)
22
+
23
+ begin
24
+ script_tag = @embed_doc.xpath('//script').find { |tag| tag.to_s.include?('ytcfg.set') }.to_s
25
+ match = script_tag.to_s.match(/ytcfg\.set\((?<json>.*)\)/)
26
+
27
+ yt_json = ::JSON.parse(match[:json])
28
+ renderer = ::JSON.parse(yt_json['PLAYER_VARS']['embedded_player_response'])['embedPreview']['thumbnailPreviewRenderer']
29
+
30
+ title = renderer['title']['runs'].first['text']
31
+
32
+ image = "https://img.youtube.com/vi/#{video_id}/hqdefault.jpg"
33
+ rescue
34
+ return
35
+ end
36
+
37
+ @parse_embed_response = { image: image, title: title }
38
+ end
18
39
 
40
+ def placeholder_html
19
41
  if video_id || list_id
20
- "<img src='#{og[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{og[:title]}'>"
42
+ result = parse_embed_response
43
+ result ||= get_opengraph.data
44
+
45
+ "<img src='#{result[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{result[:title]}'>"
21
46
  else
22
47
  to_html
23
48
  end
@@ -52,7 +77,10 @@ module Onebox
52
77
  end
53
78
 
54
79
  def video_title
55
- @video_title ||= get_opengraph.data[:title]
80
+ @video_title ||= begin
81
+ result = parse_embed_response || get_opengraph.data
82
+ result[:title]
83
+ end
56
84
  end
57
85
 
58
86
  private
@@ -7,7 +7,7 @@ module Onebox
7
7
 
8
8
  class DownloadTooLarge < StandardError; end
9
9
 
10
- IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com']
10
+ IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com', 'youtube.com']
11
11
 
12
12
  def self.symbolize_keys(hash)
13
13
  return {} if hash.nil?
@@ -24,8 +24,8 @@ module Onebox
24
24
  html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
25
25
  end
26
26
 
27
- def self.fetch_html_doc(url, headers = nil)
28
- response = (fetch_response(url, nil, nil, headers) rescue nil)
27
+ def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
28
+ response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
29
29
  doc = Nokogiri::HTML(response)
30
30
  uri = Addressable::URI.parse(url)
31
31
 
@@ -37,7 +37,7 @@ module Onebox
37
37
  canonical_link = doc.at('//link[@rel="canonical"]/@href')
38
38
  canonical_uri = Addressable::URI.parse(canonical_link)
39
39
  if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
40
- response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
40
+ response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
41
41
  doc = Nokogiri::HTML(response) if response
42
42
  end
43
43
  end
@@ -45,16 +45,23 @@ module Onebox
45
45
  doc
46
46
  end
47
47
 
48
- def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
48
+ def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
49
+ redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
49
50
 
50
- limit ||= 5
51
- limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
52
-
53
- raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
51
+ raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
54
52
 
55
53
  uri = Addressable::URI.parse(location)
56
54
  uri = Addressable::URI.join(domain, uri) if !uri.host
57
55
 
56
+ use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
57
+ if use_body_cacher
58
+ response_body = body_cacher.fetch_cached_response_body(uri.to_s)
59
+
60
+ if response_body.present?
61
+ return response_body
62
+ end
63
+ end
64
+
58
65
  result = StringIO.new
59
66
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
60
67
  http.open_timeout = Onebox.options.connect_timeout
@@ -86,9 +93,9 @@ module Onebox
86
93
  response.error! unless [301, 302].include?(code)
87
94
  return fetch_response(
88
95
  response['location'],
89
- limit - 1,
90
- "#{uri.scheme}://#{uri.host}",
91
- redir_header
96
+ redirect_limit: redirect_limit - 1,
97
+ domain: "#{uri.scheme}://#{uri.host}",
98
+ headers: redir_header
92
99
  )
93
100
  end
94
101
 
@@ -98,6 +105,10 @@ module Onebox
98
105
  raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
99
106
  end
100
107
 
108
+ if use_body_cacher && body_cacher.cache_response_body?(uri)
109
+ body_cacher.cache_response_body(uri.to_s, result.string)
110
+ end
111
+
101
112
  return result.string
102
113
  end
103
114
  end
@@ -178,6 +189,10 @@ module Onebox
178
189
  url.gsub!("'", "&apos;")
179
190
  url.gsub!('"', "&quot;")
180
191
  url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, "")
192
+
193
+ parsed = Addressable::URI.parse(url)
194
+ return "" unless parsed.host
195
+
181
196
  url
182
197
  end
183
198
 
@@ -32,7 +32,8 @@ module Onebox
32
32
  if method_name.end_with?(*integer_suffixes)
33
33
  value.to_i
34
34
  elsif method_name.end_with?(*url_suffixes)
35
- ::Onebox::Helpers.normalize_url_for_output(value)
35
+ result = Onebox::Helpers.normalize_url_for_output(value)
36
+ result unless Onebox::Helpers::blank?(result)
36
37
  else
37
38
  value
38
39
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.2.5"
4
+ VERSION = "2.2.11"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.5
4
+ version: 2.2.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-02-25 00:00:00.000000000 Z
13
+ date: 2021-04-07 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable