onebox 2.2.5 → 2.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f5124afb82cbf3d3103700c9b6e8ff935ab054f27ec9e115fa67e318554b402a
4
- data.tar.gz: 66d69a4e0fea63708a7740b021e1ef4f149b5dd0537d0fc1fd060b8fa7d6ce96
3
+ metadata.gz: 00cdd8f25e8df4b1c992886e522e6dc6671973de81194796974f527c4f18aa7c
4
+ data.tar.gz: 96f9e2de815bef82f86170e0abf97234072c824d97a3322daef992e1dbc41a91
5
5
  SHA512:
6
- metadata.gz: 75dfc88ec22ce1c633a6614068171f6f2d5f16841f6d0d800af6b4bf348eff7a46c64c85d64a701a6a791f551c635e24cb84ddc3de452c572579916029c672e5
7
- data.tar.gz: d9d5ac8c391e3f638e1e0ac9aa198ecb4259bcc1c38e626cea0b68ca527659d21f8d74f47ad6b99a94e02fda10fa4a0812b175ebef52b08d1f07dba037994210
6
+ metadata.gz: 57ff4ab92abbd8f2a5cbbedb7a88f3d7a517e4c1c779810f1259b58cc60c3591c43e9d4782984ab56a1389b00ee3acbaebc6298208813b694a5f73255bc3c0a7
7
+ data.tar.gz: 8baea1e77f32c09724a03452ba6018721681e9832d03b23bbcf1bc61609a56cabd8098d114dd49c66c468919b1d1c4cee4cc97212a6ac97fac229bc8f4eb886b
@@ -44,6 +44,6 @@ jobs:
44
44
  - uses: actions/checkout@v2
45
45
 
46
46
  - name: Release Gem
47
- uses: CvX/publish-rubygems-action@master
47
+ uses: discourse/publish-rubygems-action@main
48
48
  env:
49
49
  RUBYGEMS_API_KEY: ${{secrets.RUBYGEMS_API_KEY}}
data/lib/onebox.rb CHANGED
@@ -21,8 +21,7 @@ module Onebox
21
21
  allowed_ports: [80, 443],
22
22
  allowed_schemes: ["http", "https"],
23
23
  sanitize_config: Sanitize::Config::ONEBOX,
24
- redirect_limit: 5,
25
- disable_media_download_controls: false
24
+ redirect_limit: 5
26
25
  }
27
26
 
28
27
  @@options = DEFAULTS
@@ -227,8 +227,10 @@ module Onebox
227
227
  d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
228
228
  d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
229
229
  d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
230
+ d[:image] = nil if Onebox::Helpers.blank?(d[:image])
230
231
 
231
232
  d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
233
+ d[:video] = nil if Onebox::Helpers.blank?(d[:video])
232
234
 
233
235
  d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
234
236
  if !Onebox::Helpers.blank?(d[:published_time])
@@ -11,11 +11,25 @@ module Onebox
11
11
  include HTML
12
12
 
13
13
  always_https
14
- matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx)\//)
14
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
15
15
 
16
16
  def url
17
+ # If possible, fetch the cached HTML body immediately so we can
18
+ # try to grab the canonical URL from that document,
19
+ # rather than guess at the best URL structure to use
20
+ if body_cacher&.respond_to?('cache_response_body?')
21
+ if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
22
+ @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
23
+ end
24
+ end
25
+
26
+ if @raw
27
+ canonical_link = @raw.at('//link[@rel="canonical"]/@href')
28
+ return canonical_link.to_s if canonical_link
29
+ end
30
+
17
31
  if match && match[:id]
18
- return "https://www.amazon.#{tld}/gp/aw/d/#{Onebox::Helpers.uri_encode(match[:id])}"
32
+ return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
19
33
  end
20
34
 
21
35
  @url
@@ -26,16 +40,15 @@ module Onebox
26
40
  end
27
41
 
28
42
  def http_params
29
- {
30
- 'User-Agent' =>
31
- 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3'
32
- }
43
+ if @options && @options[:user_agent]
44
+ { 'User-Agent' => @options[:user_agent] }
45
+ end
33
46
  end
34
47
 
35
48
  private
36
49
 
37
50
  def match
38
- @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[^\/]+)(?:\/|$)/mi)
51
+ @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
39
52
  end
40
53
 
41
54
  def image
@@ -50,6 +63,10 @@ module Onebox
50
63
  end
51
64
 
52
65
  if (landing_image = raw.css("#landingImage")) && landing_image.any?
66
+ attributes = landing_image.first.attributes
67
+
68
+ return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
69
+
53
70
  landing_image.first["src"].to_s
54
71
  end
55
72
 
@@ -100,7 +117,7 @@ module Onebox
100
117
  end
101
118
 
102
119
  result = {
103
- link: link,
120
+ link: url,
104
121
  title: title,
105
122
  by_info: authors,
106
123
  image: og.image || image,
@@ -131,7 +148,7 @@ module Onebox
131
148
  end
132
149
 
133
150
  result = {
134
- link: link,
151
+ link: url,
135
152
  title: title,
136
153
  by_info: authors,
137
154
  image: og.image || image,
@@ -147,7 +164,7 @@ module Onebox
147
164
  else
148
165
  title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
149
166
  result = {
150
- link: link,
167
+ link: url,
151
168
  title: title,
152
169
  image: og.image || image,
153
170
  price: price
@@ -157,7 +174,10 @@ module Onebox
157
174
  result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
158
175
 
159
176
  summary = raw.at("#productDescription")
160
- result[:description] = og.description || (summary && summary.inner_text) || CGI.unescapeHTML(Onebox::Helpers.truncate(raw.css("meta[name=description]").first["content"], 250))
177
+
178
+ description = og.description || summary&.inner_text
179
+ description ||= raw.css("meta[name=description]").first&.[]("content")
180
+ result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
161
181
  end
162
182
 
163
183
  result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
@@ -63,7 +63,7 @@ module Onebox
63
63
 
64
64
  def nokogiri_page
65
65
  @nokogiri_page ||= begin
66
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
66
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
67
67
  Nokogiri::HTML(response)
68
68
  end
69
69
  end
@@ -47,7 +47,7 @@ module Onebox
47
47
  end
48
48
 
49
49
  def get_og_data
50
- response = Onebox::Helpers.fetch_response(url, 10) rescue nil
50
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
51
51
  html = Nokogiri::HTML(response)
52
52
  og_data = {}
53
53
  html.css('meta').each do |m|
@@ -11,7 +11,11 @@ module Onebox
11
11
  end
12
12
 
13
13
  def raw
14
- @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
14
+ @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
15
+ end
16
+
17
+ def body_cacher
18
+ self.options&.[](:body_cacher)
15
19
  end
16
20
 
17
21
  def html?
@@ -31,7 +31,7 @@ module Onebox
31
31
 
32
32
  def lines
33
33
  return @lines if @lines
34
- response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
34
+ response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
35
35
  @lines = response.split("\n")
36
36
  end
37
37
 
@@ -17,7 +17,7 @@ module Onebox
17
17
  private
18
18
 
19
19
  def get_twitter_data
20
- response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
20
+ response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
21
21
  html = Nokogiri::HTML(response)
22
22
  twitter_data = {}
23
23
  html.css('meta').each do |m|
@@ -13,11 +13,36 @@ module Onebox
13
13
  WIDTH ||= 480
14
14
  HEIGHT ||= 360
15
15
 
16
- def placeholder_html
17
- og = get_opengraph.data
16
+ def parse_embed_response
17
+ return unless video_id
18
+ return @parse_embed_response if defined?(@parse_embed_response)
19
+
20
+ embed_url = "https://www.youtube.com/embed/#{video_id}"
21
+ @embed_doc ||= Onebox::Helpers.fetch_html_doc(embed_url)
22
+
23
+ begin
24
+ script_tag = @embed_doc.xpath('//script').find { |tag| tag.to_s.include?('ytcfg.set') }.to_s
25
+ match = script_tag.to_s.match(/ytcfg\.set\((?<json>.*)\)/)
26
+
27
+ yt_json = ::JSON.parse(match[:json])
28
+ renderer = ::JSON.parse(yt_json['PLAYER_VARS']['embedded_player_response'])['embedPreview']['thumbnailPreviewRenderer']
29
+
30
+ title = renderer['title']['runs'].first['text']
31
+
32
+ image = "https://img.youtube.com/vi/#{video_id}/hqdefault.jpg"
33
+ rescue
34
+ return
35
+ end
36
+
37
+ @parse_embed_response = { image: image, title: title }
38
+ end
18
39
 
40
+ def placeholder_html
19
41
  if video_id || list_id
20
- "<img src='#{og[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{og[:title]}'>"
42
+ result = parse_embed_response
43
+ result ||= get_opengraph.data
44
+
45
+ "<img src='#{result[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{result[:title]}'>"
21
46
  else
22
47
  to_html
23
48
  end
@@ -52,7 +77,10 @@ module Onebox
52
77
  end
53
78
 
54
79
  def video_title
55
- @video_title ||= get_opengraph.data[:title]
80
+ @video_title ||= begin
81
+ result = parse_embed_response || get_opengraph.data
82
+ result[:title]
83
+ end
56
84
  end
57
85
 
58
86
  private
@@ -7,7 +7,7 @@ module Onebox
7
7
 
8
8
  class DownloadTooLarge < StandardError; end
9
9
 
10
- IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com']
10
+ IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com', 'youtube.com']
11
11
 
12
12
  def self.symbolize_keys(hash)
13
13
  return {} if hash.nil?
@@ -24,8 +24,8 @@ module Onebox
24
24
  html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
25
25
  end
26
26
 
27
- def self.fetch_html_doc(url, headers = nil)
28
- response = (fetch_response(url, nil, nil, headers) rescue nil)
27
+ def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
28
+ response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
29
29
  doc = Nokogiri::HTML(response)
30
30
  uri = Addressable::URI.parse(url)
31
31
 
@@ -37,7 +37,7 @@ module Onebox
37
37
  canonical_link = doc.at('//link[@rel="canonical"]/@href')
38
38
  canonical_uri = Addressable::URI.parse(canonical_link)
39
39
  if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
40
- response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
40
+ response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
41
41
  doc = Nokogiri::HTML(response) if response
42
42
  end
43
43
  end
@@ -45,16 +45,23 @@ module Onebox
45
45
  doc
46
46
  end
47
47
 
48
- def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
48
+ def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
49
+ redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
49
50
 
50
- limit ||= 5
51
- limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
52
-
53
- raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
51
+ raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
54
52
 
55
53
  uri = Addressable::URI.parse(location)
56
54
  uri = Addressable::URI.join(domain, uri) if !uri.host
57
55
 
56
+ use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
57
+ if use_body_cacher
58
+ response_body = body_cacher.fetch_cached_response_body(uri.to_s)
59
+
60
+ if response_body.present?
61
+ return response_body
62
+ end
63
+ end
64
+
58
65
  result = StringIO.new
59
66
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
60
67
  http.open_timeout = Onebox.options.connect_timeout
@@ -86,9 +93,9 @@ module Onebox
86
93
  response.error! unless [301, 302].include?(code)
87
94
  return fetch_response(
88
95
  response['location'],
89
- limit - 1,
90
- "#{uri.scheme}://#{uri.host}",
91
- redir_header
96
+ redirect_limit: redirect_limit - 1,
97
+ domain: "#{uri.scheme}://#{uri.host}",
98
+ headers: redir_header
92
99
  )
93
100
  end
94
101
 
@@ -98,6 +105,10 @@ module Onebox
98
105
  raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
99
106
  end
100
107
 
108
+ if use_body_cacher && body_cacher.cache_response_body?(uri)
109
+ body_cacher.cache_response_body(uri.to_s, result.string)
110
+ end
111
+
101
112
  return result.string
102
113
  end
103
114
  end
@@ -178,6 +189,10 @@ module Onebox
178
189
  url.gsub!("'", "&apos;")
179
190
  url.gsub!('"', "&quot;")
180
191
  url.gsub!(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, "")
192
+
193
+ parsed = Addressable::URI.parse(url)
194
+ return "" unless parsed.host
195
+
181
196
  url
182
197
  end
183
198
 
@@ -32,7 +32,8 @@ module Onebox
32
32
  if method_name.end_with?(*integer_suffixes)
33
33
  value.to_i
34
34
  elsif method_name.end_with?(*url_suffixes)
35
- ::Onebox::Helpers.normalize_url_for_output(value)
35
+ result = Onebox::Helpers.normalize_url_for_output(value)
36
+ result unless Onebox::Helpers::blank?(result)
36
37
  else
37
38
  value
38
39
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Onebox
4
- VERSION = "2.2.5"
4
+ VERSION = "2.2.11"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.5
4
+ version: 2.2.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-02-25 00:00:00.000000000 Z
13
+ date: 2021-04-07 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: addressable