onebox 2.2.8 → 2.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -4
  3. data/lib/onebox/engine.rb +1 -1
  4. data/lib/onebox/engine/allowlisted_generic_onebox.rb +11 -9
  5. data/lib/onebox/engine/amazon_onebox.rb +34 -12
  6. data/lib/onebox/engine/asciinema_onebox.rb +0 -1
  7. data/lib/onebox/engine/audioboom_onebox.rb +5 -2
  8. data/lib/onebox/engine/bandcamp_onebox.rb +8 -8
  9. data/lib/onebox/engine/cloudapp_onebox.rb +12 -12
  10. data/lib/onebox/engine/coub_onebox.rb +0 -1
  11. data/lib/onebox/engine/facebook_media_onebox.rb +8 -7
  12. data/lib/onebox/engine/five_hundred_px_onebox.rb +0 -1
  13. data/lib/onebox/engine/flickr_onebox.rb +14 -15
  14. data/lib/onebox/engine/gfycat_onebox.rb +1 -1
  15. data/lib/onebox/engine/giphy_onebox.rb +0 -1
  16. data/lib/onebox/engine/github_blob_onebox.rb +4 -0
  17. data/lib/onebox/engine/github_commit_onebox.rb +2 -3
  18. data/lib/onebox/engine/github_gist_onebox.rb +1 -2
  19. data/lib/onebox/engine/github_issue_onebox.rb +16 -18
  20. data/lib/onebox/engine/github_pullrequest_onebox.rb +9 -1
  21. data/lib/onebox/engine/gitlab_blob_onebox.rb +4 -0
  22. data/lib/onebox/engine/google_calendar_onebox.rb +1 -1
  23. data/lib/onebox/engine/google_docs_onebox.rb +8 -8
  24. data/lib/onebox/engine/google_drive_onebox.rb +7 -7
  25. data/lib/onebox/engine/google_maps_onebox.rb +0 -2
  26. data/lib/onebox/engine/google_photos_onebox.rb +14 -14
  27. data/lib/onebox/engine/google_play_app_onebox.rb +3 -7
  28. data/lib/onebox/engine/html.rb +5 -1
  29. data/lib/onebox/engine/instagram_onebox.rb +2 -2
  30. data/lib/onebox/engine/kaltura_onebox.rb +8 -6
  31. data/lib/onebox/engine/opengraph_image.rb +0 -1
  32. data/lib/onebox/engine/pastebin_onebox.rb +1 -1
  33. data/lib/onebox/engine/pubmed_onebox.rb +3 -2
  34. data/lib/onebox/engine/replit_onebox.rb +0 -1
  35. data/lib/onebox/engine/sketchfab_onebox.rb +8 -8
  36. data/lib/onebox/engine/slides_onebox.rb +10 -10
  37. data/lib/onebox/engine/soundcloud_onebox.rb +0 -1
  38. data/lib/onebox/engine/stack_exchange_onebox.rb +2 -1
  39. data/lib/onebox/engine/standard_embed.rb +2 -2
  40. data/lib/onebox/engine/steam_store_onebox.rb +6 -5
  41. data/lib/onebox/engine/trello_onebox.rb +2 -2
  42. data/lib/onebox/engine/twitch_clips_onebox.rb +1 -3
  43. data/lib/onebox/engine/twitch_stream_onebox.rb +1 -2
  44. data/lib/onebox/engine/twitch_video_onebox.rb +0 -2
  45. data/lib/onebox/engine/twitter_status_onebox.rb +1 -1
  46. data/lib/onebox/engine/typeform_onebox.rb +7 -6
  47. data/lib/onebox/engine/vimeo_onebox.rb +9 -7
  48. data/lib/onebox/engine/wikimedia_onebox.rb +1 -2
  49. data/lib/onebox/engine/wikipedia_onebox.rb +12 -8
  50. data/lib/onebox/engine/youku_onebox.rb +7 -7
  51. data/lib/onebox/engine/youtube_onebox.rb +46 -17
  52. data/lib/onebox/file_type_finder.rb +0 -1
  53. data/lib/onebox/helpers.rb +25 -14
  54. data/lib/onebox/mixins/git_blob_onebox.rb +5 -3
  55. data/lib/onebox/mixins/github_body.rb +30 -0
  56. data/lib/onebox/mixins/twitch_onebox.rb +0 -1
  57. data/lib/onebox/preview.rb +1 -2
  58. data/lib/onebox/sanitize_config.rb +1 -1
  59. data/lib/onebox/version.rb +1 -1
  60. data/templates/github/github_body.mustache +3 -0
  61. data/templates/githubissue.mustache +7 -9
  62. data/templates/githubpullrequest.mustache +1 -0
  63. metadata +7 -5
@@ -16,7 +16,7 @@ module Onebox
16
16
  height = match[:type] == 'b' ? 400 : 200
17
17
 
18
18
  <<-HTML
19
- <iframe src=\"#{link}\" width=\"100%\" height=\"#{height}\" frameborder=\"0\" style=\"border:0\"></iframe>
19
+ <iframe src="#{link}" width="100%" height="#{height}" frameborder="0" style="border:0"></iframe>
20
20
  HTML
21
21
  end
22
22
 
@@ -26,7 +26,7 @@ module Onebox
26
26
 
27
27
  private
28
28
  def match
29
- return @match if @match
29
+ return @match if defined?(@match)
30
30
 
31
31
  @match = @url.match(%{trello\.com/(?<type>[^/]+)/(?<key>[^/]+)/?\W*})
32
32
 
@@ -3,12 +3,11 @@
3
3
  require_relative '../mixins/twitch_onebox'
4
4
 
5
5
  class Onebox::Engine::TwitchClipsOnebox
6
-
7
6
  def self.twitch_regexp
8
7
  /^https?:\/\/clips\.twitch\.tv\/([a-zA-Z0-9_]+\/?[^#\?\/]+)/
9
8
  end
10
- include Onebox::Mixins::TwitchOnebox
11
9
 
10
+ include Onebox::Mixins::TwitchOnebox
12
11
  requires_iframe_origins "https://clips.twitch.tv"
13
12
 
14
13
  def query_params
@@ -18,5 +17,4 @@ class Onebox::Engine::TwitchClipsOnebox
18
17
  def base_url
19
18
  "clips.twitch.tv/embed?"
20
19
  end
21
-
22
20
  end
@@ -3,14 +3,13 @@
3
3
  require_relative '../mixins/twitch_onebox'
4
4
 
5
5
  class Onebox::Engine::TwitchStreamOnebox
6
-
7
6
  def self.twitch_regexp
8
7
  /^https?:\/\/(?:www\.|go\.)?twitch\.tv\/(?!directory)([a-zA-Z0-9_]{4,25})$/
9
8
  end
9
+
10
10
  include Onebox::Mixins::TwitchOnebox
11
11
 
12
12
  def query_params
13
13
  "channel=#{twitch_id}"
14
14
  end
15
-
16
15
  end
@@ -3,7 +3,6 @@
3
3
  require_relative '../mixins/twitch_onebox'
4
4
 
5
5
  class Onebox::Engine::TwitchVideoOnebox
6
-
7
6
  def self.twitch_regexp
8
7
  /^https?:\/\/(?:www\.)?twitch\.tv\/videos\/([0-9]+)/
9
8
  end
@@ -13,5 +12,4 @@ class Onebox::Engine::TwitchVideoOnebox
13
12
  def query_params
14
13
  "video=v#{twitch_id}"
15
14
  end
16
-
17
15
  end
@@ -17,7 +17,7 @@ module Onebox
17
17
  private
18
18
 
19
19
  def get_twitter_data
20
- response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
20
+ response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
21
21
  html = Nokogiri::HTML(response)
22
22
  twitter_data = {}
23
23
  html.css('meta').each do |m|
@@ -13,12 +13,13 @@ module Onebox
13
13
  typeform_src = build_typeform_src
14
14
 
15
15
  <<~HTML
16
- <iframe src="#{typeform_src}"
17
- width="100%"
18
- height="600px"
19
- scrolling="no"
20
- frameborder="0">
21
- </iframe>
16
+ <iframe
17
+ src="#{typeform_src}"
18
+ width="100%"
19
+ height="600px"
20
+ scrolling="no"
21
+ frameborder="0"
22
+ ></iframe>
22
23
  HTML
23
24
  end
24
25
 
@@ -25,14 +25,16 @@ module Onebox
25
25
  end
26
26
  video_src = "https://player.vimeo.com/video/#{video_id}"
27
27
  video_src = video_src.gsub('autoplay=1', '').chomp("?")
28
+
28
29
  <<-HTML
29
- <iframe width="#{WIDTH}"
30
- height="#{HEIGHT}"
31
- src="#{video_src}"
32
- data-original-href="#{link}"
33
- frameborder="0"
34
- allowfullscreen>
35
- </iframe>
30
+ <iframe
31
+ width="#{WIDTH}"
32
+ height="#{HEIGHT}"
33
+ src="#{video_src}"
34
+ data-original-href="#{link}"
35
+ frameborder="0"
36
+ allowfullscreen
37
+ ></iframe>
36
38
  HTML
37
39
  end
38
40
 
@@ -7,7 +7,7 @@ module Onebox
7
7
  include LayoutSupport
8
8
  include JSON
9
9
 
10
- matches_regexp /^https?:\/\/commons\.wikimedia\.org\/wiki\/(File:.+)/
10
+ matches_regexp(/^https?:\/\/commons\.wikimedia\.org\/wiki\/(File:.+)/)
11
11
  always_https
12
12
 
13
13
  def self.priority
@@ -38,7 +38,6 @@ module Onebox
38
38
  thumbnail: first_page['imageinfo'].first['thumburl']
39
39
  }
40
40
  end
41
-
42
41
  end
43
42
  end
44
43
  end
@@ -16,10 +16,10 @@ module Onebox
16
16
  paras = []
17
17
  text = ""
18
18
 
19
- # Detect section Hash in the url and retrive the related paragraphs. if no hash provided the first few paragraphs will be used
19
+ # Detect section Hash in the url and retrive the related paragraphs. if no hash provided the first few paragraphs will be used
20
20
  # Author Lidlanca
21
21
  # Date 9/8/2014
22
- if (m_url_hash = @url.match(/#([^\/?]+)/)) #extract url hash
22
+ if (m_url_hash = @url.match(/#([^\/?]+)/)) # extract url hash
23
23
  m_url_hash_name = m_url_hash[1]
24
24
  end
25
25
 
@@ -27,17 +27,17 @@ module Onebox
27
27
  section_header_title = raw.xpath("//span[@id='#{m_url_hash_name}']")
28
28
 
29
29
  if section_header_title.empty?
30
- paras = raw.search("p") #default get all the paras
30
+ paras = raw.search("p") # default get all the paras
31
31
  else
32
32
  section_title_text = section_header_title.inner_text
33
- section_header = section_header_title[0].parent #parent element of the section span element should be an <h3> node
33
+ section_header = section_header_title[0].parent # parent element of the section span element should be an <h3> node
34
34
  cur_element = section_header
35
35
 
36
- # p|text|div covers the general case. We assume presence of atleast 1 P node. if section has no P node we may end up with a P node from the next section.
36
+ # p|text|div covers the general case. We assume presence of at least 1 P node. if section has no P node we may end up with a P node from the next section.
37
37
  # div tag is commonly used as an assets wraper in an article section. often as the first element holding an image.
38
38
  # ul support will imporve the output generated for a section with a list as the main content (for example: an Author Bibliography, A musician Discography, etc)
39
39
  first_p_found = nil
40
- while (((next_sibling = cur_element.next_sibling).name =~ /p|text|div|ul/) || first_p_found.nil?) do #from section header get the next sibling until it is a breaker tag
40
+ while (((next_sibling = cur_element.next_sibling).name =~ /p|text|div|ul/) || first_p_found.nil?) do # from section header get the next sibling until it is a breaker tag
41
41
  cur_element = next_sibling
42
42
  if (cur_element.name == "p" || cur_element.name == "ul") #we treat a list as we detect a p to avoid showing
43
43
  first_p_found = true
@@ -46,7 +46,7 @@ module Onebox
46
46
  end
47
47
  end
48
48
  else # no hash found in url
49
- paras = raw.search("p") #default get all the paras
49
+ paras = raw.search("p") # default get all the paras
50
50
  end
51
51
 
52
52
  unless paras.empty?
@@ -55,7 +55,7 @@ module Onebox
55
55
  break if cnt >= paras.size
56
56
  text += " " unless cnt == 0
57
57
 
58
- if paras[cnt].name == "ul" #Handle UL tag. Generate a textual ordered list (1.item | 2.item | 3.item). Unfourtently no newline allowed in output
58
+ if paras[cnt].name == "ul" # Handle UL tag. Generate a textual ordered list (1.item | 2.item | 3.item). Unfortunately no newline allowed in output
59
59
  li_index = 1
60
60
  list_items = []
61
61
  paras[cnt].children.css("li").each { |li| list_items.push "#{li_index}." + li.inner_text ; li_index += 1 }
@@ -69,13 +69,17 @@ module Onebox
69
69
  cnt += 1
70
70
  end
71
71
  end
72
+
72
73
  text = "#{text[0..Onebox::LayoutSupport.max_text]}..." if text.length > Onebox::LayoutSupport.max_text
74
+
73
75
  result = {
74
76
  link: link,
75
77
  title: raw.css("html body h1").inner_text + (section_title_text ? " | " + section_title_text : ""), #if a section sub title exists add it to the main article title
76
78
  description: text
77
79
  }
80
+
78
81
  img = raw.css(".image img")
82
+
79
83
  if img && img.size > 0
80
84
  img.each do |i|
81
85
  src = i["src"]
@@ -21,12 +21,13 @@ module Onebox
21
21
 
22
22
  def to_html
23
23
  <<~HTML
24
- <iframe src="https://player.youku.com/embed/#{video_id}"
25
- width="640"
26
- height="430"
27
- frameborder='0'
28
- allowfullscreen>
29
- </iframe>
24
+ <iframe
25
+ src="https://player.youku.com/embed/#{video_id}"
26
+ width="640"
27
+ height="430"
28
+ frameborder='0'
29
+ allowfullscreen
30
+ ></iframe>
30
31
  HTML
31
32
  end
32
33
 
@@ -35,7 +36,6 @@ module Onebox
35
36
  def uri
36
37
  @_uri ||= URI(@url)
37
38
  end
38
-
39
39
  end
40
40
  end
41
41
  end
@@ -13,11 +13,36 @@ module Onebox
13
13
  WIDTH ||= 480
14
14
  HEIGHT ||= 360
15
15
 
16
- def placeholder_html
17
- og = get_opengraph.data
16
+ def parse_embed_response
17
+ return unless video_id
18
+ return @parse_embed_response if defined?(@parse_embed_response)
19
+
20
+ embed_url = "https://www.youtube.com/embed/#{video_id}"
21
+ @embed_doc ||= Onebox::Helpers.fetch_html_doc(embed_url)
22
+
23
+ begin
24
+ script_tag = @embed_doc.xpath('//script').find { |tag| tag.to_s.include?('ytcfg.set') }.to_s
25
+ match = script_tag.to_s.match(/ytcfg\.set\((?<json>.*)\)/)
26
+
27
+ yt_json = ::JSON.parse(match[:json])
28
+ renderer = ::JSON.parse(yt_json['PLAYER_VARS']['embedded_player_response'])['embedPreview']['thumbnailPreviewRenderer']
29
+
30
+ title = renderer['title']['runs'].first['text']
31
+
32
+ image = "https://img.youtube.com/vi/#{video_id}/hqdefault.jpg"
33
+ rescue
34
+ return
35
+ end
18
36
 
37
+ @parse_embed_response = { image: image, title: title }
38
+ end
39
+
40
+ def placeholder_html
19
41
  if video_id || list_id
20
- "<img src='#{og[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{og[:title]}'>"
42
+ result = parse_embed_response
43
+ result ||= get_opengraph.data
44
+
45
+ "<img src='#{result[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{result[:title]}'>"
21
46
  else
22
47
  to_html
23
48
  end
@@ -26,21 +51,23 @@ module Onebox
26
51
  def to_html
27
52
  if video_id
28
53
  <<-HTML
29
- <iframe width="#{WIDTH}"
30
- height="#{HEIGHT}"
31
- src="https://www.youtube.com/embed/#{video_id}?#{embed_params}"
32
- frameborder="0"
33
- allowfullscreen>
34
- </iframe>
54
+ <iframe
55
+ src="https://www.youtube.com/embed/#{video_id}?#{embed_params}"
56
+ width="#{WIDTH}"
57
+ height="#{HEIGHT}"
58
+ frameborder="0"
59
+ allowfullscreen
60
+ ></iframe>
35
61
  HTML
36
62
  elsif list_id
37
63
  <<-HTML
38
- <iframe width="#{WIDTH}"
39
- height="#{HEIGHT}"
40
- src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
41
- frameborder="0"
42
- allowfullscreen>
43
- </iframe>
64
+ <iframe
65
+ src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
66
+ width="#{WIDTH}"
67
+ height="#{HEIGHT}"
68
+ frameborder="0"
69
+ allowfullscreen
70
+ ></iframe>
44
71
  HTML
45
72
  else
46
73
  # for channel pages
@@ -52,7 +79,10 @@ module Onebox
52
79
  end
53
80
 
54
81
  def video_title
55
- @video_title ||= get_opengraph.data[:title]
82
+ @video_title ||= begin
83
+ result = parse_embed_response || get_opengraph.data
84
+ result[:title]
85
+ end
56
86
  end
57
87
 
58
88
  private
@@ -138,7 +168,6 @@ module Onebox
138
168
  rescue
139
169
  {}
140
170
  end
141
-
142
171
  end
143
172
  end
144
173
  end
@@ -2,7 +2,6 @@
2
2
 
3
3
  module Onebox
4
4
  module FileTypeFinder
5
-
6
5
  # In general, most of file extension names would be recognized
7
6
  # by Highlights.js. However, some need to be checked in other
8
7
  # ways, either because they just aren't included, because they
@@ -7,7 +7,7 @@ module Onebox
7
7
 
8
8
  class DownloadTooLarge < StandardError; end
9
9
 
10
- IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com']
10
+ IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com', 'youtube.com']
11
11
 
12
12
  def self.symbolize_keys(hash)
13
13
  return {} if hash.nil?
@@ -24,8 +24,8 @@ module Onebox
24
24
  html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
25
25
  end
26
26
 
27
- def self.fetch_html_doc(url, headers = nil)
28
- response = (fetch_response(url, nil, nil, headers) rescue nil)
27
+ def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
28
+ response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
29
29
  doc = Nokogiri::HTML(response)
30
30
  uri = Addressable::URI.parse(url)
31
31
 
@@ -37,7 +37,7 @@ module Onebox
37
37
  canonical_link = doc.at('//link[@rel="canonical"]/@href')
38
38
  canonical_uri = Addressable::URI.parse(canonical_link)
39
39
  if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
40
- response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
40
+ response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
41
41
  doc = Nokogiri::HTML(response) if response
42
42
  end
43
43
  end
@@ -45,16 +45,23 @@ module Onebox
45
45
  doc
46
46
  end
47
47
 
48
- def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
48
+ def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
49
+ redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
49
50
 
50
- limit ||= 5
51
- limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
52
-
53
- raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
51
+ raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
54
52
 
55
53
  uri = Addressable::URI.parse(location)
56
54
  uri = Addressable::URI.join(domain, uri) if !uri.host
57
55
 
56
+ use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
57
+ if use_body_cacher
58
+ response_body = body_cacher.fetch_cached_response_body(uri.to_s)
59
+
60
+ if response_body.present?
61
+ return response_body
62
+ end
63
+ end
64
+
58
65
  result = StringIO.new
59
66
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
60
67
  http.open_timeout = Onebox.options.connect_timeout
@@ -86,9 +93,9 @@ module Onebox
86
93
  response.error! unless [301, 302].include?(code)
87
94
  return fetch_response(
88
95
  response['location'],
89
- limit - 1,
90
- "#{uri.scheme}://#{uri.host}",
91
- redir_header
96
+ redirect_limit: redirect_limit - 1,
97
+ domain: "#{uri.scheme}://#{uri.host}",
98
+ headers: redir_header
92
99
  )
93
100
  end
94
101
 
@@ -98,6 +105,10 @@ module Onebox
98
105
  raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
99
106
  end
100
107
 
108
+ if use_body_cacher && body_cacher.cache_response_body?(uri)
109
+ body_cacher.cache_response_body(uri.to_s, result.string)
110
+ end
111
+
101
112
  return result.string
102
113
  end
103
114
  end
@@ -116,10 +127,10 @@ module Onebox
116
127
 
117
128
  http.request_head([uri.path, uri.query].join("?")) do |response|
118
129
  code = response.code.to_i
119
- unless code === 200 || Onebox::Helpers.blank?(response.header['content-length'])
130
+ unless code === 200 || Onebox::Helpers.blank?(response.content_length)
120
131
  return nil
121
132
  end
122
- return response.header['content-length']
133
+ return response.content_length
123
134
  end
124
135
  end
125
136
  end
@@ -33,6 +33,10 @@ module Onebox
33
33
 
34
34
  self.options = DEFAULTS
35
35
 
36
+ @selected_lines_array = nil
37
+ @selected_one_liner = 0
38
+ @model_file = nil
39
+
36
40
  # Define constant after merging options set in Onebox.options
37
41
  # We can define constant automatically.
38
42
  options.each_pair do |constant_name, value|
@@ -47,8 +51,6 @@ module Onebox
47
51
  end
48
52
 
49
53
  private
50
- @selected_lines_array = nil
51
- @selected_one_liner = 0
52
54
 
53
55
  def calc_range(m, contents_lines_size)
54
56
  truncated = false
@@ -150,7 +152,7 @@ module Onebox
150
152
  end
151
153
 
152
154
  def raw
153
- return @raw if @raw
155
+ return @raw if defined?(@raw)
154
156
 
155
157
  m = @url.match(self.raw_regexp)
156
158