onebox 2.2.10 → 2.2.15

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -4
  3. data/lib/onebox/engine.rb +7 -11
  4. data/lib/onebox/engine/allowlisted_generic_onebox.rb +11 -18
  5. data/lib/onebox/engine/amazon_onebox.rb +26 -17
  6. data/lib/onebox/engine/asciinema_onebox.rb +0 -1
  7. data/lib/onebox/engine/audioboom_onebox.rb +5 -2
  8. data/lib/onebox/engine/bandcamp_onebox.rb +8 -8
  9. data/lib/onebox/engine/cloudapp_onebox.rb +12 -12
  10. data/lib/onebox/engine/coub_onebox.rb +0 -1
  11. data/lib/onebox/engine/facebook_media_onebox.rb +8 -7
  12. data/lib/onebox/engine/five_hundred_px_onebox.rb +0 -1
  13. data/lib/onebox/engine/flickr_onebox.rb +14 -15
  14. data/lib/onebox/engine/gfycat_onebox.rb +26 -26
  15. data/lib/onebox/engine/giphy_onebox.rb +0 -1
  16. data/lib/onebox/engine/github_blob_onebox.rb +4 -0
  17. data/lib/onebox/engine/github_commit_onebox.rb +13 -16
  18. data/lib/onebox/engine/github_folder_onebox.rb +1 -1
  19. data/lib/onebox/engine/github_gist_onebox.rb +1 -2
  20. data/lib/onebox/engine/github_issue_onebox.rb +16 -18
  21. data/lib/onebox/engine/github_pullrequest_onebox.rb +9 -1
  22. data/lib/onebox/engine/gitlab_blob_onebox.rb +4 -0
  23. data/lib/onebox/engine/google_calendar_onebox.rb +1 -1
  24. data/lib/onebox/engine/google_docs_onebox.rb +23 -41
  25. data/lib/onebox/engine/google_drive_onebox.rb +7 -7
  26. data/lib/onebox/engine/google_maps_onebox.rb +10 -8
  27. data/lib/onebox/engine/google_photos_onebox.rb +18 -18
  28. data/lib/onebox/engine/google_play_app_onebox.rb +3 -7
  29. data/lib/onebox/engine/imgur_onebox.rb +2 -2
  30. data/lib/onebox/engine/instagram_onebox.rb +4 -5
  31. data/lib/onebox/engine/kaltura_onebox.rb +8 -6
  32. data/lib/onebox/engine/opengraph_image.rb +0 -1
  33. data/lib/onebox/engine/pastebin_onebox.rb +11 -15
  34. data/lib/onebox/engine/pdf_onebox.rb +7 -15
  35. data/lib/onebox/engine/pubmed_onebox.rb +17 -12
  36. data/lib/onebox/engine/replit_onebox.rb +0 -1
  37. data/lib/onebox/engine/sketchfab_onebox.rb +8 -8
  38. data/lib/onebox/engine/slides_onebox.rb +10 -10
  39. data/lib/onebox/engine/soundcloud_onebox.rb +0 -1
  40. data/lib/onebox/engine/stack_exchange_onebox.rb +3 -2
  41. data/lib/onebox/engine/standard_embed.rb +2 -5
  42. data/lib/onebox/engine/steam_store_onebox.rb +6 -5
  43. data/lib/onebox/engine/trello_onebox.rb +4 -7
  44. data/lib/onebox/engine/twitch_clips_onebox.rb +1 -3
  45. data/lib/onebox/engine/twitch_stream_onebox.rb +1 -2
  46. data/lib/onebox/engine/twitch_video_onebox.rb +0 -2
  47. data/lib/onebox/engine/typeform_onebox.rb +7 -6
  48. data/lib/onebox/engine/vimeo_onebox.rb +9 -7
  49. data/lib/onebox/engine/wikimedia_onebox.rb +1 -2
  50. data/lib/onebox/engine/wikipedia_onebox.rb +12 -8
  51. data/lib/onebox/engine/youku_onebox.rb +7 -13
  52. data/lib/onebox/engine/youtube_onebox.rb +46 -17
  53. data/lib/onebox/file_type_finder.rb +0 -1
  54. data/lib/onebox/helpers.rb +5 -4
  55. data/lib/onebox/layout.rb +2 -14
  56. data/lib/onebox/matcher.rb +10 -8
  57. data/lib/onebox/mixins/git_blob_onebox.rb +8 -8
  58. data/lib/onebox/mixins/github_body.rb +30 -0
  59. data/lib/onebox/mixins/twitch_onebox.rb +0 -1
  60. data/lib/onebox/open_graph.rb +4 -4
  61. data/lib/onebox/preview.rb +3 -4
  62. data/lib/onebox/sanitize_config.rb +1 -1
  63. data/lib/onebox/version.rb +1 -1
  64. data/templates/_layout.mustache +6 -2
  65. data/templates/allowlistedgeneric.mustache +8 -9
  66. data/templates/amazon.mustache +5 -2
  67. data/templates/github/github_body.mustache +5 -0
  68. data/templates/githubblob.mustache +44 -34
  69. data/templates/githubcommit.mustache +2 -10
  70. data/templates/githubfolder.mustache +2 -2
  71. data/templates/githubgist.mustache +9 -6
  72. data/templates/githubissue.mustache +9 -11
  73. data/templates/githubpullrequest.mustache +3 -2
  74. data/templates/gitlabblob.mustache +11 -4
  75. data/templates/googledocs.mustache +2 -2
  76. data/templates/googledrive.mustache +2 -2
  77. data/templates/googleplayapp.mustache +2 -1
  78. data/templates/instagram.mustache +1 -1
  79. data/templates/pastebin.mustache +6 -2
  80. data/templates/pdf.mustache +6 -3
  81. data/templates/stackexchange.mustache +1 -0
  82. data/templates/twitterstatus.mustache +20 -5
  83. data/templates/wikimedia.mustache +2 -2
  84. data/templates/wikipedia.mustache +2 -2
  85. data/templates/xkcd.mustache +2 -2
  86. metadata +4 -2
@@ -3,14 +3,13 @@
3
3
  require_relative '../mixins/twitch_onebox'
4
4
 
5
5
  class Onebox::Engine::TwitchStreamOnebox
6
-
7
6
  def self.twitch_regexp
8
7
  /^https?:\/\/(?:www\.|go\.)?twitch\.tv\/(?!directory)([a-zA-Z0-9_]{4,25})$/
9
8
  end
9
+
10
10
  include Onebox::Mixins::TwitchOnebox
11
11
 
12
12
  def query_params
13
13
  "channel=#{twitch_id}"
14
14
  end
15
-
16
15
  end
@@ -3,7 +3,6 @@
3
3
  require_relative '../mixins/twitch_onebox'
4
4
 
5
5
  class Onebox::Engine::TwitchVideoOnebox
6
-
7
6
  def self.twitch_regexp
8
7
  /^https?:\/\/(?:www\.)?twitch\.tv\/videos\/([0-9]+)/
9
8
  end
@@ -13,5 +12,4 @@ class Onebox::Engine::TwitchVideoOnebox
13
12
  def query_params
14
13
  "video=v#{twitch_id}"
15
14
  end
16
-
17
15
  end
@@ -13,12 +13,13 @@ module Onebox
13
13
  typeform_src = build_typeform_src
14
14
 
15
15
  <<~HTML
16
- <iframe src="#{typeform_src}"
17
- width="100%"
18
- height="600px"
19
- scrolling="no"
20
- frameborder="0">
21
- </iframe>
16
+ <iframe
17
+ src="#{typeform_src}"
18
+ width="100%"
19
+ height="600px"
20
+ scrolling="no"
21
+ frameborder="0"
22
+ ></iframe>
22
23
  HTML
23
24
  end
24
25
 
@@ -25,14 +25,16 @@ module Onebox
25
25
  end
26
26
  video_src = "https://player.vimeo.com/video/#{video_id}"
27
27
  video_src = video_src.gsub('autoplay=1', '').chomp("?")
28
+
28
29
  <<-HTML
29
- <iframe width="#{WIDTH}"
30
- height="#{HEIGHT}"
31
- src="#{video_src}"
32
- data-original-href="#{link}"
33
- frameborder="0"
34
- allowfullscreen>
35
- </iframe>
30
+ <iframe
31
+ width="#{WIDTH}"
32
+ height="#{HEIGHT}"
33
+ src="#{video_src}"
34
+ data-original-href="#{link}"
35
+ frameborder="0"
36
+ allowfullscreen
37
+ ></iframe>
36
38
  HTML
37
39
  end
38
40
 
@@ -7,7 +7,7 @@ module Onebox
7
7
  include LayoutSupport
8
8
  include JSON
9
9
 
10
- matches_regexp /^https?:\/\/commons\.wikimedia\.org\/wiki\/(File:.+)/
10
+ matches_regexp(/^https?:\/\/commons\.wikimedia\.org\/wiki\/(File:.+)/)
11
11
  always_https
12
12
 
13
13
  def self.priority
@@ -38,7 +38,6 @@ module Onebox
38
38
  thumbnail: first_page['imageinfo'].first['thumburl']
39
39
  }
40
40
  end
41
-
42
41
  end
43
42
  end
44
43
  end
@@ -16,10 +16,10 @@ module Onebox
16
16
  paras = []
17
17
  text = ""
18
18
 
19
- # Detect section Hash in the url and retrive the related paragraphs. if no hash provided the first few paragraphs will be used
19
+ # Detect section Hash in the url and retrive the related paragraphs. if no hash provided the first few paragraphs will be used
20
20
  # Author Lidlanca
21
21
  # Date 9/8/2014
22
- if (m_url_hash = @url.match(/#([^\/?]+)/)) #extract url hash
22
+ if (m_url_hash = @url.match(/#([^\/?]+)/)) # extract url hash
23
23
  m_url_hash_name = m_url_hash[1]
24
24
  end
25
25
 
@@ -27,17 +27,17 @@ module Onebox
27
27
  section_header_title = raw.xpath("//span[@id='#{m_url_hash_name}']")
28
28
 
29
29
  if section_header_title.empty?
30
- paras = raw.search("p") #default get all the paras
30
+ paras = raw.search("p") # default get all the paras
31
31
  else
32
32
  section_title_text = section_header_title.inner_text
33
- section_header = section_header_title[0].parent #parent element of the section span element should be an <h3> node
33
+ section_header = section_header_title[0].parent # parent element of the section span element should be an <h3> node
34
34
  cur_element = section_header
35
35
 
36
- # p|text|div covers the general case. We assume presence of atleast 1 P node. if section has no P node we may end up with a P node from the next section.
36
+ # p|text|div covers the general case. We assume presence of at least 1 P node. if section has no P node we may end up with a P node from the next section.
37
37
  # div tag is commonly used as an assets wraper in an article section. often as the first element holding an image.
38
38
  # ul support will imporve the output generated for a section with a list as the main content (for example: an Author Bibliography, A musician Discography, etc)
39
39
  first_p_found = nil
40
- while (((next_sibling = cur_element.next_sibling).name =~ /p|text|div|ul/) || first_p_found.nil?) do #from section header get the next sibling until it is a breaker tag
40
+ while (((next_sibling = cur_element.next_sibling).name =~ /p|text|div|ul/) || first_p_found.nil?) do # from section header get the next sibling until it is a breaker tag
41
41
  cur_element = next_sibling
42
42
  if (cur_element.name == "p" || cur_element.name == "ul") #we treat a list as we detect a p to avoid showing
43
43
  first_p_found = true
@@ -46,7 +46,7 @@ module Onebox
46
46
  end
47
47
  end
48
48
  else # no hash found in url
49
- paras = raw.search("p") #default get all the paras
49
+ paras = raw.search("p") # default get all the paras
50
50
  end
51
51
 
52
52
  unless paras.empty?
@@ -55,7 +55,7 @@ module Onebox
55
55
  break if cnt >= paras.size
56
56
  text += " " unless cnt == 0
57
57
 
58
- if paras[cnt].name == "ul" #Handle UL tag. Generate a textual ordered list (1.item | 2.item | 3.item). Unfourtently no newline allowed in output
58
+ if paras[cnt].name == "ul" # Handle UL tag. Generate a textual ordered list (1.item | 2.item | 3.item). Unfortunately no newline allowed in output
59
59
  li_index = 1
60
60
  list_items = []
61
61
  paras[cnt].children.css("li").each { |li| list_items.push "#{li_index}." + li.inner_text ; li_index += 1 }
@@ -69,13 +69,17 @@ module Onebox
69
69
  cnt += 1
70
70
  end
71
71
  end
72
+
72
73
  text = "#{text[0..Onebox::LayoutSupport.max_text]}..." if text.length > Onebox::LayoutSupport.max_text
74
+
73
75
  result = {
74
76
  link: link,
75
77
  title: raw.css("html body h1").inner_text + (section_title_text ? " | " + section_title_text : ""), #if a section sub title exists add it to the main article title
76
78
  description: text
77
79
  }
80
+
78
81
  img = raw.css(".image img")
82
+
79
83
  if img && img.size > 0
80
84
  img.each do |i|
81
85
  src = i["src"]
@@ -21,21 +21,15 @@ module Onebox
21
21
 
22
22
  def to_html
23
23
  <<~HTML
24
- <iframe src="https://player.youku.com/embed/#{video_id}"
25
- width="640"
26
- height="430"
27
- frameborder='0'
28
- allowfullscreen>
29
- </iframe>
24
+ <iframe
25
+ src="https://player.youku.com/embed/#{video_id}"
26
+ width="640"
27
+ height="430"
28
+ frameborder='0'
29
+ allowfullscreen
30
+ ></iframe>
30
31
  HTML
31
32
  end
32
-
33
- private
34
-
35
- def uri
36
- @_uri ||= URI(@url)
37
- end
38
-
39
33
  end
40
34
  end
41
35
  end
@@ -13,11 +13,36 @@ module Onebox
13
13
  WIDTH ||= 480
14
14
  HEIGHT ||= 360
15
15
 
16
- def placeholder_html
17
- og = get_opengraph.data
16
+ def parse_embed_response
17
+ return unless video_id
18
+ return @parse_embed_response if defined?(@parse_embed_response)
19
+
20
+ embed_url = "https://www.youtube.com/embed/#{video_id}"
21
+ @embed_doc ||= Onebox::Helpers.fetch_html_doc(embed_url)
22
+
23
+ begin
24
+ script_tag = @embed_doc.xpath('//script').find { |tag| tag.to_s.include?('ytcfg.set') }.to_s
25
+ match = script_tag.to_s.match(/ytcfg\.set\((?<json>.*)\)/)
26
+
27
+ yt_json = ::JSON.parse(match[:json])
28
+ renderer = ::JSON.parse(yt_json['PLAYER_VARS']['embedded_player_response'])['embedPreview']['thumbnailPreviewRenderer']
29
+
30
+ title = renderer['title']['runs'].first['text']
31
+
32
+ image = "https://img.youtube.com/vi/#{video_id}/hqdefault.jpg"
33
+ rescue
34
+ return
35
+ end
18
36
 
37
+ @parse_embed_response = { image: image, title: title }
38
+ end
39
+
40
+ def placeholder_html
19
41
  if video_id || list_id
20
- "<img src='#{og[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{og[:title]}'>"
42
+ result = parse_embed_response
43
+ result ||= get_opengraph.data
44
+
45
+ "<img src='#{result[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{result[:title]}'>"
21
46
  else
22
47
  to_html
23
48
  end
@@ -26,21 +51,23 @@ module Onebox
26
51
  def to_html
27
52
  if video_id
28
53
  <<-HTML
29
- <iframe width="#{WIDTH}"
30
- height="#{HEIGHT}"
31
- src="https://www.youtube.com/embed/#{video_id}?#{embed_params}"
32
- frameborder="0"
33
- allowfullscreen>
34
- </iframe>
54
+ <iframe
55
+ src="https://www.youtube.com/embed/#{video_id}?#{embed_params}"
56
+ width="#{WIDTH}"
57
+ height="#{HEIGHT}"
58
+ frameborder="0"
59
+ allowfullscreen
60
+ ></iframe>
35
61
  HTML
36
62
  elsif list_id
37
63
  <<-HTML
38
- <iframe width="#{WIDTH}"
39
- height="#{HEIGHT}"
40
- src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
41
- frameborder="0"
42
- allowfullscreen>
43
- </iframe>
64
+ <iframe
65
+ src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
66
+ width="#{WIDTH}"
67
+ height="#{HEIGHT}"
68
+ frameborder="0"
69
+ allowfullscreen
70
+ ></iframe>
44
71
  HTML
45
72
  else
46
73
  # for channel pages
@@ -52,7 +79,10 @@ module Onebox
52
79
  end
53
80
 
54
81
  def video_title
55
- @video_title ||= get_opengraph.data[:title]
82
+ @video_title ||= begin
83
+ result = parse_embed_response || get_opengraph.data
84
+ result[:title]
85
+ end
56
86
  end
57
87
 
58
88
  private
@@ -138,7 +168,6 @@ module Onebox
138
168
  rescue
139
169
  {}
140
170
  end
141
-
142
171
  end
143
172
  end
144
173
  end
@@ -2,7 +2,6 @@
2
2
 
3
3
  module Onebox
4
4
  module FileTypeFinder
5
-
6
5
  # In general, most of file extension names would be recognized
7
6
  # by Highlights.js. However, some need to be checked in other
8
7
  # ways, either because they just aren't included, because they
@@ -7,7 +7,7 @@ module Onebox
7
7
 
8
8
  class DownloadTooLarge < StandardError; end
9
9
 
10
- IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com']
10
+ IGNORE_CANONICAL_DOMAINS ||= ['www.instagram.com', 'youtube.com']
11
11
 
12
12
  def self.symbolize_keys(hash)
13
13
  return {} if hash.nil?
@@ -90,7 +90,8 @@ module Onebox
90
90
 
91
91
  code = response.code.to_i
92
92
  unless code === 200
93
- response.error! unless [301, 302].include?(code)
93
+ response.error! unless [301, 302, 303, 307, 308].include?(code)
94
+
94
95
  return fetch_response(
95
96
  response['location'],
96
97
  redirect_limit: redirect_limit - 1,
@@ -127,10 +128,10 @@ module Onebox
127
128
 
128
129
  http.request_head([uri.path, uri.query].join("?")) do |response|
129
130
  code = response.code.to_i
130
- unless code === 200 || Onebox::Helpers.blank?(response.header['content-length'])
131
+ unless code === 200 || Onebox::Helpers.blank?(response.content_length)
131
132
  return nil
132
133
  end
133
- return response.header['content-length']
134
+ return response.content_length
134
135
  end
135
136
  end
136
137
  end
data/lib/onebox/layout.rb CHANGED
@@ -32,19 +32,7 @@ module Onebox
32
32
  private
33
33
 
34
34
  def uri
35
- @uri = URI(link)
36
- end
37
-
38
- def checksum
39
- @md5.hexdigest("#{VERSION}:#{link}")
40
- end
41
-
42
- def link
43
- ::Onebox::Helpers.normalize_url_for_output(record[:link])
44
- end
45
-
46
- def domain
47
- record[:domain] || URI(link || '').host.to_s.sub(/^www\./, '')
35
+ @uri ||= URI(::Onebox::Helpers.normalize_url_for_output(record[:link]))
48
36
  end
49
37
 
50
38
  def details
@@ -52,7 +40,7 @@ module Onebox
52
40
  link: record[:link],
53
41
  title: record[:title],
54
42
  favicon: record[:favicon],
55
- domain: domain,
43
+ domain: record[:domain] || uri.host.to_s.sub(/^www\./, ''),
56
44
  article_published_time: record[:article_published_time],
57
45
  article_published_time_title: record[:article_published_time_title],
58
46
  metadata_1_label: record[:metadata_1_label],
@@ -2,8 +2,12 @@
2
2
 
3
3
  module Onebox
4
4
  class Matcher
5
- def initialize(link, options = {})
6
- @url = link
5
+ def initialize(url, options = {})
6
+ begin
7
+ @uri = URI(url)
8
+ rescue URI::InvalidURIError
9
+ end
10
+
7
11
  @options = options
8
12
  end
9
13
 
@@ -14,12 +18,10 @@ module Onebox
14
18
  end
15
19
 
16
20
  def oneboxed
17
- uri = URI(@url)
18
- return unless uri.port.nil? || Onebox.options.allowed_ports.include?(uri.port)
19
- return unless uri.scheme.nil? || Onebox.options.allowed_schemes.include?(uri.scheme)
20
- ordered_engines.find { |engine| engine === uri && has_allowed_iframe_origins?(engine) }
21
- rescue URI::InvalidURIError
22
- nil
21
+ return if @uri.nil?
22
+ return if @uri.port && !Onebox.options.allowed_ports.include?(@uri.port)
23
+ return if @uri.scheme && !Onebox.options.allowed_schemes.include?(@uri.scheme)
24
+ ordered_engines.find { |engine| engine === @uri && has_allowed_iframe_origins?(engine) }
23
25
  end
24
26
 
25
27
  def has_allowed_iframe_origins?(engine)
@@ -25,14 +25,18 @@ module Onebox
25
25
  }
26
26
 
27
27
  module InstanceMethods
28
- def initialize(link, timeout = nil)
29
- super link, timeout
28
+ def initialize(url, timeout = nil)
29
+ super url, timeout
30
30
  # merge engine options from global Onebox.options interface
31
31
  # self.options = Onebox.options["GithubBlobOnebox"] # self.class.name.split("::").last.to_s
32
32
  # self.options = Onebox.options[self.class.name.split("::").last.to_s] #We can use this a more generic approach. extract the engine class name automatically
33
33
 
34
34
  self.options = DEFAULTS
35
35
 
36
+ @selected_lines_array = nil
37
+ @selected_one_liner = 0
38
+ @model_file = nil
39
+
36
40
  # Define constant after merging options set in Onebox.options
37
41
  # We can define constant automatically.
38
42
  options.each_pair do |constant_name, value|
@@ -47,8 +51,6 @@ module Onebox
47
51
  end
48
52
 
49
53
  private
50
- @selected_lines_array = nil
51
- @selected_one_liner = 0
52
54
 
53
55
  def calc_range(m, contents_lines_size)
54
56
  truncated = false
@@ -150,7 +152,7 @@ module Onebox
150
152
  end
151
153
 
152
154
  def raw
153
- return @raw if @raw
155
+ return @raw if defined?(@raw)
154
156
 
155
157
  m = @url.match(self.raw_regexp)
156
158
 
@@ -161,11 +163,9 @@ module Onebox
161
163
  @file = m[:file]
162
164
  @lang = Onebox::FileTypeFinder.from_file_name(m[:file])
163
165
 
164
- if @lang == "stl" && link.match(/^https?:\/\/(www\.)?github\.com.*\/blob\//)
165
-
166
+ if @lang == "stl" && link.match?(/^https?:\/\/(www\.)?github\.com.*\/blob\//)
166
167
  @model_file = @lang.dup
167
168
  @raw = "https://render.githubusercontent.com/view/solid?url=" + self.raw_template(m)
168
-
169
169
  else
170
170
  contents = URI.open(self.raw_template(m), read_timeout: timeout).read
171
171
 
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Onebox
4
+ module Mixins
5
+ module GithubBody
6
+ def self.included(klass)
7
+ klass.include(Onebox::Engine)
8
+ klass.include(InstanceMethods)
9
+ end
10
+
11
+ module InstanceMethods
12
+ GITHUB_COMMENT_REGEX = /(<!--.*?-->\r\n)/
13
+ MAX_BODY_LENGTH = 80
14
+ def compute_body(body)
15
+ body = body.dup
16
+ excerpt = nil
17
+
18
+ body = (body || '').gsub(GITHUB_COMMENT_REGEX, '')
19
+ body = body.length > 0 ? body : nil
20
+ if body && body.length > MAX_BODY_LENGTH
21
+ excerpt = body[MAX_BODY_LENGTH..body.length].rstrip
22
+ body = body[0..MAX_BODY_LENGTH - 1]
23
+ end
24
+
25
+ [body, excerpt]
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end