html-pipeline 2.14.3 → 3.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/FUNDING.yml +11 -3
- data/.github/dependabot.yml +27 -0
- data/.github/workflows/automerge.yml +13 -0
- data/.github/workflows/ci.yml +22 -0
- data/.github/workflows/lint.yml +23 -0
- data/.github/workflows/publish.yml +19 -0
- data/.rubocop.yml +17 -0
- data/.ruby-version +1 -0
- data/.vscode/settings.json +8 -0
- data/CHANGELOG.md +128 -2
- data/Gemfile +31 -15
- data/{LICENSE → LICENSE.txt} +2 -2
- data/README.md +241 -224
- data/Rakefile +14 -7
- data/UPGRADING.md +34 -0
- data/html-pipeline.gemspec +31 -21
- data/lib/html-pipeline.rb +3 -0
- data/lib/html_pipeline/convert_filter/markdown_filter.rb +26 -0
- data/lib/html_pipeline/convert_filter.rb +17 -0
- data/lib/html_pipeline/filter.rb +89 -0
- data/lib/html_pipeline/node_filter/absolute_source_filter.rb +54 -0
- data/lib/html_pipeline/node_filter/asset_proxy_filter.rb +86 -0
- data/lib/{html/pipeline → html_pipeline/node_filter}/emoji_filter.rb +58 -54
- data/lib/html_pipeline/node_filter/https_filter.rb +22 -0
- data/lib/html_pipeline/node_filter/image_max_width_filter.rb +40 -0
- data/lib/{html/pipeline/@mention_filter.rb → html_pipeline/node_filter/mention_filter.rb} +54 -68
- data/lib/html_pipeline/node_filter/syntax_highlight_filter.rb +62 -0
- data/lib/html_pipeline/node_filter/table_of_contents_filter.rb +70 -0
- data/lib/html_pipeline/node_filter/team_mention_filter.rb +105 -0
- data/lib/html_pipeline/node_filter.rb +31 -0
- data/lib/html_pipeline/sanitization_filter.rb +190 -0
- data/lib/{html/pipeline → html_pipeline/text_filter}/image_filter.rb +3 -3
- data/lib/{html/pipeline → html_pipeline/text_filter}/plain_text_input_filter.rb +3 -5
- data/lib/html_pipeline/text_filter.rb +21 -0
- data/lib/html_pipeline/version.rb +5 -0
- data/lib/html_pipeline.rb +281 -0
- metadata +58 -54
- data/.travis.yml +0 -43
- data/Appraisals +0 -19
- data/CONTRIBUTING.md +0 -60
- data/bin/html-pipeline +0 -78
- data/lib/html/pipeline/@team_mention_filter.rb +0 -99
- data/lib/html/pipeline/absolute_source_filter.rb +0 -52
- data/lib/html/pipeline/autolink_filter.rb +0 -34
- data/lib/html/pipeline/body_content.rb +0 -44
- data/lib/html/pipeline/camo_filter.rb +0 -105
- data/lib/html/pipeline/email_reply_filter.rb +0 -69
- data/lib/html/pipeline/filter.rb +0 -165
- data/lib/html/pipeline/https_filter.rb +0 -29
- data/lib/html/pipeline/image_max_width_filter.rb +0 -37
- data/lib/html/pipeline/markdown_filter.rb +0 -56
- data/lib/html/pipeline/sanitization_filter.rb +0 -144
- data/lib/html/pipeline/syntax_highlight_filter.rb +0 -50
- data/lib/html/pipeline/text_filter.rb +0 -16
- data/lib/html/pipeline/textile_filter.rb +0 -25
- data/lib/html/pipeline/toc_filter.rb +0 -69
- data/lib/html/pipeline/version.rb +0 -7
- data/lib/html/pipeline.rb +0 -210
@@ -1,99 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'set'
|
4
|
-
|
5
|
-
module HTML
|
6
|
-
class Pipeline
|
7
|
-
# HTML filter that replaces @org/team mentions with links. Mentions within
|
8
|
-
# <pre>, <code>, <a>, <style>, and <script> elements are ignored.
|
9
|
-
#
|
10
|
-
# Context options:
|
11
|
-
# :base_url - Used to construct links to team profile pages for each
|
12
|
-
# mention.
|
13
|
-
# :team_pattern - Used to provide a custom regular expression to
|
14
|
-
# identify team names
|
15
|
-
#
|
16
|
-
class TeamMentionFilter < Filter
|
17
|
-
# Public: Find @org/team mentions in text. See
|
18
|
-
# TeamMentionFilter#team_mention_link_filter.
|
19
|
-
#
|
20
|
-
# TeamMentionFilter.mentioned_teams_in(text) do |match, org, team|
|
21
|
-
# "<a href=...>#{team}</a>"
|
22
|
-
# end
|
23
|
-
#
|
24
|
-
# text - String text to search.
|
25
|
-
#
|
26
|
-
# Yields the String match, org name, and team name. The yield's
|
27
|
-
# return replaces the match in the original text.
|
28
|
-
#
|
29
|
-
# Returns a String replaced with the return of the block.
|
30
|
-
def self.mentioned_teams_in(text, team_pattern = TeamPattern)
|
31
|
-
text.gsub team_pattern do |match|
|
32
|
-
org = $1
|
33
|
-
team = $2
|
34
|
-
yield match, org, team
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
# Default pattern used to extract team names from text. The value can be
|
39
|
-
# overridden by providing the team_pattern variable in the context. To
|
40
|
-
# properly link the mention, should be in the format of /@(1)\/(2)/.
|
41
|
-
TeamPattern = /
|
42
|
-
(?<=^|\W) # beginning of string or non-word char
|
43
|
-
@([a-z0-9][a-z0-9-]*) # @organization
|
44
|
-
\/ # dividing slash
|
45
|
-
([a-z0-9][a-z0-9\-_]*) # team
|
46
|
-
\b
|
47
|
-
/ix
|
48
|
-
|
49
|
-
# Don't look for mentions in text nodes that are children of these elements
|
50
|
-
IGNORE_PARENTS = %w[pre code a style script].to_set
|
51
|
-
|
52
|
-
def call
|
53
|
-
result[:mentioned_teams] ||= []
|
54
|
-
|
55
|
-
doc.search('.//text()').each do |node|
|
56
|
-
content = node.to_html
|
57
|
-
next unless content.include?('@')
|
58
|
-
next if has_ancestor?(node, IGNORE_PARENTS)
|
59
|
-
html = mention_link_filter(content, base_url, team_pattern)
|
60
|
-
next if html == content
|
61
|
-
node.replace(html)
|
62
|
-
end
|
63
|
-
doc
|
64
|
-
end
|
65
|
-
|
66
|
-
def team_pattern
|
67
|
-
context[:team_pattern] || TeamPattern
|
68
|
-
end
|
69
|
-
|
70
|
-
# Replace @org/team mentions in text with links to the mentioned team's
|
71
|
-
# page.
|
72
|
-
#
|
73
|
-
# text - String text to replace @mention team names in.
|
74
|
-
# base_url - The base URL used to construct team page URLs.
|
75
|
-
# team_pattern - Regular expression used to identify teams in text
|
76
|
-
#
|
77
|
-
# Returns a string with @team mentions replaced with links. All links have a
|
78
|
-
# 'team-mention' class name attached for styling.
|
79
|
-
def mention_link_filter(text, _base_url = '/', team_pattern = TeamPattern)
|
80
|
-
self.class.mentioned_teams_in(text, team_pattern) do |match, org, team|
|
81
|
-
link = link_to_mentioned_team(org, team)
|
82
|
-
|
83
|
-
link ? match.sub("@#{org}/#{team}", link) : match
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
def link_to_mentioned_team(org, team)
|
88
|
-
result[:mentioned_teams] |= [team]
|
89
|
-
|
90
|
-
url = base_url.dup
|
91
|
-
url << '/' unless url =~ /[\/~]\z/
|
92
|
-
|
93
|
-
"<a href='#{url << org}/#{team}' class='team-mention'>" \
|
94
|
-
"@#{org}/#{team}" \
|
95
|
-
'</a>'
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'uri'
|
4
|
-
|
5
|
-
module HTML
|
6
|
-
class Pipeline
|
7
|
-
class AbsoluteSourceFilter < Filter
|
8
|
-
# HTML Filter for replacing relative and root relative image URLs with
|
9
|
-
# fully qualified URLs
|
10
|
-
#
|
11
|
-
# This is useful if an image is root relative but should really be going
|
12
|
-
# through a cdn, or if the content for the page assumes the host is known
|
13
|
-
# i.e. scraped webpages and some RSS feeds.
|
14
|
-
#
|
15
|
-
# Context options:
|
16
|
-
# :image_base_url - Base URL for image host for root relative src.
|
17
|
-
# :image_subpage_url - For relative src.
|
18
|
-
#
|
19
|
-
# This filter does not write additional information to the context.
|
20
|
-
# This filter would need to be run before CamoFilter.
|
21
|
-
def call
|
22
|
-
doc.search('img').each do |element|
|
23
|
-
next if element['src'].nil? || element['src'].empty?
|
24
|
-
src = element['src'].strip
|
25
|
-
next if src.start_with? 'http'
|
26
|
-
base = if src.start_with? '/'
|
27
|
-
image_base_url
|
28
|
-
else
|
29
|
-
image_subpage_url
|
30
|
-
end
|
31
|
-
|
32
|
-
begin
|
33
|
-
element['src'] = URI.join(base, src).to_s
|
34
|
-
rescue Exception
|
35
|
-
next
|
36
|
-
end
|
37
|
-
end
|
38
|
-
doc
|
39
|
-
end
|
40
|
-
|
41
|
-
# Private: the base url you want to use
|
42
|
-
def image_base_url
|
43
|
-
context[:image_base_url] || raise("Missing context :image_base_url for #{self.class.name}")
|
44
|
-
end
|
45
|
-
|
46
|
-
# Private: the relative url you want to use
|
47
|
-
def image_subpage_url
|
48
|
-
context[:image_subpage_url] || raise("Missing context :image_subpage_url for #{self.class.name}")
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
HTML::Pipeline.require_dependency('rinku', 'AutolinkFilter')
|
4
|
-
|
5
|
-
module HTML
|
6
|
-
class Pipeline
|
7
|
-
# HTML Filter for auto_linking urls in HTML.
|
8
|
-
#
|
9
|
-
# Context options:
|
10
|
-
# :autolink - boolean whether to autolink urls
|
11
|
-
# :link_mode - :all, :urls or :email_addresses
|
12
|
-
# :link_attr - HTML attributes for the link that will be generated
|
13
|
-
# :skip_tags - HTML tags inside which autolinking will be skipped.
|
14
|
-
# See Rinku.skip_tags
|
15
|
-
# :flags - additional Rinku flags. See https://github.com/vmg/rinku
|
16
|
-
#
|
17
|
-
# This filter does not write additional information to the context.
|
18
|
-
class AutolinkFilter < Filter
|
19
|
-
def call
|
20
|
-
return html if context[:autolink] == false
|
21
|
-
|
22
|
-
skip_tags = context[:skip_tags]
|
23
|
-
flags = 0
|
24
|
-
flags |= context[:flags] if context[:flags]
|
25
|
-
|
26
|
-
Rinku.auto_link(html, link_mode, context[:link_attr], skip_tags, flags)
|
27
|
-
end
|
28
|
-
|
29
|
-
def link_mode
|
30
|
-
context[:link_mode] || :urls
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTML
|
4
|
-
class Pipeline
|
5
|
-
# Public: Runs a String of content through an HTML processing pipeline,
|
6
|
-
# providing easy access to a generated DocumentFragment.
|
7
|
-
class BodyContent
|
8
|
-
attr_reader :result
|
9
|
-
|
10
|
-
# Public: Initialize a BodyContent.
|
11
|
-
#
|
12
|
-
# body - A String body.
|
13
|
-
# context - A Hash of context options for the filters.
|
14
|
-
# pipeline - A HTML::Pipeline object with one or more Filters.
|
15
|
-
def initialize(body, context, pipeline)
|
16
|
-
@body = body
|
17
|
-
@context = context
|
18
|
-
@pipeline = pipeline
|
19
|
-
end
|
20
|
-
|
21
|
-
# Public: Gets the memoized result of the body content as it passed through
|
22
|
-
# the Pipeline.
|
23
|
-
#
|
24
|
-
# Returns a Hash, or something similar as defined by @pipeline.result_class.
|
25
|
-
def result
|
26
|
-
@result ||= @pipeline.call @body, @context
|
27
|
-
end
|
28
|
-
|
29
|
-
# Public: Gets the updated body from the Pipeline result.
|
30
|
-
#
|
31
|
-
# Returns a String or DocumentFragment.
|
32
|
-
def output
|
33
|
-
@output ||= result[:output]
|
34
|
-
end
|
35
|
-
|
36
|
-
# Public: Parses the output into a DocumentFragment.
|
37
|
-
#
|
38
|
-
# Returns a DocumentFragment.
|
39
|
-
def document
|
40
|
-
@document ||= HTML::Pipeline.parse output
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
@@ -1,105 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'openssl'
|
4
|
-
require 'uri'
|
5
|
-
|
6
|
-
module HTML
|
7
|
-
class Pipeline
|
8
|
-
# HTML Filter for replacing http image URLs with camo versions. See:
|
9
|
-
#
|
10
|
-
# https://github.com/atmos/camo
|
11
|
-
#
|
12
|
-
# All images provided in user content should be run through this
|
13
|
-
# filter so that http image sources do not cause mixed-content warnings
|
14
|
-
# in browser clients.
|
15
|
-
#
|
16
|
-
# Context options:
|
17
|
-
# :asset_proxy (required) - Base URL for constructed asset proxy URLs.
|
18
|
-
# :asset_proxy_secret_key (required) - The shared secret used to encode URLs.
|
19
|
-
# :asset_proxy_allowlist - Array of host Strings or Regexps to skip
|
20
|
-
# src rewriting.
|
21
|
-
#
|
22
|
-
# This filter does not write additional information to the context.
|
23
|
-
class CamoFilter < Filter
|
24
|
-
# Hijacks images in the markup provided, replacing them with URLs that
|
25
|
-
# go through the github asset proxy.
|
26
|
-
def call
|
27
|
-
return doc unless asset_proxy_enabled?
|
28
|
-
|
29
|
-
doc.search('img').each do |element|
|
30
|
-
original_src = element['src']
|
31
|
-
next unless original_src
|
32
|
-
|
33
|
-
begin
|
34
|
-
uri = URI.parse(original_src)
|
35
|
-
rescue Exception
|
36
|
-
next
|
37
|
-
end
|
38
|
-
|
39
|
-
next if uri.host.nil?
|
40
|
-
next if asset_host_allowed?(uri.host)
|
41
|
-
|
42
|
-
element['src'] = asset_proxy_url(original_src)
|
43
|
-
element['data-canonical-src'] = original_src
|
44
|
-
end
|
45
|
-
doc
|
46
|
-
end
|
47
|
-
|
48
|
-
# Implementation of validate hook.
|
49
|
-
# Errors should raise exceptions or use an existing validator.
|
50
|
-
def validate
|
51
|
-
needs :asset_proxy, :asset_proxy_secret_key
|
52
|
-
end
|
53
|
-
|
54
|
-
# The camouflaged URL for a given image URL.
|
55
|
-
def asset_proxy_url(url)
|
56
|
-
"#{asset_proxy_host}/#{asset_url_hash(url)}/#{hexencode(url)}"
|
57
|
-
end
|
58
|
-
|
59
|
-
# Private: calculate the HMAC digest for a image source URL.
|
60
|
-
def asset_url_hash(url)
|
61
|
-
OpenSSL::HMAC.hexdigest('sha1', asset_proxy_secret_key, url)
|
62
|
-
end
|
63
|
-
|
64
|
-
# Private: Return true if asset proxy filter should be enabled
|
65
|
-
def asset_proxy_enabled?
|
66
|
-
!context[:disable_asset_proxy]
|
67
|
-
end
|
68
|
-
|
69
|
-
# Private: the host to use for generated asset proxied URLs.
|
70
|
-
def asset_proxy_host
|
71
|
-
context[:asset_proxy]
|
72
|
-
end
|
73
|
-
|
74
|
-
def asset_proxy_secret_key
|
75
|
-
context[:asset_proxy_secret_key]
|
76
|
-
end
|
77
|
-
|
78
|
-
def asset_proxy_whitelist
|
79
|
-
warn "[DEPRECATION] 'asset_proxy_whitelist' is deprecated. Please use 'asset_proxy_allowlist' instead."
|
80
|
-
asset_proxy_allowlist
|
81
|
-
end
|
82
|
-
|
83
|
-
def asset_proxy_allowlist
|
84
|
-
context[:asset_proxy_allowlist] || context[:asset_proxy_whitelist] || []
|
85
|
-
end
|
86
|
-
|
87
|
-
def asset_host_whitelisted?(host)
|
88
|
-
warn "[DEPRECATION] 'asset_host_whitelisted?' is deprecated. Please use 'asset_host_allowed?' instead."
|
89
|
-
asset_host_allowed?(host)
|
90
|
-
end
|
91
|
-
|
92
|
-
def asset_host_allowed?(host)
|
93
|
-
asset_proxy_allowlist.any? do |test|
|
94
|
-
test.is_a?(String) ? host == test : test.match(host)
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
# Private: helper to hexencode a string. Each byte ends up encoded into
|
99
|
-
# two characters, zero padded value in the range [0-9a-f].
|
100
|
-
def hexencode(str)
|
101
|
-
str.unpack('H*').first
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
@@ -1,69 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
HTML::Pipeline.require_dependency('escape_utils', 'EmailReplyFilter')
|
4
|
-
HTML::Pipeline.require_dependency('email_reply_parser', 'EmailReplyFilter')
|
5
|
-
|
6
|
-
module HTML
|
7
|
-
class Pipeline
|
8
|
-
# HTML Filter that converts email reply text into an HTML DocumentFragment.
|
9
|
-
# It must be used as the first filter in a pipeline.
|
10
|
-
#
|
11
|
-
# Context options:
|
12
|
-
# None
|
13
|
-
#
|
14
|
-
# This filter does not write any additional information to the context hash.
|
15
|
-
class EmailReplyFilter < TextFilter
|
16
|
-
include EscapeUtils
|
17
|
-
|
18
|
-
EMAIL_HIDDEN_HEADER = %(<span class="email-hidden-toggle"><a href="#">…</a></span><div class="email-hidden-reply" style="display:none">).freeze
|
19
|
-
EMAIL_QUOTED_HEADER = %(<div class="email-quoted-reply">).freeze
|
20
|
-
EMAIL_SIGNATURE_HEADER = %(<div class="email-signature-reply">).freeze
|
21
|
-
EMAIL_FRAGMENT_HEADER = %(<div class="email-fragment">).freeze
|
22
|
-
EMAIL_HEADER_END = '</div>'.freeze
|
23
|
-
EMAIL_REGEX = /[^@\s.][^@\s]*@\[?[a-z0-9.-]+\]?/
|
24
|
-
HIDDEN_EMAIL_PATTERN = '***@***.***'.freeze
|
25
|
-
|
26
|
-
# Scans an email body to determine which bits are quoted and which should
|
27
|
-
# be hidden. EmailReplyParser is used to split the comment into an Array
|
28
|
-
# of quoted or unquoted Blocks. Now, we loop through them and attempt to
|
29
|
-
# add <div> tags around them so we can hide the hidden blocks, and style
|
30
|
-
# the quoted blocks differently. Since multiple blocks may be hidden, be
|
31
|
-
# sure to keep the "email-hidden-reply" <div>s around "email-quoted-reply"
|
32
|
-
# <div> tags. Call this on each comment of a visible thread in the order
|
33
|
-
# that they are displayed. Note: all comments are processed so we can
|
34
|
-
# maintain a Set of SHAs of paragraphs. Only plaintext comments skip the
|
35
|
-
# markdown step.
|
36
|
-
#
|
37
|
-
# Returns the email comment HTML as a String
|
38
|
-
def call
|
39
|
-
found_hidden = nil
|
40
|
-
paragraphs = EmailReplyParser.read(text.dup).fragments.map do |fragment|
|
41
|
-
pieces = [CGI.escapeHTML(fragment.to_s.strip).gsub(/^\s*(>|>)/, '')]
|
42
|
-
|
43
|
-
if fragment.quoted?
|
44
|
-
if context[:hide_quoted_email_addresses]
|
45
|
-
pieces.map! do |piece|
|
46
|
-
piece.gsub(EMAIL_REGEX, HIDDEN_EMAIL_PATTERN)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
pieces.unshift EMAIL_QUOTED_HEADER
|
50
|
-
pieces << EMAIL_HEADER_END
|
51
|
-
elsif fragment.signature?
|
52
|
-
pieces.unshift EMAIL_SIGNATURE_HEADER
|
53
|
-
pieces << EMAIL_HEADER_END
|
54
|
-
else
|
55
|
-
pieces.unshift EMAIL_FRAGMENT_HEADER
|
56
|
-
pieces << EMAIL_HEADER_END
|
57
|
-
end
|
58
|
-
if fragment.hidden? && !found_hidden
|
59
|
-
found_hidden = true
|
60
|
-
pieces.unshift EMAIL_HIDDEN_HEADER
|
61
|
-
end
|
62
|
-
pieces.join
|
63
|
-
end
|
64
|
-
paragraphs << EMAIL_HEADER_END if found_hidden
|
65
|
-
paragraphs.join("\n")
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
data/lib/html/pipeline/filter.rb
DELETED
@@ -1,165 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTML
|
4
|
-
class Pipeline
|
5
|
-
# Base class for user content HTML filters. Each filter takes an
|
6
|
-
# HTML string or Nokogiri::HTML::DocumentFragment, performs
|
7
|
-
# modifications and/or writes information to the result hash. Filters must
|
8
|
-
# return a DocumentFragment (typically the same instance provided to the call
|
9
|
-
# method) or a String with HTML markup.
|
10
|
-
#
|
11
|
-
# Example filter that replaces all images with trollface:
|
12
|
-
#
|
13
|
-
# class FuuuFilter < HTML::Pipeline::Filter
|
14
|
-
# def call
|
15
|
-
# doc.search('img').each do |img|
|
16
|
-
# img['src'] = "http://paradoxdgn.com/junk/avatars/trollface.jpg"
|
17
|
-
# end
|
18
|
-
# end
|
19
|
-
# end
|
20
|
-
#
|
21
|
-
# The context Hash passes options to filters and should not be changed in
|
22
|
-
# place. A Result Hash allows filters to make extracted information
|
23
|
-
# available to the caller and is mutable.
|
24
|
-
#
|
25
|
-
# Common context options:
|
26
|
-
# :base_url - The site's base URL
|
27
|
-
# :repository - A Repository providing context for the HTML being processed
|
28
|
-
#
|
29
|
-
# Each filter may define additional options and output values. See the class
|
30
|
-
# docs for more info.
|
31
|
-
class Filter
|
32
|
-
class InvalidDocumentException < StandardError; end
|
33
|
-
|
34
|
-
def initialize(doc, context = nil, result = nil)
|
35
|
-
if doc.is_a?(String)
|
36
|
-
@html = doc.to_str
|
37
|
-
@doc = nil
|
38
|
-
else
|
39
|
-
@doc = doc
|
40
|
-
@html = nil
|
41
|
-
end
|
42
|
-
@context = context || {}
|
43
|
-
@result = result || {}
|
44
|
-
validate
|
45
|
-
end
|
46
|
-
|
47
|
-
# Public: Returns a simple Hash used to pass extra information into filters
|
48
|
-
# and also to allow filters to make extracted information available to the
|
49
|
-
# caller.
|
50
|
-
attr_reader :context
|
51
|
-
|
52
|
-
# Public: Returns a Hash used to allow filters to pass back information
|
53
|
-
# to callers of the various Pipelines. This can be used for
|
54
|
-
# #mentioned_users, for example.
|
55
|
-
attr_reader :result
|
56
|
-
|
57
|
-
# The Nokogiri::HTML::DocumentFragment to be manipulated. If the filter was
|
58
|
-
# provided a String, parse into a DocumentFragment the first time this
|
59
|
-
# method is called.
|
60
|
-
def doc
|
61
|
-
@doc ||= parse_html(html)
|
62
|
-
end
|
63
|
-
|
64
|
-
# The String representation of the document. If a DocumentFragment was
|
65
|
-
# provided to the Filter, it is serialized into a String when this method is
|
66
|
-
# called.
|
67
|
-
def html
|
68
|
-
raise InvalidDocumentException if @html.nil? && @doc.nil?
|
69
|
-
@html || doc.to_html
|
70
|
-
end
|
71
|
-
|
72
|
-
# The main filter entry point. The doc attribute is guaranteed to be a
|
73
|
-
# Nokogiri::HTML::DocumentFragment when invoked. Subclasses should modify
|
74
|
-
# this document in place or extract information and add it to the context
|
75
|
-
# hash.
|
76
|
-
def call
|
77
|
-
raise NotImplementedError
|
78
|
-
end
|
79
|
-
|
80
|
-
# Make sure the context has everything we need. Noop: Subclasses can override.
|
81
|
-
def validate; end
|
82
|
-
|
83
|
-
# The Repository object provided in the context hash, or nil when no
|
84
|
-
# :repository was specified.
|
85
|
-
#
|
86
|
-
# It's assumed that the repository context has already been checked
|
87
|
-
# for permissions
|
88
|
-
def repository
|
89
|
-
context[:repository]
|
90
|
-
end
|
91
|
-
|
92
|
-
# The User object provided in the context hash, or nil when no user
|
93
|
-
# was specified
|
94
|
-
def current_user
|
95
|
-
context[:current_user]
|
96
|
-
end
|
97
|
-
|
98
|
-
# The site's base URL provided in the context hash, or '/' when no
|
99
|
-
# base URL was specified.
|
100
|
-
def base_url
|
101
|
-
context[:base_url] || '/'
|
102
|
-
end
|
103
|
-
|
104
|
-
# Ensure the passed argument is a DocumentFragment. When a string is
|
105
|
-
# provided, it is parsed and returned; otherwise, the DocumentFragment is
|
106
|
-
# returned unmodified.
|
107
|
-
def parse_html(html)
|
108
|
-
HTML::Pipeline.parse(html)
|
109
|
-
end
|
110
|
-
|
111
|
-
# Helper method for filter subclasses used to determine if any of a node's
|
112
|
-
# ancestors have one of the tag names specified.
|
113
|
-
#
|
114
|
-
# node - The Node object to check.
|
115
|
-
# tags - An array of tag name strings to check. These should be downcase.
|
116
|
-
#
|
117
|
-
# Returns true when the node has a matching ancestor.
|
118
|
-
def has_ancestor?(node, tags)
|
119
|
-
while node = node.parent
|
120
|
-
break true if tags.include?(node.name.downcase)
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
124
|
-
# Perform a filter on doc with the given context.
|
125
|
-
#
|
126
|
-
# Returns a HTML::Pipeline::DocumentFragment or a String containing HTML
|
127
|
-
# markup.
|
128
|
-
def self.call(doc, context = nil, result = nil)
|
129
|
-
new(doc, context, result).call
|
130
|
-
end
|
131
|
-
|
132
|
-
# Like call but guarantees that a DocumentFragment is returned, even when
|
133
|
-
# the last filter returns a String.
|
134
|
-
def self.to_document(input, context = nil)
|
135
|
-
html = call(input, context)
|
136
|
-
HTML::Pipeline.parse(html)
|
137
|
-
end
|
138
|
-
|
139
|
-
# Like call but guarantees that a string of HTML markup is returned.
|
140
|
-
def self.to_html(input, context = nil)
|
141
|
-
output = call(input, context)
|
142
|
-
if output.respond_to?(:to_html)
|
143
|
-
output.to_html
|
144
|
-
else
|
145
|
-
output.to_s
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
# Validator for required context. This will check that anything passed in
|
150
|
-
# contexts exists in @contexts
|
151
|
-
#
|
152
|
-
# If any errors are found an ArgumentError will be raised with a
|
153
|
-
# message listing all the missing contexts and the filters that
|
154
|
-
# require them.
|
155
|
-
def needs(*keys)
|
156
|
-
missing = keys.reject { |key| context.include? key }
|
157
|
-
|
158
|
-
if missing.any?
|
159
|
-
raise ArgumentError,
|
160
|
-
"Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}"
|
161
|
-
end
|
162
|
-
end
|
163
|
-
end
|
164
|
-
end
|
165
|
-
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTML
|
4
|
-
class Pipeline
|
5
|
-
# HTML Filter for replacing http references to :http_url with https versions.
|
6
|
-
# Subdomain references are not rewritten.
|
7
|
-
#
|
8
|
-
# Context options:
|
9
|
-
# :http_url - The HTTP url to force HTTPS. Falls back to :base_url
|
10
|
-
class HttpsFilter < Filter
|
11
|
-
def call
|
12
|
-
doc.css(%(a[href^="#{http_url}"])).each do |element|
|
13
|
-
element['href'] = element['href'].sub(/^http:/, 'https:')
|
14
|
-
end
|
15
|
-
doc
|
16
|
-
end
|
17
|
-
|
18
|
-
# HTTP url to replace. Falls back to :base_url
|
19
|
-
def http_url
|
20
|
-
context[:http_url] || context[:base_url]
|
21
|
-
end
|
22
|
-
|
23
|
-
# Raise error if :http_url undefined
|
24
|
-
def validate
|
25
|
-
needs :http_url unless http_url
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTML
|
4
|
-
class Pipeline
|
5
|
-
# This filter rewrites image tags with a max-width inline style and also wraps
|
6
|
-
# the image in an <a> tag that causes the full size image to be opened in a
|
7
|
-
# new tab.
|
8
|
-
#
|
9
|
-
# The max-width inline styles are especially useful in HTML email which
|
10
|
-
# don't use a global stylesheets.
|
11
|
-
class ImageMaxWidthFilter < Filter
|
12
|
-
def call
|
13
|
-
doc.search('img').each do |element|
|
14
|
-
# Skip if there's already a style attribute. Not sure how this
|
15
|
-
# would happen but we can reconsider it in the future.
|
16
|
-
next if element['style']
|
17
|
-
|
18
|
-
# Bail out if src doesn't look like a valid http url. trying to avoid weird
|
19
|
-
# js injection via javascript: urls.
|
20
|
-
next if element['src'].to_s.strip =~ /\Ajavascript/i
|
21
|
-
|
22
|
-
element['style'] = 'max-width:100%;'
|
23
|
-
|
24
|
-
link_image element unless has_ancestor?(element, %w[a])
|
25
|
-
end
|
26
|
-
|
27
|
-
doc
|
28
|
-
end
|
29
|
-
|
30
|
-
def link_image(element)
|
31
|
-
link = doc.document.create_element('a', href: element['src'], target: '_blank')
|
32
|
-
link.add_child(element.dup)
|
33
|
-
element.replace(link)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
HTML::Pipeline.require_dependency('commonmarker', 'MarkdownFilter')
|
4
|
-
|
5
|
-
module HTML
|
6
|
-
class Pipeline
|
7
|
-
# HTML Filter that converts Markdown text into HTML and converts into a
|
8
|
-
# DocumentFragment. This is different from most filters in that it can take a
|
9
|
-
# non-HTML as input. It must be used as the first filter in a pipeline.
|
10
|
-
#
|
11
|
-
# Context options:
|
12
|
-
# :gfm => false Disable GFM line-end processing
|
13
|
-
# :commonmarker_extensions => [ :table, :strikethrough,
|
14
|
-
# :tagfilter, :autolink ] Commonmarker extensions to include
|
15
|
-
#
|
16
|
-
# This filter does not write any additional information to the context hash.
|
17
|
-
class MarkdownFilter < TextFilter
|
18
|
-
DEFAULT_COMMONMARKER_EXTENSIONS = %i[table strikethrough tagfilter autolink].freeze
|
19
|
-
|
20
|
-
def initialize(text, context = nil, result = nil)
|
21
|
-
super text, context, result
|
22
|
-
@text = @text.delete "\r"
|
23
|
-
end
|
24
|
-
|
25
|
-
# Convert Markdown to HTML using the best available implementation
|
26
|
-
# and convert into a DocumentFragment.
|
27
|
-
def call
|
28
|
-
extensions = context.fetch(
|
29
|
-
:commonmarker_extensions,
|
30
|
-
DEFAULT_COMMONMARKER_EXTENSIONS
|
31
|
-
)
|
32
|
-
html = if (renderer = context[:commonmarker_renderer])
|
33
|
-
unless renderer < CommonMarker::HtmlRenderer
|
34
|
-
raise ArgumentError, "`commonmark_renderer` must be derived from `CommonMarker::HtmlRenderer`"
|
35
|
-
end
|
36
|
-
parse_options = :DEFAULT
|
37
|
-
parse_options = [:UNSAFE] if context[:unsafe]
|
38
|
-
|
39
|
-
render_options = [:GITHUB_PRE_LANG]
|
40
|
-
render_options << :HARDBREAKS if context[:gfm] != false
|
41
|
-
render_options << :UNSAFE if context[:unsafe]
|
42
|
-
|
43
|
-
doc = CommonMarker.render_doc(@text, parse_options, extensions)
|
44
|
-
renderer.new(options: render_options, extensions: extensions).render(doc)
|
45
|
-
else
|
46
|
-
options = [:GITHUB_PRE_LANG]
|
47
|
-
options << :HARDBREAKS if context[:gfm] != false
|
48
|
-
options << :UNSAFE if context[:unsafe]
|
49
|
-
CommonMarker.render_html(@text, options, extensions)
|
50
|
-
end
|
51
|
-
html.rstrip!
|
52
|
-
html
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|