html-pipeline 2.14.3 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/FUNDING.yml +11 -3
- data/.github/dependabot.yml +27 -0
- data/.github/workflows/automerge.yml +13 -0
- data/.github/workflows/ci.yml +22 -0
- data/.github/workflows/lint.yml +23 -0
- data/.github/workflows/publish.yml +19 -0
- data/.rubocop.yml +17 -0
- data/.ruby-version +1 -0
- data/.vscode/settings.json +8 -0
- data/CHANGELOG.md +119 -2
- data/Gemfile +31 -15
- data/{LICENSE → LICENSE.txt} +2 -2
- data/README.md +241 -224
- data/Rakefile +14 -7
- data/UPGRADING.md +34 -0
- data/html-pipeline.gemspec +31 -21
- data/lib/html-pipeline.rb +3 -0
- data/lib/html_pipeline/convert_filter/markdown_filter.rb +26 -0
- data/lib/html_pipeline/convert_filter.rb +17 -0
- data/lib/html_pipeline/filter.rb +89 -0
- data/lib/html_pipeline/node_filter/absolute_source_filter.rb +54 -0
- data/lib/html_pipeline/node_filter/asset_proxy_filter.rb +86 -0
- data/lib/{html/pipeline → html_pipeline/node_filter}/emoji_filter.rb +58 -54
- data/lib/html_pipeline/node_filter/https_filter.rb +22 -0
- data/lib/html_pipeline/node_filter/image_max_width_filter.rb +40 -0
- data/lib/{html/pipeline/@mention_filter.rb → html_pipeline/node_filter/mention_filter.rb} +54 -68
- data/lib/html_pipeline/node_filter/syntax_highlight_filter.rb +62 -0
- data/lib/html_pipeline/node_filter/table_of_contents_filter.rb +70 -0
- data/lib/html_pipeline/node_filter/team_mention_filter.rb +105 -0
- data/lib/html_pipeline/node_filter.rb +31 -0
- data/lib/html_pipeline/sanitization_filter.rb +188 -0
- data/lib/{html/pipeline → html_pipeline/text_filter}/image_filter.rb +3 -3
- data/lib/{html/pipeline → html_pipeline/text_filter}/plain_text_input_filter.rb +3 -5
- data/lib/html_pipeline/text_filter.rb +21 -0
- data/lib/html_pipeline/version.rb +5 -0
- data/lib/html_pipeline.rb +281 -0
- metadata +58 -54
- data/.travis.yml +0 -43
- data/Appraisals +0 -19
- data/CONTRIBUTING.md +0 -60
- data/bin/html-pipeline +0 -78
- data/lib/html/pipeline/@team_mention_filter.rb +0 -99
- data/lib/html/pipeline/absolute_source_filter.rb +0 -52
- data/lib/html/pipeline/autolink_filter.rb +0 -34
- data/lib/html/pipeline/body_content.rb +0 -44
- data/lib/html/pipeline/camo_filter.rb +0 -105
- data/lib/html/pipeline/email_reply_filter.rb +0 -69
- data/lib/html/pipeline/filter.rb +0 -165
- data/lib/html/pipeline/https_filter.rb +0 -29
- data/lib/html/pipeline/image_max_width_filter.rb +0 -37
- data/lib/html/pipeline/markdown_filter.rb +0 -56
- data/lib/html/pipeline/sanitization_filter.rb +0 -144
- data/lib/html/pipeline/syntax_highlight_filter.rb +0 -50
- data/lib/html/pipeline/text_filter.rb +0 -16
- data/lib/html/pipeline/textile_filter.rb +0 -25
- data/lib/html/pipeline/toc_filter.rb +0 -69
- data/lib/html/pipeline/version.rb +0 -7
- data/lib/html/pipeline.rb +0 -210
@@ -1,99 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'set'
|
4
|
-
|
5
|
-
module HTML
  class Pipeline
    # HTML filter that replaces @org/team mentions with links. Mentions within
    # <pre>, <code>, <a>, <style>, and <script> elements are ignored.
    #
    # Context options:
    #   :base_url     - Used to construct links to team profile pages for each
    #                   mention.
    #   :team_pattern - Used to provide a custom regular expression to
    #                   identify team names.
    #
    # Result keys written:
    #   :mentioned_teams - Array of the unique team names (the part after the
    #                      slash) that were linked.
    class TeamMentionFilter < Filter
      # Public: Find @org/team mentions in text. See
      # TeamMentionFilter#mention_link_filter.
      #
      #   TeamMentionFilter.mentioned_teams_in(text) do |match, org, team|
      #     "<a href=...>#{team}</a>"
      #   end
      #
      # text         - String text to search.
      # team_pattern - Regexp whose first capture is the org and whose second
      #                capture is the team.
      #
      # Yields the String match, org name, and team name. The yield's
      # return replaces the match in the original text.
      #
      # Returns a String replaced with the return of the block.
      def self.mentioned_teams_in(text, team_pattern = TeamPattern)
        text.gsub(team_pattern) do |match|
          captures = Regexp.last_match
          yield match, captures[1], captures[2]
        end
      end

      # Default pattern used to extract team names from text. The value can be
      # overridden by providing the team_pattern variable in the context. To
      # properly link the mention, should be in the format of /@(1)\/(2)/.
      TeamPattern = /
        (?<=^|\W)                  # beginning of line or non-word char
        @([a-z0-9][a-z0-9-]*)      # @organization
        \/                         # dividing slash
        ([a-z0-9][a-z0-9\-_]*)     # team
        \b
      /ix

      # Don't look for mentions in text nodes that are children of these elements
      IGNORE_PARENTS = %w[pre code a style script].to_set.freeze

      # Walks every text node of the document and replaces the ones containing
      # team mentions with linked markup.
      #
      # Returns the (modified) document.
      def call
        result[:mentioned_teams] ||= []

        doc.search('.//text()').each do |node|
          content = node.to_html
          # Cheap pre-check before walking ancestors or running the regexp.
          next unless content.include?('@')
          next if has_ancestor?(node, IGNORE_PARENTS)

          html = mention_link_filter(content, base_url, team_pattern)
          next if html == content

          node.replace(html)
        end
        doc
      end

      # The Regexp used to find mentions: the :team_pattern context value, or
      # TeamPattern when none was given.
      def team_pattern
        context[:team_pattern] || TeamPattern
      end

      # Replace @org/team mentions in text with links to the mentioned team's
      # page.
      #
      # text         - String text to replace @mention team names in.
      # _base_url    - Accepted for signature compatibility but not used; links
      #                are built from the filter's own #base_url (the
      #                :base_url context value).
      # team_pattern - Regular expression used to identify teams in text
      #
      # Returns a string with @team mentions replaced with links. All links have a
      # 'team-mention' class name attached for styling.
      def mention_link_filter(text, _base_url = '/', team_pattern = TeamPattern)
        self.class.mentioned_teams_in(text, team_pattern) do |match, org, team|
          link = link_to_mentioned_team(org, team)

          # A nil link (e.g. from an overriding subclass) leaves the text as is.
          link ? match.sub("@#{org}/#{team}", link) : match
        end
      end

      # Builds the anchor markup for one mention and records the team name in
      # result[:mentioned_teams] (without duplicates).
      #
      # org  - String organization name.
      # team - String team name.
      #
      # Returns the String <a> element.
      def link_to_mentioned_team(org, team)
        result[:mentioned_teams] |= [team]

        # Work on a copy so the context's base URL string is never mutated.
        url = base_url.dup
        url << '/' unless url.match?(%r{[/~]\z})

        "<a href='#{url}#{org}/#{team}' class='team-mention'>" \
          "@#{org}/#{team}" \
          '</a>'
      end
    end
  end
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'uri'
|
4
|
-
|
5
|
-
module HTML
  class Pipeline
    # HTML Filter for replacing relative and root relative image URLs with
    # fully qualified URLs
    #
    # This is useful if an image is root relative but should really be going
    # through a cdn, or if the content for the page assumes the host is known
    # i.e. scraped webpages and some RSS feeds.
    #
    # Context options:
    #   :image_base_url    - Base URL for image host for root relative src.
    #   :image_subpage_url - For relative src.
    #
    # This filter does not write additional information to the context.
    # This filter would need to be run before CamoFilter.
    class AbsoluteSourceFilter < Filter
      # Matches a src that already carries an http(s) scheme. Requiring the
      # "://" keeps relative paths such as "http-icons/a.png" from being
      # mistaken for absolute URLs.
      ABSOLUTE_HTTP_SRC = %r{\Ahttps?://}i.freeze

      # Rewrites the src of every <img> that is not already an absolute
      # http(s) URL by joining it onto the configured base URL.
      #
      # Returns the (modified) document.
      # Raises RuntimeError when the context key needed for a given src
      # (:image_base_url or :image_subpage_url) is missing.
      def call
        doc.search('img').each do |element|
          src = element['src'].to_s.strip
          next if src.empty?
          next if src.match?(ABSOLUTE_HTTP_SRC)

          base = src.start_with?('/') ? image_base_url : image_subpage_url

          begin
            element['src'] = URI.join(base, src).to_s
          rescue StandardError
            # Best effort: leave a src that cannot be joined untouched, but do
            # not swallow signals or exit requests.
            next
          end
        end
        doc
      end

      # Private: the base url you want to use
      def image_base_url
        context[:image_base_url] || raise("Missing context :image_base_url for #{self.class.name}")
      end

      # Private: the relative url you want to use
      def image_subpage_url
        context[:image_subpage_url] || raise("Missing context :image_subpage_url for #{self.class.name}")
      end
    end
  end
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
HTML::Pipeline.require_dependency('rinku', 'AutolinkFilter')
|
4
|
-
|
5
|
-
module HTML
  class Pipeline
    # HTML Filter that turns bare URLs in the markup into links via Rinku.
    #
    # Context options:
    #   :autolink  - set to false to return the HTML untouched
    #   :link_mode - :all, :urls or :email_addresses (defaults to :urls)
    #   :link_attr - HTML attributes for the generated links
    #   :skip_tags - HTML tags inside which autolinking is skipped.
    #                See Rinku.skip_tags
    #   :flags     - additional Rinku flags. See https://github.com/vmg/rinku
    #
    # This filter does not write additional information to the context.
    class AutolinkFilter < Filter
      # Returns the HTML String with URLs linked, or the unmodified HTML when
      # autolinking is explicitly disabled.
      def call
        return html if context[:autolink] == false

        extra_flags = context[:flags]
        rinku_flags = extra_flags ? 0 | extra_flags : 0

        Rinku.auto_link(html, link_mode, context[:link_attr], context[:skip_tags], rinku_flags)
      end

      # The Rinku link mode: the :link_mode context value, or :urls.
      def link_mode
        context[:link_mode] || :urls
      end
    end
  end
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTML
  class Pipeline
    # Public: Runs a String of content through an HTML processing pipeline,
    # providing easy access to a generated DocumentFragment.
    #
    # Every accessor is memoized, so the pipeline runs at most once per
    # instance (as long as it returns a truthy result).
    class BodyContent
      # Public: Initialize a BodyContent.
      #
      # body     - A String body.
      # context  - A Hash of context options for the filters.
      # pipeline - A HTML::Pipeline object with one or more Filters.
      def initialize(body, context, pipeline)
        @body = body
        @context = context
        @pipeline = pipeline
      end

      # Public: Gets the memoized result of the body content as it passed through
      # the Pipeline.
      #
      # Returns a Hash, or something similar as defined by @pipeline.result_class.
      def result
        @result ||= @pipeline.call(@body, @context)
      end

      # Public: Gets the updated body from the Pipeline result.
      #
      # Returns a String or DocumentFragment.
      def output
        @output ||= result[:output]
      end

      # Public: Parses the output into a DocumentFragment.
      #
      # Returns a DocumentFragment.
      def document
        @document ||= HTML::Pipeline.parse(output)
      end
    end
  end
end
|
@@ -1,105 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'openssl'
|
4
|
-
require 'uri'
|
5
|
-
|
6
|
-
module HTML
  class Pipeline
    # HTML Filter for replacing http image URLs with camo versions. See:
    #
    # https://github.com/atmos/camo
    #
    # All images provided in user content should be run through this
    # filter so that http image sources do not cause mixed-content warnings
    # in browser clients.
    #
    # Context options:
    #   :asset_proxy (required) - Base URL for constructed asset proxy URLs.
    #   :asset_proxy_secret_key (required) - The shared secret used to encode URLs.
    #   :asset_proxy_allowlist - Array of host Strings or Regexps to skip
    #                            src rewriting. The legacy :asset_proxy_whitelist
    #                            key is read when this one is absent.
    #   :disable_asset_proxy - When truthy, the document is returned unchanged.
    #
    # This filter does not write additional information to the context.
    class CamoFilter < Filter
      # Hijacks images in the markup provided, replacing them with URLs that
      # go through the github asset proxy. The original src is preserved in a
      # data-canonical-src attribute.
      #
      # Returns the (modified) document.
      def call
        return doc unless asset_proxy_enabled?

        doc.search('img').each do |element|
          original_src = element['src']
          next unless original_src

          begin
            uri = URI.parse(original_src)
          rescue StandardError
            # Best effort: an unparseable src is left as is. StandardError (not
            # Exception) so signals and exit requests still propagate.
            next
          end

          # Relative sources have no host and are not proxied.
          next if uri.host.nil?
          next if asset_host_allowed?(uri.host)

          element['src'] = asset_proxy_url(original_src)
          element['data-canonical-src'] = original_src
        end
        doc
      end

      # Implementation of validate hook.
      # Errors should raise exceptions or use an existing validator.
      def validate
        needs :asset_proxy, :asset_proxy_secret_key
      end

      # The camouflaged URL for a given image URL:
      # <proxy host>/<HMAC digest of url>/<hex-encoded url>.
      def asset_proxy_url(url)
        "#{asset_proxy_host}/#{asset_url_hash(url)}/#{hexencode(url)}"
      end

      # Private: calculate the HMAC digest for a image source URL.
      def asset_url_hash(url)
        OpenSSL::HMAC.hexdigest('sha1', asset_proxy_secret_key, url)
      end

      # Private: Return true if asset proxy filter should be enabled
      def asset_proxy_enabled?
        !context[:disable_asset_proxy]
      end

      # Private: the host to use for generated asset proxied URLs.
      def asset_proxy_host
        context[:asset_proxy]
      end

      # Private: the shared secret used to sign proxied URLs.
      def asset_proxy_secret_key
        context[:asset_proxy_secret_key]
      end

      # Deprecated: use #asset_proxy_allowlist. Emits a deprecation warning.
      def asset_proxy_whitelist
        warn "[DEPRECATION] 'asset_proxy_whitelist' is deprecated. Please use 'asset_proxy_allowlist' instead."
        asset_proxy_allowlist
      end

      # Private: hosts that must not be proxied. Reads :asset_proxy_allowlist,
      # then the legacy :asset_proxy_whitelist key, and defaults to [].
      def asset_proxy_allowlist
        context[:asset_proxy_allowlist] || context[:asset_proxy_whitelist] || []
      end

      # Deprecated: use #asset_host_allowed?. Emits a deprecation warning.
      def asset_host_whitelisted?(host)
        warn "[DEPRECATION] 'asset_host_whitelisted?' is deprecated. Please use 'asset_host_allowed?' instead."
        asset_host_allowed?(host)
      end

      # Private: true when host equals a String entry of the allowlist or is
      # matched by a non-String (Regexp) entry.
      def asset_host_allowed?(host)
        asset_proxy_allowlist.any? do |entry|
          entry.is_a?(String) ? host == entry : entry.match?(host)
        end
      end

      # Private: helper to hexencode a string. Each byte ends up encoded into
      # two characters, zero padded value in the range [0-9a-f].
      def hexencode(str)
        str.unpack('H*').first
      end
    end
  end
end
|
@@ -1,69 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
HTML::Pipeline.require_dependency('escape_utils', 'EmailReplyFilter')
|
4
|
-
HTML::Pipeline.require_dependency('email_reply_parser', 'EmailReplyFilter')
|
5
|
-
|
6
|
-
require 'cgi' # CGI.escapeHTML is used below but was never required by this file

module HTML
  class Pipeline
    # HTML Filter that converts email reply text into an HTML DocumentFragment.
    # It must be used as the first filter in a pipeline.
    #
    # Context options:
    #   :hide_quoted_email_addresses - when truthy, email addresses inside
    #                                  quoted fragments are replaced with
    #                                  HIDDEN_EMAIL_PATTERN.
    #
    # This filter does not write any additional information to the context hash.
    class EmailReplyFilter < TextFilter
      include EscapeUtils

      EMAIL_HIDDEN_HEADER    = %(<span class="email-hidden-toggle"><a href="#">…</a></span><div class="email-hidden-reply" style="display:none">).freeze
      EMAIL_QUOTED_HEADER    = %(<div class="email-quoted-reply">).freeze
      EMAIL_SIGNATURE_HEADER = %(<div class="email-signature-reply">).freeze
      EMAIL_FRAGMENT_HEADER  = %(<div class="email-fragment">).freeze
      EMAIL_HEADER_END       = '</div>'.freeze
      EMAIL_REGEX            = /[^@\s.][^@\s]*@\[?[a-z0-9.-]+\]?/
      HIDDEN_EMAIL_PATTERN   = '***@***.***'.freeze

      # Leading quote marker of a line. The text has already been HTML-escaped
      # when this runs, so a literal ">" appears as "&gt;"; both forms are
      # accepted.
      QUOTE_MARKER = /^\s*(>|&gt;)/

      # Scans an email body to determine which bits are quoted and which should
      # be hidden. EmailReplyParser is used to split the comment into an Array
      # of quoted or unquoted Blocks. Each block is HTML-escaped and wrapped in
      # a <div> so hidden blocks can be hidden and quoted blocks styled
      # differently. The first hidden block opens an "email-hidden-reply" <div>
      # that stays open around every following block and is closed at the end.
      #
      # Returns the email comment HTML as a String
      def call
        found_hidden = false

        paragraphs = EmailReplyParser.read(text.dup).fragments.map do |fragment|
          body = CGI.escapeHTML(fragment.to_s.strip).gsub(QUOTE_MARKER, '')

          header =
            if fragment.quoted?
              body = body.gsub(EMAIL_REGEX, HIDDEN_EMAIL_PATTERN) if context[:hide_quoted_email_addresses]
              EMAIL_QUOTED_HEADER
            elsif fragment.signature?
              EMAIL_SIGNATURE_HEADER
            else
              EMAIL_FRAGMENT_HEADER
            end

          wrapped = "#{header}#{body}#{EMAIL_HEADER_END}"

          # Only the first hidden fragment opens the hidden wrapper.
          if fragment.hidden? && !found_hidden
            found_hidden = true
            wrapped = "#{EMAIL_HIDDEN_HEADER}#{wrapped}"
          end

          wrapped
        end

        # Close the hidden wrapper opened above, if any.
        paragraphs << EMAIL_HEADER_END if found_hidden
        paragraphs.join("\n")
      end
    end
  end
end
|
data/lib/html/pipeline/filter.rb
DELETED
@@ -1,165 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTML
  class Pipeline
    # Base class for user content HTML filters. Each filter takes an
    # HTML string or Nokogiri::HTML::DocumentFragment, performs
    # modifications and/or writes information to the result hash. Filters must
    # return a DocumentFragment (typically the same instance provided to the call
    # method) or a String with HTML markup.
    #
    # Example filter that replaces all images with trollface:
    #
    #   class FuuuFilter < HTML::Pipeline::Filter
    #     def call
    #       doc.search('img').each do |img|
    #         img['src'] = "http://paradoxdgn.com/junk/avatars/trollface.jpg"
    #       end
    #     end
    #   end
    #
    # The context Hash passes options to filters and should not be changed in
    # place. A Result Hash allows filters to make extracted information
    # available to the caller and is mutable.
    #
    # Common context options:
    #   :base_url   - The site's base URL
    #   :repository - A Repository providing context for the HTML being processed
    #
    # Each filter may define additional options and output values. See the class
    # docs for more info.
    class Filter
      # Raised by #html when the filter was given neither a String nor a document.
      class InvalidDocumentException < StandardError; end

      # doc     - String of HTML markup or an already parsed document.
      # context - Hash of options for the filter (defaults to {}).
      # result  - Hash the filter may write extracted information to
      #           (defaults to {}).
      #
      # Runs #validate, so subclasses may raise for a missing context here.
      def initialize(doc, context = nil, result = nil)
        if doc.is_a?(String)
          @html = doc.to_str
          @doc = nil
        else
          @doc = doc
          @html = nil
        end
        @context = context || {}
        @result = result || {}
        validate
      end

      # Public: Returns a simple Hash used to pass extra information into filters
      # and also to allow filters to make extracted information available to the
      # caller.
      attr_reader :context

      # Public: Returns a Hash used to allow filters to pass back information
      # to callers of the various Pipelines. This can be used for
      # #mentioned_users, for example.
      attr_reader :result

      # The Nokogiri::HTML::DocumentFragment to be manipulated. If the filter was
      # provided a String, parse into a DocumentFragment the first time this
      # method is called.
      def doc
        @doc ||= parse_html(html)
      end

      # The String representation of the document. If a DocumentFragment was
      # provided to the Filter, it is serialized into a String when this method is
      # called.
      #
      # Raises InvalidDocumentException when neither form is available.
      def html
        raise InvalidDocumentException if @html.nil? && @doc.nil?

        @html || doc.to_html
      end

      # The main filter entry point. The doc attribute is guaranteed to be a
      # Nokogiri::HTML::DocumentFragment when invoked. Subclasses should modify
      # this document in place or extract information and add it to the context
      # hash.
      def call
        raise NotImplementedError
      end

      # Make sure the context has everything we need. Noop: Subclasses can override.
      def validate; end

      # The Repository object provided in the context hash, or nil when no
      # :repository was specified.
      #
      # It's assumed that the repository context has already been checked
      # for permissions
      def repository
        context[:repository]
      end

      # The User object provided in the context hash, or nil when no user
      # was specified
      def current_user
        context[:current_user]
      end

      # The site's base URL provided in the context hash, or '/' when no
      # base URL was specified.
      def base_url
        context[:base_url] || '/'
      end

      # Parses the given HTML by delegating to HTML::Pipeline.parse and returns
      # whatever that returns.
      def parse_html(html)
        HTML::Pipeline.parse(html)
      end

      # Helper method for filter subclasses used to determine if any of a node's
      # ancestors have one of the tag names specified.
      #
      # node - The Node object to check.
      # tags - A collection of tag name strings to check (anything responding
      #        to include?). These should be downcase.
      #
      # Returns true when the node has a matching ancestor, false otherwise.
      def has_ancestor?(node, tags)
        ancestor = node.parent
        while ancestor
          return true if tags.include?(ancestor.name.downcase)

          ancestor = ancestor.parent
        end
        false
      end

      # Perform a filter on doc with the given context.
      #
      # Returns a HTML::Pipeline::DocumentFragment or a String containing HTML
      # markup.
      def self.call(doc, context = nil, result = nil)
        new(doc, context, result).call
      end

      # Like call but guarantees that a DocumentFragment is returned, even when
      # the last filter returns a String.
      def self.to_document(input, context = nil)
        html = call(input, context)
        HTML::Pipeline.parse(html)
      end

      # Like call but guarantees that a string of HTML markup is returned.
      def self.to_html(input, context = nil)
        output = call(input, context)
        output.respond_to?(:to_html) ? output.to_html : output.to_s
      end

      # Validator for required context. Checks that every given key is present
      # in the context hash.
      #
      # Raises ArgumentError listing all the missing keys and the filter that
      # requires them.
      def needs(*keys)
        missing = keys.reject { |key| context.include?(key) }
        return if missing.empty?

        raise ArgumentError,
              "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}"
      end
    end
  end
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTML
  class Pipeline
    # HTML Filter for replacing http references to :http_url with https versions.
    # Subdomain references are not rewritten.
    #
    # Context options:
    #   :http_url - The HTTP url to force HTTPS. Falls back to :base_url
    class HttpsFilter < Filter
      # Rewrites the scheme of every <a> whose href starts with #http_url.
      #
      # Returns the (modified) document.
      def call
        doc.css(%(a[href^="#{http_url}"])).each do |element|
          # \A (not ^) so only the very start of the href is rewritten, never
          # an "http:" that happens to follow a newline inside the value.
          element['href'] = element['href'].sub(/\Ahttp:/, 'https:')
        end
        doc
      end

      # HTTP url to replace. Falls back to :base_url
      def http_url
        context[:http_url] || context[:base_url]
      end

      # Raise error if neither :http_url nor :base_url is defined
      def validate
        needs :http_url unless http_url
      end
    end
  end
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTML
  class Pipeline
    # Gives every image a max-width inline style and, unless the image already
    # sits inside a link, wraps it in an <a> that opens the full size image in
    # a new tab.
    #
    # Inline max-width styles are especially useful in HTML email, which does
    # not use global stylesheets.
    class ImageMaxWidthFilter < Filter
      # Returns the (modified) document.
      def call
        doc.search('img').each do |image|
          # Images that already carry a style attribute are left alone, as are
          # images whose src starts with "javascript" (avoids js injection via
          # javascript: urls).
          next if image['style'] || image['src'].to_s.strip.match?(/\Ajavascript/i)

          image['style'] = 'max-width:100%;'

          link_image(image) unless has_ancestor?(image, %w[a])
        end

        doc
      end

      # Replaces the image with an <a target="_blank"> pointing at the image's
      # src and containing a copy of the image.
      def link_image(element)
        anchor = doc.document.create_element('a', href: element['src'], target: '_blank')
        anchor.add_child(element.dup)
        element.replace(anchor)
      end
    end
  end
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
HTML::Pipeline.require_dependency('commonmarker', 'MarkdownFilter')
|
4
|
-
|
5
|
-
module HTML
  class Pipeline
    # HTML Filter that converts Markdown text into HTML. This is different from
    # most filters in that it can take a non-HTML as input. It must be used as
    # the first filter in a pipeline.
    #
    # Context options:
    #   :gfm => false - Disable GFM line-end processing (no :HARDBREAKS)
    #   :unsafe - when truthy, pass :UNSAFE to CommonMarker
    #   :commonmarker_extensions => [ :table, :strikethrough,
    #       :tagfilter, :autolink ] - Commonmarker extensions to include
    #   :commonmarker_renderer - a class derived from
    #       CommonMarker::HtmlRenderer used to render the parsed document
    #
    # This filter does not write any additional information to the context hash.
    class MarkdownFilter < TextFilter
      DEFAULT_COMMONMARKER_EXTENSIONS = %i[table strikethrough tagfilter autolink].freeze

      # Strips carriage returns from the text after the base initializer ran.
      def initialize(text, context = nil, result = nil)
        super(text, context, result)
        @text = @text.delete("\r")
      end

      # Convert Markdown to HTML, either with the custom renderer class given
      # in the context or with CommonMarker.render_html.
      #
      # Returns the HTML String with trailing whitespace removed.
      # Raises ArgumentError when :commonmarker_renderer is not a class derived
      # from CommonMarker::HtmlRenderer.
      def call
        extensions = context.fetch(:commonmarker_extensions, DEFAULT_COMMONMARKER_EXTENSIONS)
        renderer = context[:commonmarker_renderer]

        rendered =
          if renderer
            unless renderer.is_a?(Class) && renderer < CommonMarker::HtmlRenderer
              raise ArgumentError, '`commonmarker_renderer` must be derived from `CommonMarker::HtmlRenderer`'
            end

            parse_options = context[:unsafe] ? [:UNSAFE] : :DEFAULT
            document = CommonMarker.render_doc(@text, parse_options, extensions)
            renderer.new(options: render_options, extensions: extensions).render(document)
          else
            CommonMarker.render_html(@text, render_options, extensions)
          end

        rendered.rstrip
      end

      private

      # Render options shared by both rendering paths: always
      # :GITHUB_PRE_LANG, plus :HARDBREAKS unless :gfm is false, plus :UNSAFE
      # when :unsafe is set.
      def render_options
        options = [:GITHUB_PRE_LANG]
        options << :HARDBREAKS if context[:gfm] != false
        options << :UNSAFE if context[:unsafe]
        options
      end
    end
  end
end
|