html-pipeline 2.14.3 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/FUNDING.yml +11 -3
- data/.github/dependabot.yml +27 -0
- data/.github/workflows/automerge.yml +13 -0
- data/.github/workflows/ci.yml +22 -0
- data/.github/workflows/lint.yml +23 -0
- data/.github/workflows/publish.yml +19 -0
- data/.rubocop.yml +17 -0
- data/.ruby-version +1 -0
- data/.vscode/settings.json +8 -0
- data/CHANGELOG.md +119 -2
- data/Gemfile +31 -15
- data/{LICENSE → LICENSE.txt} +2 -2
- data/README.md +241 -224
- data/Rakefile +14 -7
- data/UPGRADING.md +34 -0
- data/html-pipeline.gemspec +31 -21
- data/lib/html-pipeline.rb +3 -0
- data/lib/html_pipeline/convert_filter/markdown_filter.rb +26 -0
- data/lib/html_pipeline/convert_filter.rb +17 -0
- data/lib/html_pipeline/filter.rb +89 -0
- data/lib/html_pipeline/node_filter/absolute_source_filter.rb +54 -0
- data/lib/html_pipeline/node_filter/asset_proxy_filter.rb +86 -0
- data/lib/{html/pipeline → html_pipeline/node_filter}/emoji_filter.rb +58 -54
- data/lib/html_pipeline/node_filter/https_filter.rb +22 -0
- data/lib/html_pipeline/node_filter/image_max_width_filter.rb +40 -0
- data/lib/{html/pipeline/@mention_filter.rb → html_pipeline/node_filter/mention_filter.rb} +54 -68
- data/lib/html_pipeline/node_filter/syntax_highlight_filter.rb +62 -0
- data/lib/html_pipeline/node_filter/table_of_contents_filter.rb +70 -0
- data/lib/html_pipeline/node_filter/team_mention_filter.rb +105 -0
- data/lib/html_pipeline/node_filter.rb +31 -0
- data/lib/html_pipeline/sanitization_filter.rb +188 -0
- data/lib/{html/pipeline → html_pipeline/text_filter}/image_filter.rb +3 -3
- data/lib/{html/pipeline → html_pipeline/text_filter}/plain_text_input_filter.rb +3 -5
- data/lib/html_pipeline/text_filter.rb +21 -0
- data/lib/html_pipeline/version.rb +5 -0
- data/lib/html_pipeline.rb +281 -0
- metadata +58 -54
- data/.travis.yml +0 -43
- data/Appraisals +0 -19
- data/CONTRIBUTING.md +0 -60
- data/bin/html-pipeline +0 -78
- data/lib/html/pipeline/@team_mention_filter.rb +0 -99
- data/lib/html/pipeline/absolute_source_filter.rb +0 -52
- data/lib/html/pipeline/autolink_filter.rb +0 -34
- data/lib/html/pipeline/body_content.rb +0 -44
- data/lib/html/pipeline/camo_filter.rb +0 -105
- data/lib/html/pipeline/email_reply_filter.rb +0 -69
- data/lib/html/pipeline/filter.rb +0 -165
- data/lib/html/pipeline/https_filter.rb +0 -29
- data/lib/html/pipeline/image_max_width_filter.rb +0 -37
- data/lib/html/pipeline/markdown_filter.rb +0 -56
- data/lib/html/pipeline/sanitization_filter.rb +0 -144
- data/lib/html/pipeline/syntax_highlight_filter.rb +0 -50
- data/lib/html/pipeline/text_filter.rb +0 -16
- data/lib/html/pipeline/textile_filter.rb +0 -25
- data/lib/html/pipeline/toc_filter.rb +0 -69
- data/lib/html/pipeline/version.rb +0 -7
- data/lib/html/pipeline.rb +0 -210
@@ -1,9 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "set"
|
4
4
|
|
5
|
-
|
6
|
-
class
|
5
|
+
class HTMLPipeline
|
6
|
+
class NodeFilter
|
7
7
|
# HTML filter that replaces @user mentions with links. Mentions within <pre>,
|
8
8
|
# <code>, and <a> elements are ignored. Mentions that reference users that do
|
9
9
|
# not exist are ignored.
|
@@ -16,71 +16,69 @@ module HTML
|
|
16
16
|
# :username_pattern - Used to provide a custom regular expression to
|
17
17
|
# identify usernames
|
18
18
|
#
|
19
|
-
class MentionFilter <
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
text
|
36
|
-
|
37
|
-
|
19
|
+
class MentionFilter < NodeFilter
|
20
|
+
class << self
|
21
|
+
# Public: Find user @mentions in text. See
|
22
|
+
# MentionFilter#mention_link_filter.
|
23
|
+
#
|
24
|
+
# MentionFilter.mentioned_logins_in(text) do |match, login|
|
25
|
+
# "<a href=...>#{login}</a>"
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# text - String text to search.
|
29
|
+
#
|
30
|
+
# Yields the String match and the String login name (the block receives
|
31
|
+
# exactly two arguments). The yield's return replaces the match in
|
32
|
+
# the original text.
|
33
|
+
#
|
34
|
+
# Returns a String replaced with the return of the block.
|
35
|
+
def mentioned_logins_in(text, username_pattern = USERNAME_PATTERN)
|
36
|
+
text.gsub(MENTION_PATTERNS[username_pattern]) do |match|
|
37
|
+
login = Regexp.last_match(1)
|
38
|
+
yield match, login
|
39
|
+
end
|
38
40
|
end
|
39
41
|
end
|
40
|
-
|
41
42
|
# Hash that contains all of the mention patterns used by the pipeline
|
42
|
-
|
43
|
-
hash[key] =
|
43
|
+
MENTION_PATTERNS = Hash.new do |hash, key|
|
44
|
+
hash[key] = %r{
|
44
45
|
(?:^|\W) # beginning of string or non-word char
|
45
46
|
@((?>#{key})) # @username
|
46
|
-
(
|
47
|
+
(?!/) # without a trailing slash
|
47
48
|
(?=
|
48
49
|
\.+[ \t\W]| # dots followed by space or non-word character
|
49
50
|
\.+$| # dots at end of line
|
50
51
|
[^0-9a-zA-Z_.]| # non-word character except dot
|
51
52
|
$ # end of line
|
52
53
|
)
|
53
|
-
|
54
|
+
}ix
|
54
55
|
end
|
55
56
|
|
56
57
|
# Default pattern used to extract usernames from text. The value can be
|
57
58
|
# overridden by providing the username_pattern variable in the context.
|
58
|
-
|
59
|
-
|
60
|
-
# List of username logins that, when mentioned, link to the blog post
|
61
|
-
# about @mentions instead of triggering a real mention.
|
62
|
-
MentionLogins = %w[
|
63
|
-
mention
|
64
|
-
mentions
|
65
|
-
mentioned
|
66
|
-
mentioning
|
67
|
-
].freeze
|
59
|
+
USERNAME_PATTERN = /[a-z0-9][a-z0-9-]*/
|
68
60
|
|
69
61
|
# Don't look for mentions in text nodes that are children of these elements
|
70
|
-
IGNORE_PARENTS =
|
62
|
+
IGNORE_PARENTS = ["pre", "code", "a", "style", "script"]
|
71
63
|
|
72
|
-
|
64
|
+
SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: IGNORE_PARENTS)
|
65
|
+
|
66
|
+
def after_initialize
|
73
67
|
result[:mentioned_usernames] ||= []
|
68
|
+
end
|
74
69
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
70
|
+
def selector
|
71
|
+
SELECTOR
|
72
|
+
end
|
73
|
+
|
74
|
+
def handle_text_chunk(text)
|
75
|
+
content = text.to_s
|
76
|
+
return unless content.include?("@")
|
77
|
+
|
78
|
+
html = mention_link_filter(content, base_url: base_url, username_pattern: username_pattern)
|
79
|
+
return if html == content
|
80
|
+
|
81
|
+
text.replace(html, as: :html)
|
84
82
|
end
|
85
83
|
|
86
84
|
# The URL to provide when someone @mentions a "mention" name, such
|
@@ -90,7 +88,7 @@ module HTML
|
|
90
88
|
end
|
91
89
|
|
92
90
|
def username_pattern
|
93
|
-
context[:username_pattern] ||
|
91
|
+
context[:username_pattern] || USERNAME_PATTERN
|
94
92
|
end
|
95
93
|
|
96
94
|
# Replace user @mentions in text with links to the mentioned user's
|
@@ -105,35 +103,23 @@ module HTML
|
|
105
103
|
#
|
106
104
|
# Returns a string with @mentions replaced with links. All links have a
|
107
105
|
# 'user-mention' class name attached for styling.
|
108
|
-
def mention_link_filter(text,
|
109
|
-
self.class.mentioned_logins_in(text, username_pattern) do |match, login
|
110
|
-
link =
|
111
|
-
if is_mentioned
|
112
|
-
link_to_mention_info(login, info_url)
|
113
|
-
else
|
114
|
-
link_to_mentioned_user(login)
|
115
|
-
end
|
106
|
+
def mention_link_filter(text, base_url: "/", username_pattern: USERNAME_PATTERN)
|
107
|
+
self.class.mentioned_logins_in(text, username_pattern) do |match, login|
|
108
|
+
link = link_to_mentioned_user(base_url, login)
|
116
109
|
|
117
110
|
link ? match.sub("@#{login}", link) : match
|
118
111
|
end
|
119
112
|
end
|
120
113
|
|
121
|
-
def
|
122
|
-
return "@#{text}" if info_url.nil?
|
123
|
-
"<a href='#{info_url}' class='user-mention'>" \
|
124
|
-
"@#{text}" \
|
125
|
-
'</a>'
|
126
|
-
end
|
127
|
-
|
128
|
-
def link_to_mentioned_user(login)
|
114
|
+
def link_to_mentioned_user(base_url, login)
|
129
115
|
result[:mentioned_usernames] |= [login]
|
130
116
|
|
131
117
|
url = base_url.dup
|
132
|
-
url <<
|
118
|
+
url << "/" unless %r{[/~]\z}.match?(url)
|
133
119
|
|
134
|
-
"<a href
|
120
|
+
"<a href=\"#{url << login}\" class=\"user-mention\">" \
|
135
121
|
"@#{login}" \
|
136
|
-
|
122
|
+
"</a>"
|
137
123
|
end
|
138
124
|
end
|
139
125
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
HTMLPipeline.require_dependency("rouge", "SyntaxHighlightFilter")
|
4
|
+
|
5
|
+
class HTMLPipeline
|
6
|
+
class NodeFilter
|
7
|
+
# HTML Filter that syntax highlights text inside code blocks.
|
8
|
+
#
|
9
|
+
# Context options:
|
10
|
+
#
|
11
|
+
# :highlight => String naming the language used to pick a lexer when the pre element has no lang attribute. Defaults to nil.
|
12
|
+
# :scope => String used as the class prefix added to the pre element.
|
13
|
+
# Defaults to "highlight", which yields "highlight highlight-css" for a css code block.
|
14
|
+
#
|
15
|
+
# This filter does not write any additional information to the context hash.
|
16
|
+
class SyntaxHighlightFilter < NodeFilter
|
17
|
+
def initialize(context: {}, result: {})
|
18
|
+
super(context: context, result: result)
|
19
|
+
# TODO: test the optionality of this
|
20
|
+
@formatter = context[:formatter] || Rouge::Formatters::HTML.new
|
21
|
+
end
|
22
|
+
|
23
|
+
SELECTOR = Selma::Selector.new(match_element: "pre", match_text_within: "pre")
|
24
|
+
|
25
|
+
def selector
|
26
|
+
SELECTOR
|
27
|
+
end
|
28
|
+
|
29
|
+
def handle_element(element)
|
30
|
+
default = context[:highlight]&.to_s
|
31
|
+
@lang = element["lang"] || default
|
32
|
+
|
33
|
+
scope = context.fetch(:scope, "highlight")
|
34
|
+
|
35
|
+
element["class"] = "#{scope} #{scope}-#{@lang}" if include_lang?
|
36
|
+
end
|
37
|
+
|
38
|
+
def handle_text_chunk(text)
|
39
|
+
return if @lang.nil?
|
40
|
+
return if (lexer = lexer_for(@lang)).nil?
|
41
|
+
|
42
|
+
content = text.to_s
|
43
|
+
|
44
|
+
text.replace(highlight_with_timeout_handling(content, lexer), as: :html)
|
45
|
+
end
|
46
|
+
|
47
|
+
def highlight_with_timeout_handling(text, lexer)
|
48
|
+
Rouge.highlight(text, lexer, @formatter)
|
49
|
+
rescue Timeout::Error => _e
|
50
|
+
text
|
51
|
+
end
|
52
|
+
|
53
|
+
def lexer_for(lang)
|
54
|
+
Rouge::Lexer.find(lang)
|
55
|
+
end
|
56
|
+
|
57
|
+
def include_lang?
|
58
|
+
!@lang.nil? && !@lang.empty?
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class HTMLPipeline
|
4
|
+
class NodeFilter
|
5
|
+
# Generates a Table of Contents: an array of hashes containing:
|
6
|
+
# * `href`: the relative link to the header
|
7
|
+
# * `text`: the text of the header
|
8
|
+
|
9
|
+
# Examples
|
10
|
+
#
|
11
|
+
# TocPipeline =
|
12
|
+
# HTMLPipeline.new [
|
13
|
+
# HTMLPipeline::TableOfContentsFilter
|
14
|
+
# ]
|
15
|
+
# # => #<HTMLPipeline:0x007fc13c4528d8...>
|
16
|
+
# orig = %(<h1>Ice cube</h1><p>is not for the pop chart</p>)
|
17
|
+
# # => "<h1>Ice cube</h1><p>is not for the pop chart</p>"
|
18
|
+
# result = {}
|
19
|
+
# # => {}
|
20
|
+
# TocPipeline.call(orig, {}, result)
|
21
|
+
# # => {:toc=> ...}
|
22
|
+
# result[:toc]
|
23
|
+
# # => [{:href=>"#ice-cube", :text=>"Ice cube"}]
|
24
|
+
# result[:output].to_s
|
25
|
+
# # => "<h1>\n<a id=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..."
|
26
|
+
class TableOfContentsFilter < NodeFilter
|
27
|
+
SELECTOR = Selma::Selector.new(
|
28
|
+
match_element: "h1 a[href], h2 a[href], h3 a[href], h4 a[href], h5 a[href], h6 a[href]",
|
29
|
+
match_text_within: "h1, h2, h3, h4, h5, h6",
|
30
|
+
)
|
31
|
+
|
32
|
+
def selector
|
33
|
+
SELECTOR
|
34
|
+
end
|
35
|
+
|
36
|
+
# The icon that will be placed next to an anchored rendered markdown header
|
37
|
+
def anchor_html
|
38
|
+
@context[:anchor_html] || %(<span aria-hidden="true" class="anchor"></span>)
|
39
|
+
end
|
40
|
+
|
41
|
+
# The class that will be attached on the anchored rendered markdown header
|
42
|
+
def classes
|
43
|
+
context[:classes] || "anchor"
|
44
|
+
end
|
45
|
+
|
46
|
+
def after_initialize
|
47
|
+
result[:toc] = []
|
48
|
+
end
|
49
|
+
|
50
|
+
def handle_element(element)
|
51
|
+
header_href = element["href"]
|
52
|
+
|
53
|
+
return unless header_href.start_with?("#")
|
54
|
+
|
55
|
+
header_id = header_href[1..-1]
|
56
|
+
|
57
|
+
element["id"] = header_id
|
58
|
+
element["class"] = classes
|
59
|
+
|
60
|
+
element.set_inner_content(anchor_html, as: :html)
|
61
|
+
|
62
|
+
result[:toc] << { href: header_href }
|
63
|
+
end
|
64
|
+
|
65
|
+
def handle_text_chunk(text)
|
66
|
+
result[:toc].last[:text] = text.to_s
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
4
|
+
|
5
|
+
class HTMLPipeline
|
6
|
+
class NodeFilter
|
7
|
+
# HTML filter that replaces @org/team mentions with links. Mentions within
|
8
|
+
# <pre>, <code>, <a>, <style>, and <script> elements are ignored.
|
9
|
+
#
|
10
|
+
# Context options:
|
11
|
+
# :base_url - Used to construct links to team profile pages for each
|
12
|
+
# mention.
|
13
|
+
# :team_pattern - Used to provide a custom regular expression to
|
14
|
+
# identify team names
|
15
|
+
#
|
16
|
+
class TeamMentionFilter < NodeFilter
|
17
|
+
class << self
|
18
|
+
# Public: Find @org/team mentions in text. See
|
19
|
+
# TeamMentionFilter#team_mention_link_filter.
|
20
|
+
#
|
21
|
+
# TeamMentionFilter.mentioned_teams_in(text) do |match, org, team|
|
22
|
+
# "<a href=...>#{team}</a>"
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# text - String text to search.
|
26
|
+
#
|
27
|
+
# Yields the String match, org name, and team name. The yield's
|
28
|
+
# return replaces the match in the original text.
|
29
|
+
#
|
30
|
+
# Returns a String replaced with the return of the block.
|
31
|
+
def mentioned_teams_in(text, team_pattern = TEAM_PATTERN)
|
32
|
+
text.gsub(team_pattern) do |match|
|
33
|
+
org = Regexp.last_match(1)
|
34
|
+
team = Regexp.last_match(2)
|
35
|
+
yield match, org, team
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Default pattern used to extract team names from text. The value can be
|
41
|
+
# overridden by providing the team_pattern variable in the context. To
|
42
|
+
# properly link the mention, the pattern should be in the format of /@(1)\/(2)/.
|
43
|
+
TEAM_PATTERN = %r{
|
44
|
+
(?<=^|\W) # beginning of string or non-word char
|
45
|
+
@([a-z0-9][a-z0-9-]*) # @organization
|
46
|
+
(?:/|&\#47;?) # dividing slash
|
47
|
+
([a-z0-9][a-z0-9\-_]*) # team
|
48
|
+
\b
|
49
|
+
}ix
|
50
|
+
|
51
|
+
# Don't look for mentions in text nodes that are children of these elements
|
52
|
+
IGNORE_PARENTS = ["pre", "code", "a", "style", "script"]
|
53
|
+
|
54
|
+
SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: IGNORE_PARENTS)
|
55
|
+
|
56
|
+
def after_initialize
|
57
|
+
result[:mentioned_teams] = []
|
58
|
+
end
|
59
|
+
|
60
|
+
def selector
|
61
|
+
SELECTOR
|
62
|
+
end
|
63
|
+
|
64
|
+
def handle_text_chunk(text)
|
65
|
+
content = text.to_s
|
66
|
+
return unless content.include?("@")
|
67
|
+
|
68
|
+
text.replace(mention_link_filter(content, base_url: base_url, team_pattern: team_pattern), as: :html)
|
69
|
+
end
|
70
|
+
|
71
|
+
def team_pattern
|
72
|
+
context[:team_pattern] || TEAM_PATTERN
|
73
|
+
end
|
74
|
+
|
75
|
+
# Replace @org/team mentions in text with links to the mentioned team's
|
76
|
+
# page.
|
77
|
+
#
|
78
|
+
# text - String text to replace @mention team names in.
|
79
|
+
# base_url - The base URL used to construct team page URLs.
|
80
|
+
# team_pattern - Regular expression used to identify teams in text
|
81
|
+
#
|
82
|
+
# Returns a string with @team mentions replaced with links. All links have a
|
83
|
+
# 'team-mention' class name attached for styling.
|
84
|
+
def mention_link_filter(text, base_url: "/", team_pattern: TEAM_PATTERN)
|
85
|
+
self.class.mentioned_teams_in(text, team_pattern) do |match, org, team|
|
86
|
+
link = link_to_mentioned_team(base_url, org, team)
|
87
|
+
seperator = %r{/|&\#47;?}
|
88
|
+
|
89
|
+
link ? match.sub(/@#{org}#{seperator}#{team}/, link) : match
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def link_to_mentioned_team(base_url, org, team)
|
94
|
+
result[:mentioned_teams] |= [team]
|
95
|
+
|
96
|
+
url = base_url.dup
|
97
|
+
url << "/" unless %r{[/~]\z}.match?(url)
|
98
|
+
|
99
|
+
"<a href=\"#{url << org}/#{team}\" class=\"team-mention\">" \
|
100
|
+
"@#{org}/#{team}" \
|
101
|
+
"</a>"
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "selma"
|
4
|
+
|
5
|
+
class HTMLPipeline
|
6
|
+
class NodeFilter < Filter
|
7
|
+
def initialize(context: {}, result: {})
|
8
|
+
super(context: context, result: {})
|
9
|
+
send(:after_initialize) if respond_to?(:after_initialize)
|
10
|
+
end
|
11
|
+
|
12
|
+
# The String representation of the document.
|
13
|
+
def html
|
14
|
+
raise InvalidDocumentException if @html.nil? && @doc.nil?
|
15
|
+
|
16
|
+
@html || doc.to_html
|
17
|
+
end
|
18
|
+
|
19
|
+
def reset!
|
20
|
+
result = {} # rubocop:disable Lint/UselessAssignment
|
21
|
+
send(:after_initialize) if respond_to?(:after_initialize)
|
22
|
+
end
|
23
|
+
|
24
|
+
class << self
|
25
|
+
def call(html, context: {}, result: {})
|
26
|
+
node_filter = new(context: context, result: result)
|
27
|
+
Selma::Rewriter.new(sanitizer: nil, handlers: [node_filter]).rewrite(html)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,188 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class HTMLPipeline
|
4
|
+
# A special filter with sanitization routines and allowlists. This module defines
|
5
|
+
# what HTML is allowed in user provided content and fixes up issues with
|
6
|
+
# unbalanced tags and whatnot.
|
7
|
+
#
|
8
|
+
# See the Selma docs for more information on the underlying library:
|
9
|
+
#
|
10
|
+
# https://github.com/gjtorikian/selma/#readme
|
11
|
+
#
|
12
|
+
# This filter does not write additional information to the context.
|
13
|
+
class SanitizationFilter
|
14
|
+
VALID_PROTOCOLS = Selma::Sanitizer::Config::VALID_PROTOCOLS.dup
|
15
|
+
|
16
|
+
# The main sanitization allowlist. Only these elements and attributes are
|
17
|
+
# allowed through by default.
|
18
|
+
DEFAULT_CONFIG = Selma::Sanitizer::Config.freeze_config({
|
19
|
+
elements: [
|
20
|
+
"h1",
|
21
|
+
"h2",
|
22
|
+
"h3",
|
23
|
+
"h4",
|
24
|
+
"h5",
|
25
|
+
"h6",
|
26
|
+
"br",
|
27
|
+
"b",
|
28
|
+
"i",
|
29
|
+
"strong",
|
30
|
+
"em",
|
31
|
+
"a",
|
32
|
+
"pre",
|
33
|
+
"code",
|
34
|
+
"img",
|
35
|
+
"tt",
|
36
|
+
"div",
|
37
|
+
"ins",
|
38
|
+
"del",
|
39
|
+
"sup",
|
40
|
+
"sub",
|
41
|
+
"p",
|
42
|
+
"picture",
|
43
|
+
"ol",
|
44
|
+
"ul",
|
45
|
+
"table",
|
46
|
+
"thead",
|
47
|
+
"tbody",
|
48
|
+
"tfoot",
|
49
|
+
"blockquote",
|
50
|
+
"dl",
|
51
|
+
"dt",
|
52
|
+
"dd",
|
53
|
+
"kbd",
|
54
|
+
"q",
|
55
|
+
"samp",
|
56
|
+
"var",
|
57
|
+
"hr",
|
58
|
+
"ruby",
|
59
|
+
"rt",
|
60
|
+
"rp",
|
61
|
+
"li",
|
62
|
+
"tr",
|
63
|
+
"td",
|
64
|
+
"th",
|
65
|
+
"s",
|
66
|
+
"strike",
|
67
|
+
"summary",
|
68
|
+
"details",
|
69
|
+
"caption",
|
70
|
+
"figure",
|
71
|
+
"figcaption",
|
72
|
+
"abbr",
|
73
|
+
"bdo",
|
74
|
+
"cite",
|
75
|
+
"dfn",
|
76
|
+
"mark",
|
77
|
+
"small",
|
78
|
+
"source",
|
79
|
+
"span",
|
80
|
+
"time",
|
81
|
+
"wbr",
|
82
|
+
],
|
83
|
+
|
84
|
+
attributes: {
|
85
|
+
"a" => ["href"],
|
86
|
+
"img" => ["src", "longdesc", "loading", "alt"],
|
87
|
+
"div" => ["itemscope", "itemtype"],
|
88
|
+
"blockquote" => ["cite"],
|
89
|
+
"del" => ["cite"],
|
90
|
+
"ins" => ["cite"],
|
91
|
+
"q" => ["cite"],
|
92
|
+
"source" => ["srcset"],
|
93
|
+
all: [
|
94
|
+
"abbr",
|
95
|
+
"accept",
|
96
|
+
"accept-charset",
|
97
|
+
"accesskey",
|
98
|
+
"action",
|
99
|
+
"align",
|
100
|
+
"alt",
|
101
|
+
"aria-describedby",
|
102
|
+
"aria-hidden",
|
103
|
+
"aria-label",
|
104
|
+
"aria-labelledby",
|
105
|
+
"axis",
|
106
|
+
"border",
|
107
|
+
"char",
|
108
|
+
"charoff",
|
109
|
+
"charset",
|
110
|
+
"checked",
|
111
|
+
"clear",
|
112
|
+
"cols",
|
113
|
+
"colspan",
|
114
|
+
"compact",
|
115
|
+
"coords",
|
116
|
+
"datetime",
|
117
|
+
"dir",
|
118
|
+
"disabled",
|
119
|
+
"enctype",
|
120
|
+
"for",
|
121
|
+
"frame",
|
122
|
+
"headers",
|
123
|
+
"height",
|
124
|
+
"hreflang",
|
125
|
+
"hspace",
|
126
|
+
"id",
|
127
|
+
"ismap",
|
128
|
+
"label",
|
129
|
+
"lang",
|
130
|
+
"maxlength",
|
131
|
+
"media",
|
132
|
+
"method",
|
133
|
+
"multiple",
|
134
|
+
"name",
|
135
|
+
"nohref",
|
136
|
+
"noshade",
|
137
|
+
"nowrap",
|
138
|
+
"open",
|
139
|
+
"progress",
|
140
|
+
"prompt",
|
141
|
+
"readonly",
|
142
|
+
"rel",
|
143
|
+
"rev",
|
144
|
+
"role",
|
145
|
+
"rows",
|
146
|
+
"rowspan",
|
147
|
+
"rules",
|
148
|
+
"scope",
|
149
|
+
"selected",
|
150
|
+
"shape",
|
151
|
+
"size",
|
152
|
+
"span",
|
153
|
+
"start",
|
154
|
+
"summary",
|
155
|
+
"tabindex",
|
156
|
+
"title",
|
157
|
+
"type",
|
158
|
+
"usemap",
|
159
|
+
"valign",
|
160
|
+
"value",
|
161
|
+
"width",
|
162
|
+
"itemprop",
|
163
|
+
],
|
164
|
+
},
|
165
|
+
protocols: {
|
166
|
+
"a" => { "href" => Selma::Sanitizer::Config::VALID_PROTOCOLS }.freeze,
|
167
|
+
"blockquote" => { "cite" => ["http", "https", :relative].freeze },
|
168
|
+
"del" => { "cite" => ["http", "https", :relative].freeze },
|
169
|
+
"ins" => { "cite" => ["http", "https", :relative].freeze },
|
170
|
+
"q" => { "cite" => ["http", "https", :relative].freeze },
|
171
|
+
"img" => {
|
172
|
+
"src" => ["http", "https", :relative].freeze,
|
173
|
+
"longdesc" => ["http", "https", :relative].freeze,
|
174
|
+
},
|
175
|
+
},
|
176
|
+
})
|
177
|
+
|
178
|
+
class << self
|
179
|
+
def call(html, config)
|
180
|
+
raise ArgumentError, "html must be a String, not #{html.class}" unless html.is_a?(String)
|
181
|
+
raise ArgumentError, "config must be a Hash, not #{config.class}" unless config.is_a?(Hash)
|
182
|
+
|
183
|
+
sanitization_config = Selma::Sanitizer.new(config)
|
184
|
+
Selma::Rewriter.new(sanitizer: sanitization_config).rewrite(html)
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
class
|
3
|
+
class HTMLPipeline
|
4
|
+
class TextFilter
|
5
5
|
# HTML Filter that converts image's url into <img> tag.
|
6
6
|
# For example, it will convert
|
7
7
|
# http://example.com/test.jpg
|
@@ -10,7 +10,7 @@ module HTML
|
|
10
10
|
|
11
11
|
class ImageFilter < TextFilter
|
12
12
|
def call
|
13
|
-
@text.gsub(
|
13
|
+
@text.gsub(%r{(https|http)?://.+\.(jpg|jpeg|bmp|gif|png)(\?\S+)?}i) do |match|
|
14
14
|
%(<img src="#{match}" alt=""/>)
|
15
15
|
end
|
16
16
|
end
|
@@ -1,14 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
module HTML
|
6
|
-
class Pipeline
|
3
|
+
class HTMLPipeline
|
4
|
+
class TextFilter
|
7
5
|
# Simple filter for plain text input. HTML escapes the text input and wraps it
|
8
6
|
# in a div.
|
9
7
|
class PlainTextInputFilter < TextFilter
|
10
8
|
def call
|
11
|
-
"<div>#{CGI.
|
9
|
+
"<div>#{CGI.escapeHTML(@text)}</div>"
|
12
10
|
end
|
13
11
|
end
|
14
12
|
end
|