html-pipeline 2.14.3 → 3.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/FUNDING.yml +11 -3
- data/.github/dependabot.yml +27 -0
- data/.github/workflows/automerge.yml +13 -0
- data/.github/workflows/ci.yml +22 -0
- data/.github/workflows/lint.yml +23 -0
- data/.github/workflows/publish.yml +19 -0
- data/.rubocop.yml +17 -0
- data/.ruby-version +1 -0
- data/.vscode/settings.json +8 -0
- data/CHANGELOG.md +128 -2
- data/Gemfile +31 -15
- data/{LICENSE → LICENSE.txt} +2 -2
- data/README.md +241 -224
- data/Rakefile +14 -7
- data/UPGRADING.md +34 -0
- data/html-pipeline.gemspec +31 -21
- data/lib/html-pipeline.rb +3 -0
- data/lib/html_pipeline/convert_filter/markdown_filter.rb +26 -0
- data/lib/html_pipeline/convert_filter.rb +17 -0
- data/lib/html_pipeline/filter.rb +89 -0
- data/lib/html_pipeline/node_filter/absolute_source_filter.rb +54 -0
- data/lib/html_pipeline/node_filter/asset_proxy_filter.rb +86 -0
- data/lib/{html/pipeline → html_pipeline/node_filter}/emoji_filter.rb +58 -54
- data/lib/html_pipeline/node_filter/https_filter.rb +22 -0
- data/lib/html_pipeline/node_filter/image_max_width_filter.rb +40 -0
- data/lib/{html/pipeline/@mention_filter.rb → html_pipeline/node_filter/mention_filter.rb} +54 -68
- data/lib/html_pipeline/node_filter/syntax_highlight_filter.rb +62 -0
- data/lib/html_pipeline/node_filter/table_of_contents_filter.rb +70 -0
- data/lib/html_pipeline/node_filter/team_mention_filter.rb +105 -0
- data/lib/html_pipeline/node_filter.rb +31 -0
- data/lib/html_pipeline/sanitization_filter.rb +190 -0
- data/lib/{html/pipeline → html_pipeline/text_filter}/image_filter.rb +3 -3
- data/lib/{html/pipeline → html_pipeline/text_filter}/plain_text_input_filter.rb +3 -5
- data/lib/html_pipeline/text_filter.rb +21 -0
- data/lib/html_pipeline/version.rb +5 -0
- data/lib/html_pipeline.rb +281 -0
- metadata +58 -54
- data/.travis.yml +0 -43
- data/Appraisals +0 -19
- data/CONTRIBUTING.md +0 -60
- data/bin/html-pipeline +0 -78
- data/lib/html/pipeline/@team_mention_filter.rb +0 -99
- data/lib/html/pipeline/absolute_source_filter.rb +0 -52
- data/lib/html/pipeline/autolink_filter.rb +0 -34
- data/lib/html/pipeline/body_content.rb +0 -44
- data/lib/html/pipeline/camo_filter.rb +0 -105
- data/lib/html/pipeline/email_reply_filter.rb +0 -69
- data/lib/html/pipeline/filter.rb +0 -165
- data/lib/html/pipeline/https_filter.rb +0 -29
- data/lib/html/pipeline/image_max_width_filter.rb +0 -37
- data/lib/html/pipeline/markdown_filter.rb +0 -56
- data/lib/html/pipeline/sanitization_filter.rb +0 -144
- data/lib/html/pipeline/syntax_highlight_filter.rb +0 -50
- data/lib/html/pipeline/text_filter.rb +0 -16
- data/lib/html/pipeline/textile_filter.rb +0 -25
- data/lib/html/pipeline/toc_filter.rb +0 -69
- data/lib/html/pipeline/version.rb +0 -7
- data/lib/html/pipeline.rb +0 -210
@@ -1,9 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "set"
|
4
4
|
|
5
|
-
|
6
|
-
class
|
5
|
+
class HTMLPipeline
|
6
|
+
class NodeFilter
|
7
7
|
# HTML filter that replaces @user mentions with links. Mentions within <pre>,
|
8
8
|
# <code>, <a>, <style>, and <script> elements are ignored. Mentions that
|
9
9
|
# reference users that do not exist are ignored.
|
@@ -16,71 +16,69 @@ module HTML
|
|
16
16
|
# :username_pattern - Used to provide a custom regular expression to
|
17
17
|
# identify usernames
|
18
18
|
#
|
19
|
-
class MentionFilter <
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
text
|
36
|
-
|
37
|
-
|
19
|
+
class MentionFilter < NodeFilter
|
20
|
+
class << self
|
21
|
+
# Public: Find user @mentions in text. See
|
22
|
+
# MentionFilter#mention_link_filter.
|
23
|
+
#
|
24
|
+
# MentionFilter.mentioned_logins_in(text) do |match, login|
|
25
|
+
# "<a href=...>#{login}</a>"
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# text - String text to search.
|
29
|
+
#
|
30
|
+
# Yields the String match and the String login name (the text captured
|
31
|
+
# after the "@"). The yield's return replaces the match in
|
32
|
+
# the original text.
|
33
|
+
#
|
34
|
+
# Returns a String replaced with the return of the block.
|
35
|
+
def mentioned_logins_in(text, username_pattern = USERNAME_PATTERN)
|
36
|
+
text.gsub(MENTION_PATTERNS[username_pattern]) do |match|
|
37
|
+
login = Regexp.last_match(1)
|
38
|
+
yield match, login
|
39
|
+
end
|
38
40
|
end
|
39
41
|
end
|
40
|
-
|
41
42
|
# Hash that contains all of the mention patterns used by the pipeline
|
42
|
-
|
43
|
-
hash[key] =
|
43
|
+
MENTION_PATTERNS = Hash.new do |hash, key|
|
44
|
+
hash[key] = %r{
|
44
45
|
(?:^|\W) # beginning of string or non-word char
|
45
46
|
@((?>#{key})) # @username
|
46
|
-
(
|
47
|
+
(?!/) # without a trailing slash
|
47
48
|
(?=
|
48
49
|
\.+[ \t\W]| # dots followed by space or non-word character
|
49
50
|
\.+$| # dots at end of line
|
50
51
|
[^0-9a-zA-Z_.]| # non-word character except dot
|
51
52
|
$ # end of line
|
52
53
|
)
|
53
|
-
|
54
|
+
}ix
|
54
55
|
end
|
55
56
|
|
56
57
|
# Default pattern used to extract usernames from text. The value can be
|
57
58
|
# overridden by providing the username_pattern variable in the context.
|
58
|
-
|
59
|
-
|
60
|
-
# List of username logins that, when mentioned, link to the blog post
|
61
|
-
# about @mentions instead of triggering a real mention.
|
62
|
-
MentionLogins = %w[
|
63
|
-
mention
|
64
|
-
mentions
|
65
|
-
mentioned
|
66
|
-
mentioning
|
67
|
-
].freeze
|
59
|
+
USERNAME_PATTERN = /[a-z0-9][a-z0-9-]*/
|
68
60
|
|
69
61
|
# Don't look for mentions in text nodes that are children of these elements
|
70
|
-
IGNORE_PARENTS =
|
62
|
+
IGNORE_PARENTS = ["pre", "code", "a", "style", "script"]
|
71
63
|
|
72
|
-
|
64
|
+
SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: IGNORE_PARENTS)
|
65
|
+
|
66
|
+
def after_initialize
|
73
67
|
result[:mentioned_usernames] ||= []
|
68
|
+
end
|
74
69
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
70
|
+
def selector
|
71
|
+
SELECTOR
|
72
|
+
end
|
73
|
+
|
74
|
+
def handle_text_chunk(text)
|
75
|
+
content = text.to_s
|
76
|
+
return unless content.include?("@")
|
77
|
+
|
78
|
+
html = mention_link_filter(content, base_url: base_url, username_pattern: username_pattern)
|
79
|
+
return if html == content
|
80
|
+
|
81
|
+
text.replace(html, as: :html)
|
84
82
|
end
|
85
83
|
|
86
84
|
# The URL to provide when someone @mentions a "mention" name, such
|
@@ -90,7 +88,7 @@ module HTML
|
|
90
88
|
end
|
91
89
|
|
92
90
|
def username_pattern
|
93
|
-
context[:username_pattern] ||
|
91
|
+
context[:username_pattern] || USERNAME_PATTERN
|
94
92
|
end
|
95
93
|
|
96
94
|
# Replace user @mentions in text with links to the mentioned user's
|
@@ -105,35 +103,23 @@ module HTML
|
|
105
103
|
#
|
106
104
|
# Returns a string with @mentions replaced with links. All links have a
|
107
105
|
# 'user-mention' class name attached for styling.
|
108
|
-
def mention_link_filter(text,
|
109
|
-
self.class.mentioned_logins_in(text, username_pattern) do |match, login
|
110
|
-
link =
|
111
|
-
if is_mentioned
|
112
|
-
link_to_mention_info(login, info_url)
|
113
|
-
else
|
114
|
-
link_to_mentioned_user(login)
|
115
|
-
end
|
106
|
+
def mention_link_filter(text, base_url: "/", username_pattern: USERNAME_PATTERN)
|
107
|
+
self.class.mentioned_logins_in(text, username_pattern) do |match, login|
|
108
|
+
link = link_to_mentioned_user(base_url, login)
|
116
109
|
|
117
110
|
link ? match.sub("@#{login}", link) : match
|
118
111
|
end
|
119
112
|
end
|
120
113
|
|
121
|
-
def
|
122
|
-
return "@#{text}" if info_url.nil?
|
123
|
-
"<a href='#{info_url}' class='user-mention'>" \
|
124
|
-
"@#{text}" \
|
125
|
-
'</a>'
|
126
|
-
end
|
127
|
-
|
128
|
-
def link_to_mentioned_user(login)
|
114
|
+
def link_to_mentioned_user(base_url, login)
|
129
115
|
result[:mentioned_usernames] |= [login]
|
130
116
|
|
131
117
|
url = base_url.dup
|
132
|
-
url <<
|
118
|
+
url << "/" unless %r{[/~]\z}.match?(url)
|
133
119
|
|
134
|
-
"<a href
|
120
|
+
"<a href=\"#{url << login}\" class=\"user-mention\">" \
|
135
121
|
"@#{login}" \
|
136
|
-
|
122
|
+
"</a>"
|
137
123
|
end
|
138
124
|
end
|
139
125
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
HTMLPipeline.require_dependency("rouge", "SyntaxHighlightFilter")
|
4
|
+
|
5
|
+
class HTMLPipeline
|
6
|
+
class NodeFilter
|
7
|
+
# HTML Filter that syntax highlights text inside code blocks.
|
8
|
+
#
|
9
|
+
# Context options:
|
10
|
+
#
|
11
|
+
# :highlight => String naming the language used to pick a lexer when the pre element has no lang attribute. Defaults to nil.
|
12
|
+
# :scope => String used as the class prefix added to the pre element. Defaults to
|
13
|
+
# "highlight", which yields "highlight highlight-css" for a css code block.
|
14
|
+
#
|
15
|
+
# This filter does not write any additional information to the context hash.
|
16
|
+
class SyntaxHighlightFilter < NodeFilter
|
17
|
+
def initialize(context: {}, result: {})
|
18
|
+
super(context: context, result: result)
|
19
|
+
# TODO: test the optionality of this
|
20
|
+
@formatter = context[:formatter] || Rouge::Formatters::HTML.new
|
21
|
+
end
|
22
|
+
|
23
|
+
SELECTOR = Selma::Selector.new(match_element: "pre", match_text_within: "pre")
|
24
|
+
|
25
|
+
def selector
|
26
|
+
SELECTOR
|
27
|
+
end
|
28
|
+
|
29
|
+
def handle_element(element)
|
30
|
+
default = context[:highlight]&.to_s
|
31
|
+
@lang = element["lang"] || default
|
32
|
+
|
33
|
+
scope = context.fetch(:scope, "highlight")
|
34
|
+
|
35
|
+
element["class"] = "#{scope} #{scope}-#{@lang}" if include_lang?
|
36
|
+
end
|
37
|
+
|
38
|
+
def handle_text_chunk(text)
|
39
|
+
return if @lang.nil?
|
40
|
+
return if (lexer = lexer_for(@lang)).nil?
|
41
|
+
|
42
|
+
content = text.to_s
|
43
|
+
|
44
|
+
text.replace(highlight_with_timeout_handling(content, lexer), as: :html)
|
45
|
+
end
|
46
|
+
|
47
|
+
def highlight_with_timeout_handling(text, lexer)
|
48
|
+
Rouge.highlight(text, lexer, @formatter)
|
49
|
+
rescue Timeout::Error => _e
|
50
|
+
text
|
51
|
+
end
|
52
|
+
|
53
|
+
def lexer_for(lang)
|
54
|
+
Rouge::Lexer.find(lang)
|
55
|
+
end
|
56
|
+
|
57
|
+
def include_lang?
|
58
|
+
!@lang.nil? && !@lang.empty?
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class HTMLPipeline
|
4
|
+
class NodeFilter
|
5
|
+
# Generates a Table of Contents: an array of hashes containing:
|
6
|
+
# * `href`: the relative link to the header
|
7
|
+
# * `text`: the text of the header
|
8
|
+
|
9
|
+
# Examples
|
10
|
+
#
|
11
|
+
# TocPipeline =
|
12
|
+
# HTMLPipeline.new [
|
13
|
+
# HTMLPipeline::NodeFilter::TableOfContentsFilter
|
14
|
+
# ]
|
15
|
+
# # => #<HTMLPipeline:0x007fc13c4528d8...>
|
16
|
+
# orig = %(<h1>Ice cube</h1><p>is not for the pop chart</p>)
|
17
|
+
# # => "<h1>Ice cube</h1><p>is not for the pop chart</p>"
|
18
|
+
# result = {}
|
19
|
+
# # => {}
|
20
|
+
# TocPipeline.call(orig, {}, result)
|
21
|
+
# # => {:toc=> ...}
|
22
|
+
# result[:toc]
|
23
|
+
# # => [{:href=>"#ice-cube", :text=>"Ice cube"}]
|
24
|
+
# result[:output].to_s
|
25
|
+
# # => "<h1>\n<a id=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..."
|
26
|
+
class TableOfContentsFilter < NodeFilter
|
27
|
+
SELECTOR = Selma::Selector.new(
|
28
|
+
match_element: "h1 a[href], h2 a[href], h3 a[href], h4 a[href], h5 a[href], h6 a[href]",
|
29
|
+
match_text_within: "h1, h2, h3, h4, h5, h6",
|
30
|
+
)
|
31
|
+
|
32
|
+
def selector
|
33
|
+
SELECTOR
|
34
|
+
end
|
35
|
+
|
36
|
+
# The icon that will be placed next to an anchored rendered markdown header
|
37
|
+
def anchor_html
|
38
|
+
@context[:anchor_html] || %(<span aria-hidden="true" class="anchor"></span>)
|
39
|
+
end
|
40
|
+
|
41
|
+
# The class that will be attached on the anchored rendered markdown header
|
42
|
+
def classes
|
43
|
+
context[:classes] || "anchor"
|
44
|
+
end
|
45
|
+
|
46
|
+
def after_initialize
|
47
|
+
result[:toc] = []
|
48
|
+
end
|
49
|
+
|
50
|
+
def handle_element(element)
|
51
|
+
header_href = element["href"]
|
52
|
+
|
53
|
+
return unless header_href.start_with?("#")
|
54
|
+
|
55
|
+
header_id = header_href[1..-1]
|
56
|
+
|
57
|
+
element["id"] = header_id
|
58
|
+
element["class"] = classes
|
59
|
+
|
60
|
+
element.set_inner_content(anchor_html, as: :html)
|
61
|
+
|
62
|
+
result[:toc] << { href: header_href }
|
63
|
+
end
|
64
|
+
|
65
|
+
def handle_text_chunk(text)
|
66
|
+
result[:toc].last[:text] = text.to_s
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
4
|
+
|
5
|
+
class HTMLPipeline
|
6
|
+
class NodeFilter
|
7
|
+
# HTML filter that replaces @org/team mentions with links. Mentions within
|
8
|
+
# <pre>, <code>, <a>, <style>, and <script> elements are ignored.
|
9
|
+
#
|
10
|
+
# Context options:
|
11
|
+
# :base_url - Used to construct links to team profile pages for each
|
12
|
+
# mention.
|
13
|
+
# :team_pattern - Used to provide a custom regular expression to
|
14
|
+
# identify team names
|
15
|
+
#
|
16
|
+
class TeamMentionFilter < NodeFilter
|
17
|
+
class << self
|
18
|
+
# Public: Find @org/team mentions in text. See
|
19
|
+
# TeamMentionFilter#team_mention_link_filter.
|
20
|
+
#
|
21
|
+
# TeamMentionFilter.mentioned_teams_in(text) do |match, org, team|
|
22
|
+
# "<a href=...>#{team}</a>"
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# text - String text to search.
|
26
|
+
#
|
27
|
+
# Yields the String match, org name, and team name. The yield's
|
28
|
+
# return replaces the match in the original text.
|
29
|
+
#
|
30
|
+
# Returns a String replaced with the return of the block.
|
31
|
+
def mentioned_teams_in(text, team_pattern = TEAM_PATTERN)
|
32
|
+
text.gsub(team_pattern) do |match|
|
33
|
+
org = Regexp.last_match(1)
|
34
|
+
team = Regexp.last_match(2)
|
35
|
+
yield match, org, team
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Default pattern used to extract team names from text. The value can be
|
41
|
+
# overridden by providing the team_pattern variable in the context. To
|
42
|
+
# properly link the mention, the pattern should be in the format of /@(1)\/(2)/.
|
43
|
+
TEAM_PATTERN = %r{
|
44
|
+
(?<=^|\W) # beginning of string or non-word char
|
45
|
+
@([a-z0-9][a-z0-9-]*) # @organization
|
46
|
+
(?:/|&\#47;?) # dividing slash
|
47
|
+
([a-z0-9][a-z0-9\-_]*) # team
|
48
|
+
\b
|
49
|
+
}ix
|
50
|
+
|
51
|
+
# Don't look for mentions in text nodes that are children of these elements
|
52
|
+
IGNORE_PARENTS = ["pre", "code", "a", "style", "script"]
|
53
|
+
|
54
|
+
SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: IGNORE_PARENTS)
|
55
|
+
|
56
|
+
def after_initialize
|
57
|
+
result[:mentioned_teams] = []
|
58
|
+
end
|
59
|
+
|
60
|
+
def selector
|
61
|
+
SELECTOR
|
62
|
+
end
|
63
|
+
|
64
|
+
def handle_text_chunk(text)
|
65
|
+
content = text.to_s
|
66
|
+
return unless content.include?("@")
|
67
|
+
|
68
|
+
text.replace(mention_link_filter(content, base_url: base_url, team_pattern: team_pattern), as: :html)
|
69
|
+
end
|
70
|
+
|
71
|
+
def team_pattern
|
72
|
+
context[:team_pattern] || TEAM_PATTERN
|
73
|
+
end
|
74
|
+
|
75
|
+
# Replace @org/team mentions in text with links to the mentioned team's
|
76
|
+
# page.
|
77
|
+
#
|
78
|
+
# text - String text to replace @mention team names in.
|
79
|
+
# base_url - The base URL used to construct team page URLs.
|
80
|
+
# team_pattern - Regular expression used to identify teams in text
|
81
|
+
#
|
82
|
+
# Returns a string with @team mentions replaced with links. All links have a
|
83
|
+
# 'team-mention' class name attached for styling.
|
84
|
+
def mention_link_filter(text, base_url: "/", team_pattern: TEAM_PATTERN)
|
85
|
+
self.class.mentioned_teams_in(text, team_pattern) do |match, org, team|
|
86
|
+
link = link_to_mentioned_team(base_url, org, team)
|
87
|
+
seperator = %r{/|&\#47;?}
|
88
|
+
|
89
|
+
link ? match.sub(/@#{org}#{seperator}#{team}/, link) : match
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def link_to_mentioned_team(base_url, org, team)
|
94
|
+
result[:mentioned_teams] |= [team]
|
95
|
+
|
96
|
+
url = base_url.dup
|
97
|
+
url << "/" unless %r{[/~]\z}.match?(url)
|
98
|
+
|
99
|
+
"<a href=\"#{url << org}/#{team}\" class=\"team-mention\">" \
|
100
|
+
"@#{org}/#{team}" \
|
101
|
+
"</a>"
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "selma"
|
4
|
+
|
5
|
+
class HTMLPipeline
|
6
|
+
class NodeFilter < Filter
|
7
|
+
def initialize(context: {}, result: {})
|
8
|
+
super(context: context, result: {})
|
9
|
+
send(:after_initialize) if respond_to?(:after_initialize)
|
10
|
+
end
|
11
|
+
|
12
|
+
# The String representation of the document.
|
13
|
+
def html
|
14
|
+
raise InvalidDocumentException if @html.nil? && @doc.nil?
|
15
|
+
|
16
|
+
@html || doc.to_html
|
17
|
+
end
|
18
|
+
|
19
|
+
def reset!
|
20
|
+
result = {} # rubocop:disable Lint/UselessAssignment
|
21
|
+
send(:after_initialize) if respond_to?(:after_initialize)
|
22
|
+
end
|
23
|
+
|
24
|
+
class << self
|
25
|
+
def call(html, context: {}, result: {})
|
26
|
+
node_filter = new(context: context, result: result)
|
27
|
+
Selma::Rewriter.new(sanitizer: nil, handlers: [node_filter]).rewrite(html)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "selma"
|
4
|
+
|
5
|
+
class HTMLPipeline
|
6
|
+
# A special filter with sanitization routines and allowlists. This module defines
|
7
|
+
# what HTML is allowed in user provided content and fixes up issues with
|
8
|
+
# unbalanced tags and whatnot.
|
9
|
+
#
|
10
|
+
# See the Selma docs for more information on the underlying library:
|
11
|
+
#
|
12
|
+
# https://github.com/gjtorikian/selma/#readme
|
13
|
+
#
|
14
|
+
# This filter does not write additional information to the context.
|
15
|
+
class SanitizationFilter
|
16
|
+
VALID_PROTOCOLS = Selma::Sanitizer::Config::VALID_PROTOCOLS.dup
|
17
|
+
|
18
|
+
# The main sanitization allowlist. Only these elements and attributes are
|
19
|
+
# allowed through by default.
|
20
|
+
DEFAULT_CONFIG = Selma::Sanitizer::Config.freeze_config({
|
21
|
+
elements: [
|
22
|
+
"h1",
|
23
|
+
"h2",
|
24
|
+
"h3",
|
25
|
+
"h4",
|
26
|
+
"h5",
|
27
|
+
"h6",
|
28
|
+
"br",
|
29
|
+
"b",
|
30
|
+
"i",
|
31
|
+
"strong",
|
32
|
+
"em",
|
33
|
+
"a",
|
34
|
+
"pre",
|
35
|
+
"code",
|
36
|
+
"img",
|
37
|
+
"tt",
|
38
|
+
"div",
|
39
|
+
"ins",
|
40
|
+
"del",
|
41
|
+
"sup",
|
42
|
+
"sub",
|
43
|
+
"p",
|
44
|
+
"picture",
|
45
|
+
"ol",
|
46
|
+
"ul",
|
47
|
+
"table",
|
48
|
+
"thead",
|
49
|
+
"tbody",
|
50
|
+
"tfoot",
|
51
|
+
"blockquote",
|
52
|
+
"dl",
|
53
|
+
"dt",
|
54
|
+
"dd",
|
55
|
+
"kbd",
|
56
|
+
"q",
|
57
|
+
"samp",
|
58
|
+
"var",
|
59
|
+
"hr",
|
60
|
+
"ruby",
|
61
|
+
"rt",
|
62
|
+
"rp",
|
63
|
+
"li",
|
64
|
+
"tr",
|
65
|
+
"td",
|
66
|
+
"th",
|
67
|
+
"s",
|
68
|
+
"strike",
|
69
|
+
"summary",
|
70
|
+
"details",
|
71
|
+
"caption",
|
72
|
+
"figure",
|
73
|
+
"figcaption",
|
74
|
+
"abbr",
|
75
|
+
"bdo",
|
76
|
+
"cite",
|
77
|
+
"dfn",
|
78
|
+
"mark",
|
79
|
+
"small",
|
80
|
+
"source",
|
81
|
+
"span",
|
82
|
+
"time",
|
83
|
+
"wbr",
|
84
|
+
],
|
85
|
+
|
86
|
+
attributes: {
|
87
|
+
"a" => ["href"],
|
88
|
+
"img" => ["src", "longdesc", "loading", "alt"],
|
89
|
+
"div" => ["itemscope", "itemtype"],
|
90
|
+
"blockquote" => ["cite"],
|
91
|
+
"del" => ["cite"],
|
92
|
+
"ins" => ["cite"],
|
93
|
+
"q" => ["cite"],
|
94
|
+
"source" => ["srcset"],
|
95
|
+
all: [
|
96
|
+
"abbr",
|
97
|
+
"accept",
|
98
|
+
"accept-charset",
|
99
|
+
"accesskey",
|
100
|
+
"action",
|
101
|
+
"align",
|
102
|
+
"alt",
|
103
|
+
"aria-describedby",
|
104
|
+
"aria-hidden",
|
105
|
+
"aria-label",
|
106
|
+
"aria-labelledby",
|
107
|
+
"axis",
|
108
|
+
"border",
|
109
|
+
"char",
|
110
|
+
"charoff",
|
111
|
+
"charset",
|
112
|
+
"checked",
|
113
|
+
"clear",
|
114
|
+
"cols",
|
115
|
+
"colspan",
|
116
|
+
"compact",
|
117
|
+
"coords",
|
118
|
+
"datetime",
|
119
|
+
"dir",
|
120
|
+
"disabled",
|
121
|
+
"enctype",
|
122
|
+
"for",
|
123
|
+
"frame",
|
124
|
+
"headers",
|
125
|
+
"height",
|
126
|
+
"hreflang",
|
127
|
+
"hspace",
|
128
|
+
"id",
|
129
|
+
"ismap",
|
130
|
+
"label",
|
131
|
+
"lang",
|
132
|
+
"maxlength",
|
133
|
+
"media",
|
134
|
+
"method",
|
135
|
+
"multiple",
|
136
|
+
"name",
|
137
|
+
"nohref",
|
138
|
+
"noshade",
|
139
|
+
"nowrap",
|
140
|
+
"open",
|
141
|
+
"progress",
|
142
|
+
"prompt",
|
143
|
+
"readonly",
|
144
|
+
"rel",
|
145
|
+
"rev",
|
146
|
+
"role",
|
147
|
+
"rows",
|
148
|
+
"rowspan",
|
149
|
+
"rules",
|
150
|
+
"scope",
|
151
|
+
"selected",
|
152
|
+
"shape",
|
153
|
+
"size",
|
154
|
+
"span",
|
155
|
+
"start",
|
156
|
+
"summary",
|
157
|
+
"tabindex",
|
158
|
+
"title",
|
159
|
+
"type",
|
160
|
+
"usemap",
|
161
|
+
"valign",
|
162
|
+
"value",
|
163
|
+
"width",
|
164
|
+
"itemprop",
|
165
|
+
],
|
166
|
+
},
|
167
|
+
protocols: {
|
168
|
+
"a" => { "href" => Selma::Sanitizer::Config::VALID_PROTOCOLS }.freeze,
|
169
|
+
"blockquote" => { "cite" => ["http", "https", :relative].freeze },
|
170
|
+
"del" => { "cite" => ["http", "https", :relative].freeze },
|
171
|
+
"ins" => { "cite" => ["http", "https", :relative].freeze },
|
172
|
+
"q" => { "cite" => ["http", "https", :relative].freeze },
|
173
|
+
"img" => {
|
174
|
+
"src" => ["http", "https", :relative].freeze,
|
175
|
+
"longdesc" => ["http", "https", :relative].freeze,
|
176
|
+
},
|
177
|
+
},
|
178
|
+
})
|
179
|
+
|
180
|
+
class << self
|
181
|
+
def call(html, config)
|
182
|
+
raise ArgumentError, "html must be a String, not #{html.class}" unless html.is_a?(String)
|
183
|
+
raise ArgumentError, "config must be a Hash, not #{config.class}" unless config.is_a?(Hash)
|
184
|
+
|
185
|
+
sanitization_config = Selma::Sanitizer.new(config)
|
186
|
+
Selma::Rewriter.new(sanitizer: sanitization_config).rewrite(html)
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
class
|
3
|
+
class HTMLPipeline
|
4
|
+
class TextFilter
|
5
5
|
# HTML Filter that converts image's url into <img> tag.
|
6
6
|
# For example, it will convert
|
7
7
|
# http://example.com/test.jpg
|
@@ -10,7 +10,7 @@ module HTML
|
|
10
10
|
|
11
11
|
class ImageFilter < TextFilter
|
12
12
|
def call
|
13
|
-
@text.gsub(
|
13
|
+
@text.gsub(%r{(https|http)?://.+\.(jpg|jpeg|bmp|gif|png)(\?\S+)?}i) do |match|
|
14
14
|
%(<img src="#{match}" alt=""/>)
|
15
15
|
end
|
16
16
|
end
|
@@ -1,14 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
module HTML
|
6
|
-
class Pipeline
|
3
|
+
class HTMLPipeline
|
4
|
+
class TextFilter
|
7
5
|
# Simple filter for plain text input. HTML escapes the text input and wraps it
|
8
6
|
# in a div.
|
9
7
|
class PlainTextInputFilter < TextFilter
|
10
8
|
def call
|
11
|
-
"<div>#{CGI.
|
9
|
+
"<div>#{CGI.escapeHTML(@text)}</div>"
|
12
10
|
end
|
13
11
|
end
|
14
12
|
end
|