html-pipeline 2.14.3 → 3.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/.github/FUNDING.yml +11 -3
  3. data/.github/dependabot.yml +27 -0
  4. data/.github/workflows/automerge.yml +13 -0
  5. data/.github/workflows/ci.yml +22 -0
  6. data/.github/workflows/lint.yml +23 -0
  7. data/.github/workflows/publish.yml +19 -0
  8. data/.rubocop.yml +17 -0
  9. data/.ruby-version +1 -0
  10. data/.vscode/settings.json +8 -0
  11. data/CHANGELOG.md +119 -2
  12. data/Gemfile +31 -15
  13. data/{LICENSE → LICENSE.txt} +2 -2
  14. data/README.md +241 -224
  15. data/Rakefile +14 -7
  16. data/UPGRADING.md +34 -0
  17. data/html-pipeline.gemspec +31 -21
  18. data/lib/html-pipeline.rb +3 -0
  19. data/lib/html_pipeline/convert_filter/markdown_filter.rb +26 -0
  20. data/lib/html_pipeline/convert_filter.rb +17 -0
  21. data/lib/html_pipeline/filter.rb +89 -0
  22. data/lib/html_pipeline/node_filter/absolute_source_filter.rb +54 -0
  23. data/lib/html_pipeline/node_filter/asset_proxy_filter.rb +86 -0
  24. data/lib/{html/pipeline → html_pipeline/node_filter}/emoji_filter.rb +58 -54
  25. data/lib/html_pipeline/node_filter/https_filter.rb +22 -0
  26. data/lib/html_pipeline/node_filter/image_max_width_filter.rb +40 -0
  27. data/lib/{html/pipeline/@mention_filter.rb → html_pipeline/node_filter/mention_filter.rb} +54 -68
  28. data/lib/html_pipeline/node_filter/syntax_highlight_filter.rb +62 -0
  29. data/lib/html_pipeline/node_filter/table_of_contents_filter.rb +70 -0
  30. data/lib/html_pipeline/node_filter/team_mention_filter.rb +105 -0
  31. data/lib/html_pipeline/node_filter.rb +31 -0
  32. data/lib/html_pipeline/sanitization_filter.rb +188 -0
  33. data/lib/{html/pipeline → html_pipeline/text_filter}/image_filter.rb +3 -3
  34. data/lib/{html/pipeline → html_pipeline/text_filter}/plain_text_input_filter.rb +3 -5
  35. data/lib/html_pipeline/text_filter.rb +21 -0
  36. data/lib/html_pipeline/version.rb +5 -0
  37. data/lib/html_pipeline.rb +281 -0
  38. metadata +58 -54
  39. data/.travis.yml +0 -43
  40. data/Appraisals +0 -19
  41. data/CONTRIBUTING.md +0 -60
  42. data/bin/html-pipeline +0 -78
  43. data/lib/html/pipeline/@team_mention_filter.rb +0 -99
  44. data/lib/html/pipeline/absolute_source_filter.rb +0 -52
  45. data/lib/html/pipeline/autolink_filter.rb +0 -34
  46. data/lib/html/pipeline/body_content.rb +0 -44
  47. data/lib/html/pipeline/camo_filter.rb +0 -105
  48. data/lib/html/pipeline/email_reply_filter.rb +0 -69
  49. data/lib/html/pipeline/filter.rb +0 -165
  50. data/lib/html/pipeline/https_filter.rb +0 -29
  51. data/lib/html/pipeline/image_max_width_filter.rb +0 -37
  52. data/lib/html/pipeline/markdown_filter.rb +0 -56
  53. data/lib/html/pipeline/sanitization_filter.rb +0 -144
  54. data/lib/html/pipeline/syntax_highlight_filter.rb +0 -50
  55. data/lib/html/pipeline/text_filter.rb +0 -16
  56. data/lib/html/pipeline/textile_filter.rb +0 -25
  57. data/lib/html/pipeline/toc_filter.rb +0 -69
  58. data/lib/html/pipeline/version.rb +0 -7
  59. data/lib/html/pipeline.rb +0 -210
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'set'
3
+ require "set"
4
4
 
5
- module HTML
6
- class Pipeline
5
+ class HTMLPipeline
6
+ class NodeFilter
7
7
  # HTML filter that replaces @user mentions with links. Mentions within <pre>,
8
8
  # <code>, and <a> elements are ignored. Mentions that reference users that do
9
9
  # not exist are ignored.
@@ -16,71 +16,69 @@ module HTML
16
16
  # :username_pattern - Used to provide a custom regular expression to
17
17
  # identify usernames
18
18
  #
19
- class MentionFilter < Filter
20
- # Public: Find user @mentions in text. See
21
- # MentionFilter#mention_link_filter.
22
- #
23
- # MentionFilter.mentioned_logins_in(text) do |match, login, is_mentioned|
24
- # "<a href=...>#{login}</a>"
25
- # end
26
- #
27
- # text - String text to search.
28
- #
29
- # Yields the String match, the String login name, and a Boolean determining
30
- # if the match = "@mention[ed]". The yield's return replaces the match in
31
- # the original text.
32
- #
33
- # Returns a String replaced with the return of the block.
34
- def self.mentioned_logins_in(text, username_pattern = UsernamePattern)
35
- text.gsub MentionPatterns[username_pattern] do |match|
36
- login = Regexp.last_match(1)
37
- yield match, login, MentionLogins.include?(login.downcase)
19
+ class MentionFilter < NodeFilter
20
+ class << self
21
+ # Public: Find user @mentions in text. See
22
+ # MentionFilter#mention_link_filter.
23
+ #
24
+ # MentionFilter.mentioned_logins_in(text) do |match, login|
25
+ # "<a href=...>#{login}</a>"
26
+ # end
27
+ #
28
+ # text - String text to search.
29
+ #
30
+ # Yields the String match and the String login name (no Boolean flag is
31
+ # yielded any longer). The yield's return replaces the match in
32
+ # the original text.
33
+ #
34
+ # Returns a String replaced with the return of the block.
35
+ def mentioned_logins_in(text, username_pattern = USERNAME_PATTERN)
36
+ text.gsub(MENTION_PATTERNS[username_pattern]) do |match|
37
+ login = Regexp.last_match(1)
38
+ yield match, login
39
+ end
38
40
  end
39
41
  end
40
-
41
42
  # Hash that contains all of the mention patterns used by the pipeline
42
- MentionPatterns = Hash.new do |hash, key|
43
- hash[key] = /
43
+ MENTION_PATTERNS = Hash.new do |hash, key|
44
+ hash[key] = %r{
44
45
  (?:^|\W) # beginning of string or non-word char
45
46
  @((?>#{key})) # @username
46
- (?!\/) # without a trailing slash
47
+ (?!/) # without a trailing slash
47
48
  (?=
48
49
  \.+[ \t\W]| # dots followed by space or non-word character
49
50
  \.+$| # dots at end of line
50
51
  [^0-9a-zA-Z_.]| # non-word character except dot
51
52
  $ # end of line
52
53
  )
53
- /ix
54
+ }ix
54
55
  end
55
56
 
56
57
  # Default pattern used to extract usernames from text. The value can be
57
58
  # overridden by providing the username_pattern variable in the context.
58
- UsernamePattern = /[a-z0-9][a-z0-9-]*/
59
-
60
- # List of username logins that, when mentioned, link to the blog post
61
- # about @mentions instead of triggering a real mention.
62
- MentionLogins = %w[
63
- mention
64
- mentions
65
- mentioned
66
- mentioning
67
- ].freeze
59
+ USERNAME_PATTERN = /[a-z0-9][a-z0-9-]*/
68
60
 
69
61
  # Don't look for mentions in text nodes that are children of these elements
70
- IGNORE_PARENTS = %w(pre code a style script).to_set
62
+ IGNORE_PARENTS = ["pre", "code", "a", "style", "script"]
71
63
 
72
- def call
64
+ SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: IGNORE_PARENTS)
65
+
66
+ def after_initialize
73
67
  result[:mentioned_usernames] ||= []
68
+ end
74
69
 
75
- doc.search('.//text()').each do |node|
76
- content = node.to_html
77
- next unless content.include?('@')
78
- next if has_ancestor?(node, IGNORE_PARENTS)
79
- html = mention_link_filter(content, base_url, info_url, username_pattern)
80
- next if html == content
81
- node.replace(html)
82
- end
83
- doc
70
+ def selector
71
+ SELECTOR
72
+ end
73
+
74
+ def handle_text_chunk(text)
75
+ content = text.to_s
76
+ return unless content.include?("@")
77
+
78
+ html = mention_link_filter(content, base_url: base_url, username_pattern: username_pattern)
79
+ return if html == content
80
+
81
+ text.replace(html, as: :html)
84
82
  end
85
83
 
86
84
  # The URL to provide when someone @mentions a "mention" name, such
@@ -90,7 +88,7 @@ module HTML
90
88
  end
91
89
 
92
90
  def username_pattern
93
- context[:username_pattern] || UsernamePattern
91
+ context[:username_pattern] || USERNAME_PATTERN
94
92
  end
95
93
 
96
94
  # Replace user @mentions in text with links to the mentioned user's
@@ -105,35 +103,23 @@ module HTML
105
103
  #
106
104
  # Returns a string with @mentions replaced with links. All links have a
107
105
  # 'user-mention' class name attached for styling.
108
- def mention_link_filter(text, _base_url = '/', info_url = nil, username_pattern = UsernamePattern)
109
- self.class.mentioned_logins_in(text, username_pattern) do |match, login, is_mentioned|
110
- link =
111
- if is_mentioned
112
- link_to_mention_info(login, info_url)
113
- else
114
- link_to_mentioned_user(login)
115
- end
106
+ def mention_link_filter(text, base_url: "/", username_pattern: USERNAME_PATTERN)
107
+ self.class.mentioned_logins_in(text, username_pattern) do |match, login|
108
+ link = link_to_mentioned_user(base_url, login)
116
109
 
117
110
  link ? match.sub("@#{login}", link) : match
118
111
  end
119
112
  end
120
113
 
121
- def link_to_mention_info(text, info_url = nil)
122
- return "@#{text}" if info_url.nil?
123
- "<a href='#{info_url}' class='user-mention'>" \
124
- "@#{text}" \
125
- '</a>'
126
- end
127
-
128
- def link_to_mentioned_user(login)
114
+ def link_to_mentioned_user(base_url, login)
129
115
  result[:mentioned_usernames] |= [login]
130
116
 
131
117
  url = base_url.dup
132
- url << '/' unless url =~ /[\/~]\z/
118
+ url << "/" unless %r{[/~]\z}.match?(url)
133
119
 
134
- "<a href='#{url << login}' class='user-mention'>" \
120
+ "<a href=\"#{url << login}\" class=\"user-mention\">" \
135
121
  "@#{login}" \
136
- '</a>'
122
+ "</a>"
137
123
  end
138
124
  end
139
125
  end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ HTMLPipeline.require_dependency("rouge", "SyntaxHighlightFilter")
4
+
5
+ class HTMLPipeline
6
+ class NodeFilter
7
+ # HTML Filter that syntax highlights text inside code blocks.
8
+ #
9
+ # Context options:
10
+ #
11
+ # :highlight => String naming the fallback language used to pick a lexer when the pre element has no lang attribute. Defaults to nil.
12
+ # :scope => String used as the class prefix added to the pre element.
13
+ # Defaults to "highlight", giving "highlight highlight-css" for a css code block.
14
+ #
15
+ # This filter does not write any additional information to the context hash.
16
+ class SyntaxHighlightFilter < NodeFilter
17
+ def initialize(context: {}, result: {})
18
+ super(context: context, result: result)
19
+ # TODO: test the optionality of this
20
+ @formatter = context[:formatter] || Rouge::Formatters::HTML.new
21
+ end
22
+
23
+ SELECTOR = Selma::Selector.new(match_element: "pre", match_text_within: "pre")
24
+
25
+ def selector
26
+ SELECTOR
27
+ end
28
+
29
+ def handle_element(element)
30
+ default = context[:highlight]&.to_s
31
+ @lang = element["lang"] || default
32
+
33
+ scope = context.fetch(:scope, "highlight")
34
+
35
+ element["class"] = "#{scope} #{scope}-#{@lang}" if include_lang?
36
+ end
37
+
38
+ def handle_text_chunk(text)
39
+ return if @lang.nil?
40
+ return if (lexer = lexer_for(@lang)).nil?
41
+
42
+ content = text.to_s
43
+
44
+ text.replace(highlight_with_timeout_handling(content, lexer), as: :html)
45
+ end
46
+
47
+ def highlight_with_timeout_handling(text, lexer)
48
+ Rouge.highlight(text, lexer, @formatter)
49
+ rescue Timeout::Error => _e
50
+ text
51
+ end
52
+
53
+ def lexer_for(lang)
54
+ Rouge::Lexer.find(lang)
55
+ end
56
+
57
+ def include_lang?
58
+ !@lang.nil? && !@lang.empty?
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTMLPipeline
4
+ class NodeFilter
5
+ # Generates a Table of Contents: an array of hashes containing:
6
+ # * `href`: the relative link to the header
7
+ # * `text`: the text of the header
8
+
9
+ # Examples
10
+ #
11
+ # TocPipeline =
12
+ # HTMLPipeline.new [
13
+ # HTMLPipeline::NodeFilter::TableOfContentsFilter
14
+ # ]
15
+ # # => #<HTMLPipeline:0x007fc13c4528d8...>
16
+ # orig = %(<h1>Ice cube</h1><p>is not for the pop chart</p>)
17
+ # # => "<h1>Ice cube</h1><p>is not for the pop chart</p>"
18
+ # result = {}
19
+ # # => {}
20
+ # TocPipeline.call(orig, {}, result)
21
+ # # => {:toc=> ...}
22
+ # result[:toc]
23
+ # # => [{:href=>"#ice-cube", :text=>"Ice cube"}]
24
+ # result[:output].to_s
25
+ # # => "<h1>\n<a id=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..."
26
+ class TableOfContentsFilter < NodeFilter
27
+ SELECTOR = Selma::Selector.new(
28
+ match_element: "h1 a[href], h2 a[href], h3 a[href], h4 a[href], h5 a[href], h6 a[href]",
29
+ match_text_within: "h1, h2, h3, h4, h5, h6",
30
+ )
31
+
32
+ def selector
33
+ SELECTOR
34
+ end
35
+
36
+ # The icon that will be placed next to an anchored rendered markdown header
37
+ def anchor_html
38
+ @context[:anchor_html] || %(<span aria-hidden="true" class="anchor"></span>)
39
+ end
40
+
41
+ # The class that will be attached on the anchored rendered markdown header
42
+ def classes
43
+ context[:classes] || "anchor"
44
+ end
45
+
46
+ def after_initialize
47
+ result[:toc] = []
48
+ end
49
+
50
+ def handle_element(element)
51
+ header_href = element["href"]
52
+
53
+ return unless header_href.start_with?("#")
54
+
55
+ header_id = header_href[1..-1]
56
+
57
+ element["id"] = header_id
58
+ element["class"] = classes
59
+
60
+ element.set_inner_content(anchor_html, as: :html)
61
+
62
+ result[:toc] << { href: header_href }
63
+ end
64
+
65
+ def handle_text_chunk(text)
66
+ result[:toc].last[:text] = text.to_s
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ class HTMLPipeline
6
+ class NodeFilter
7
+ # HTML filter that replaces @org/team mentions with links. Mentions within
8
+ # <pre>, <code>, <a>, <style>, and <script> elements are ignored.
9
+ #
10
+ # Context options:
11
+ # :base_url - Used to construct links to team profile pages for each
12
+ # mention.
13
+ # :team_pattern - Used to provide a custom regular expression to
14
+ # identify team names
15
+ #
16
+ class TeamMentionFilter < NodeFilter
17
+ class << self
18
+ # Public: Find @org/team mentions in text. See
19
+ # TeamMentionFilter#team_mention_link_filter.
20
+ #
21
+ # TeamMentionFilter.mentioned_teams_in(text) do |match, org, team|
22
+ # "<a href=...>#{team}</a>"
23
+ # end
24
+ #
25
+ # text - String text to search.
26
+ #
27
+ # Yields the String match, org name, and team name. The yield's
28
+ # return replaces the match in the original text.
29
+ #
30
+ # Returns a String replaced with the return of the block.
31
+ def mentioned_teams_in(text, team_pattern = TEAM_PATTERN)
32
+ text.gsub(team_pattern) do |match|
33
+ org = Regexp.last_match(1)
34
+ team = Regexp.last_match(2)
35
+ yield match, org, team
36
+ end
37
+ end
38
+ end
39
+
40
+ # Default pattern used to extract team names from text. The value can be
41
+ # overridden by providing the team_pattern variable in the context. To
42
+ # properly link the mention, it should capture the org as group 1 and the team as group 2, e.g. /@(1)\/(2)/.
43
+ TEAM_PATTERN = %r{
44
+ (?<=^|\W) # beginning of string or non-word char
45
+ @([a-z0-9][a-z0-9-]*) # @organization
46
+ (?:/|&\#47;?) # dividing slash
47
+ ([a-z0-9][a-z0-9\-_]*) # team
48
+ \b
49
+ }ix
50
+
51
+ # Don't look for mentions in text nodes that are children of these elements
52
+ IGNORE_PARENTS = ["pre", "code", "a", "style", "script"]
53
+
54
+ SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: IGNORE_PARENTS)
55
+
56
+ def after_initialize
57
+ result[:mentioned_teams] = []
58
+ end
59
+
60
+ def selector
61
+ SELECTOR
62
+ end
63
+
64
+ def handle_text_chunk(text)
65
+ content = text.to_s
66
+ return unless content.include?("@")
67
+
68
+ text.replace(mention_link_filter(content, base_url: base_url, team_pattern: team_pattern), as: :html)
69
+ end
70
+
71
+ def team_pattern
72
+ context[:team_pattern] || TEAM_PATTERN
73
+ end
74
+
75
+ # Replace @org/team mentions in text with links to the mentioned team's
76
+ # page.
77
+ #
78
+ # text - String text to replace @mention team names in.
79
+ # base_url - The base URL used to construct team page URLs.
80
+ # team_pattern - Regular expression used to identify teams in text
81
+ #
82
+ # Returns a string with @team mentions replaced with links. All links have a
83
+ # 'team-mention' class name attached for styling.
84
+ def mention_link_filter(text, base_url: "/", team_pattern: TEAM_PATTERN)
85
+ self.class.mentioned_teams_in(text, team_pattern) do |match, org, team|
86
+ link = link_to_mentioned_team(base_url, org, team)
87
+ seperator = %r{/|&\#47;?}
88
+
89
+ link ? match.sub(/@#{org}#{seperator}#{team}/, link) : match
90
+ end
91
+ end
92
+
93
+ def link_to_mentioned_team(base_url, org, team)
94
+ result[:mentioned_teams] |= [team]
95
+
96
+ url = base_url.dup
97
+ url << "/" unless %r{[/~]\z}.match?(url)
98
+
99
+ "<a href=\"#{url << org}/#{team}\" class=\"team-mention\">" \
100
+ "@#{org}/#{team}" \
101
+ "</a>"
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "selma"
4
+
5
+ class HTMLPipeline
6
+ class NodeFilter < Filter
7
+ def initialize(context: {}, result: {})
8
+ super(context: context, result: {})
9
+ send(:after_initialize) if respond_to?(:after_initialize)
10
+ end
11
+
12
+ # The String representation of the document.
13
+ def html
14
+ raise InvalidDocumentException if @html.nil? && @doc.nil?
15
+
16
+ @html || doc.to_html
17
+ end
18
+
19
+ def reset!
20
+ result = {} # rubocop:disable Lint/UselessAssignment
21
+ send(:after_initialize) if respond_to?(:after_initialize)
22
+ end
23
+
24
+ class << self
25
+ def call(html, context: {}, result: {})
26
+ node_filter = new(context: context, result: result)
27
+ Selma::Rewriter.new(sanitizer: nil, handlers: [node_filter]).rewrite(html)
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,188 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTMLPipeline
4
+ # A special filter with sanitization routines and allowlists. This class defines
5
+ # what HTML is allowed in user provided content and fixes up issues with
6
+ # unbalanced tags and whatnot.
7
+ #
8
+ # See the Selma docs for more information on the underlying library:
9
+ #
10
+ # https://github.com/gjtorikian/selma/#readme
11
+ #
12
+ # This filter does not write additional information to the context.
13
+ class SanitizationFilter
14
+ VALID_PROTOCOLS = Selma::Sanitizer::Config::VALID_PROTOCOLS.dup
15
+
16
+ # The main sanitization allowlist. Only these elements and attributes are
17
+ # allowed through by default.
18
+ DEFAULT_CONFIG = Selma::Sanitizer::Config.freeze_config({
19
+ elements: [
20
+ "h1",
21
+ "h2",
22
+ "h3",
23
+ "h4",
24
+ "h5",
25
+ "h6",
26
+ "br",
27
+ "b",
28
+ "i",
29
+ "strong",
30
+ "em",
31
+ "a",
32
+ "pre",
33
+ "code",
34
+ "img",
35
+ "tt",
36
+ "div",
37
+ "ins",
38
+ "del",
39
+ "sup",
40
+ "sub",
41
+ "p",
42
+ "picture",
43
+ "ol",
44
+ "ul",
45
+ "table",
46
+ "thead",
47
+ "tbody",
48
+ "tfoot",
49
+ "blockquote",
50
+ "dl",
51
+ "dt",
52
+ "dd",
53
+ "kbd",
54
+ "q",
55
+ "samp",
56
+ "var",
57
+ "hr",
58
+ "ruby",
59
+ "rt",
60
+ "rp",
61
+ "li",
62
+ "tr",
63
+ "td",
64
+ "th",
65
+ "s",
66
+ "strike",
67
+ "summary",
68
+ "details",
69
+ "caption",
70
+ "figure",
71
+ "figcaption",
72
+ "abbr",
73
+ "bdo",
74
+ "cite",
75
+ "dfn",
76
+ "mark",
77
+ "small",
78
+ "source",
79
+ "span",
80
+ "time",
81
+ "wbr",
82
+ ],
83
+
84
+ attributes: {
85
+ "a" => ["href"],
86
+ "img" => ["src", "longdesc", "loading", "alt"],
87
+ "div" => ["itemscope", "itemtype"],
88
+ "blockquote" => ["cite"],
89
+ "del" => ["cite"],
90
+ "ins" => ["cite"],
91
+ "q" => ["cite"],
92
+ "source" => ["srcset"],
93
+ all: [
94
+ "abbr",
95
+ "accept",
96
+ "accept-charset",
97
+ "accesskey",
98
+ "action",
99
+ "align",
100
+ "alt",
101
+ "aria-describedby",
102
+ "aria-hidden",
103
+ "aria-label",
104
+ "aria-labelledby",
105
+ "axis",
106
+ "border",
107
+ "char",
108
+ "charoff",
109
+ "charset",
110
+ "checked",
111
+ "clear",
112
+ "cols",
113
+ "colspan",
114
+ "compact",
115
+ "coords",
116
+ "datetime",
117
+ "dir",
118
+ "disabled",
119
+ "enctype",
120
+ "for",
121
+ "frame",
122
+ "headers",
123
+ "height",
124
+ "hreflang",
125
+ "hspace",
126
+ "id",
127
+ "ismap",
128
+ "label",
129
+ "lang",
130
+ "maxlength",
131
+ "media",
132
+ "method",
133
+ "multiple",
134
+ "name",
135
+ "nohref",
136
+ "noshade",
137
+ "nowrap",
138
+ "open",
139
+ "progress",
140
+ "prompt",
141
+ "readonly",
142
+ "rel",
143
+ "rev",
144
+ "role",
145
+ "rows",
146
+ "rowspan",
147
+ "rules",
148
+ "scope",
149
+ "selected",
150
+ "shape",
151
+ "size",
152
+ "span",
153
+ "start",
154
+ "summary",
155
+ "tabindex",
156
+ "title",
157
+ "type",
158
+ "usemap",
159
+ "valign",
160
+ "value",
161
+ "width",
162
+ "itemprop",
163
+ ],
164
+ },
165
+ protocols: {
166
+ "a" => { "href" => Selma::Sanitizer::Config::VALID_PROTOCOLS }.freeze,
167
+ "blockquote" => { "cite" => ["http", "https", :relative].freeze },
168
+ "del" => { "cite" => ["http", "https", :relative].freeze },
169
+ "ins" => { "cite" => ["http", "https", :relative].freeze },
170
+ "q" => { "cite" => ["http", "https", :relative].freeze },
171
+ "img" => {
172
+ "src" => ["http", "https", :relative].freeze,
173
+ "longdesc" => ["http", "https", :relative].freeze,
174
+ },
175
+ },
176
+ })
177
+
178
+ class << self
179
+ def call(html, config)
180
+ raise ArgumentError, "html must be a String, not #{html.class}" unless html.is_a?(String)
181
+ raise ArgumentError, "config must be a Hash, not #{config.class}" unless config.is_a?(Hash)
182
+
183
+ sanitization_config = Selma::Sanitizer.new(config)
184
+ Selma::Rewriter.new(sanitizer: sanitization_config).rewrite(html)
185
+ end
186
+ end
187
+ end
188
+ end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module HTML
4
- class Pipeline
3
+ class HTMLPipeline
4
+ class TextFilter
5
5
  # HTML Filter that converts image's url into <img> tag.
6
6
  # For example, it will convert
7
7
  # http://example.com/test.jpg
@@ -10,7 +10,7 @@ module HTML
10
10
 
11
11
  class ImageFilter < TextFilter
12
12
  def call
13
- @text.gsub(/(https|http)?:\/\/.+\.(jpg|jpeg|bmp|gif|png)(\?\S+)?/i) do |match|
13
+ @text.gsub(%r{(https|http)?://.+\.(jpg|jpeg|bmp|gif|png)(\?\S+)?}i) do |match|
14
14
  %(<img src="#{match}" alt=""/>)
15
15
  end
16
16
  end
@@ -1,14 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- HTML::Pipeline.require_dependency('escape_utils', 'PlainTextInputFilter')
4
-
5
- module HTML
6
- class Pipeline
3
+ class HTMLPipeline
4
+ class TextFilter
7
5
  # Simple filter for plain text input. HTML escapes the text input and wraps it
8
6
  # in a div.
9
7
  class PlainTextInputFilter < TextFilter
10
8
  def call
11
- "<div>#{CGI.escape_html(@text)}</div>"
9
+ "<div>#{CGI.escapeHTML(@text)}</div>"
12
10
  end
13
11
  end
14
12
  end