html-pipeline 2.14.3 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/.github/FUNDING.yml +11 -3
  3. data/.github/dependabot.yml +27 -0
  4. data/.github/workflows/automerge.yml +13 -0
  5. data/.github/workflows/ci.yml +22 -0
  6. data/.github/workflows/lint.yml +23 -0
  7. data/.github/workflows/publish.yml +19 -0
  8. data/.rubocop.yml +17 -0
  9. data/.ruby-version +1 -0
  10. data/.vscode/settings.json +8 -0
  11. data/CHANGELOG.md +119 -2
  12. data/Gemfile +31 -15
  13. data/{LICENSE → LICENSE.txt} +2 -2
  14. data/README.md +241 -224
  15. data/Rakefile +14 -7
  16. data/UPGRADING.md +34 -0
  17. data/html-pipeline.gemspec +31 -21
  18. data/lib/html-pipeline.rb +3 -0
  19. data/lib/html_pipeline/convert_filter/markdown_filter.rb +26 -0
  20. data/lib/html_pipeline/convert_filter.rb +17 -0
  21. data/lib/html_pipeline/filter.rb +89 -0
  22. data/lib/html_pipeline/node_filter/absolute_source_filter.rb +54 -0
  23. data/lib/html_pipeline/node_filter/asset_proxy_filter.rb +86 -0
  24. data/lib/{html/pipeline → html_pipeline/node_filter}/emoji_filter.rb +58 -54
  25. data/lib/html_pipeline/node_filter/https_filter.rb +22 -0
  26. data/lib/html_pipeline/node_filter/image_max_width_filter.rb +40 -0
  27. data/lib/{html/pipeline/@mention_filter.rb → html_pipeline/node_filter/mention_filter.rb} +54 -68
  28. data/lib/html_pipeline/node_filter/syntax_highlight_filter.rb +62 -0
  29. data/lib/html_pipeline/node_filter/table_of_contents_filter.rb +70 -0
  30. data/lib/html_pipeline/node_filter/team_mention_filter.rb +105 -0
  31. data/lib/html_pipeline/node_filter.rb +31 -0
  32. data/lib/html_pipeline/sanitization_filter.rb +188 -0
  33. data/lib/{html/pipeline → html_pipeline/text_filter}/image_filter.rb +3 -3
  34. data/lib/{html/pipeline → html_pipeline/text_filter}/plain_text_input_filter.rb +3 -5
  35. data/lib/html_pipeline/text_filter.rb +21 -0
  36. data/lib/html_pipeline/version.rb +5 -0
  37. data/lib/html_pipeline.rb +281 -0
  38. metadata +58 -54
  39. data/.travis.yml +0 -43
  40. data/Appraisals +0 -19
  41. data/CONTRIBUTING.md +0 -60
  42. data/bin/html-pipeline +0 -78
  43. data/lib/html/pipeline/@team_mention_filter.rb +0 -99
  44. data/lib/html/pipeline/absolute_source_filter.rb +0 -52
  45. data/lib/html/pipeline/autolink_filter.rb +0 -34
  46. data/lib/html/pipeline/body_content.rb +0 -44
  47. data/lib/html/pipeline/camo_filter.rb +0 -105
  48. data/lib/html/pipeline/email_reply_filter.rb +0 -69
  49. data/lib/html/pipeline/filter.rb +0 -165
  50. data/lib/html/pipeline/https_filter.rb +0 -29
  51. data/lib/html/pipeline/image_max_width_filter.rb +0 -37
  52. data/lib/html/pipeline/markdown_filter.rb +0 -56
  53. data/lib/html/pipeline/sanitization_filter.rb +0 -144
  54. data/lib/html/pipeline/syntax_highlight_filter.rb +0 -50
  55. data/lib/html/pipeline/text_filter.rb +0 -16
  56. data/lib/html/pipeline/textile_filter.rb +0 -25
  57. data/lib/html/pipeline/toc_filter.rb +0 -69
  58. data/lib/html/pipeline/version.rb +0 -7
  59. data/lib/html/pipeline.rb +0 -210
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'set'
3
+ require "set"
4
4
 
5
- module HTML
6
- class Pipeline
5
+ class HTMLPipeline
6
+ class NodeFilter
7
7
  # HTML filter that replaces @user mentions with links. Mentions within <pre>,
8
8
  # <code>, <a>, <style>, and <script> elements are ignored. Mentions that
9
9
  # reference users that do not exist are ignored.
@@ -16,71 +16,69 @@ module HTML
16
16
  # :username_pattern - Used to provide a custom regular expression to
17
17
  # identify usernames
18
18
  #
19
- class MentionFilter < Filter
20
- # Public: Find user @mentions in text. See
21
- # MentionFilter#mention_link_filter.
22
- #
23
- # MentionFilter.mentioned_logins_in(text) do |match, login, is_mentioned|
24
- # "<a href=...>#{login}</a>"
25
- # end
26
- #
27
- # text - String text to search.
28
- #
29
- # Yields the String match, the String login name, and a Boolean determining
30
- # if the match = "@mention[ed]". The yield's return replaces the match in
31
- # the original text.
32
- #
33
- # Returns a String replaced with the return of the block.
34
- def self.mentioned_logins_in(text, username_pattern = UsernamePattern)
35
- text.gsub MentionPatterns[username_pattern] do |match|
36
- login = Regexp.last_match(1)
37
- yield match, login, MentionLogins.include?(login.downcase)
19
+ class MentionFilter < NodeFilter
20
+ class << self
21
+ # Public: Find user @mentions in text. See
22
+ # MentionFilter#mention_link_filter.
23
+ #
24
+ # MentionFilter.mentioned_logins_in(text) do |match, login|
25
+ # "<a href=...>#{login}</a>"
26
+ # end
27
+ #
28
+ # text - String text to search.
29
+ #
30
+ # Yields the String match and the String login name (the username captured
31
+ # after the "@"). The yield's return replaces the match in the original
32
+ # text.
33
+ #
34
+ # Returns a String replaced with the return of the block.
35
+ def mentioned_logins_in(text, username_pattern = USERNAME_PATTERN)
36
+ text.gsub(MENTION_PATTERNS[username_pattern]) do |match|
37
+ login = Regexp.last_match(1)
38
+ yield match, login
39
+ end
38
40
  end
39
41
  end
40
-
41
42
  # Hash that contains all of the mention patterns used by the pipeline
42
- MentionPatterns = Hash.new do |hash, key|
43
- hash[key] = /
43
+ MENTION_PATTERNS = Hash.new do |hash, key|
44
+ hash[key] = %r{
44
45
  (?:^|\W) # beginning of string or non-word char
45
46
  @((?>#{key})) # @username
46
- (?!\/) # without a trailing slash
47
+ (?!/) # without a trailing slash
47
48
  (?=
48
49
  \.+[ \t\W]| # dots followed by space or non-word character
49
50
  \.+$| # dots at end of line
50
51
  [^0-9a-zA-Z_.]| # non-word character except dot
51
52
  $ # end of line
52
53
  )
53
- /ix
54
+ }ix
54
55
  end
55
56
 
56
57
  # Default pattern used to extract usernames from text. The value can be
57
58
  # overriden by providing the username_pattern variable in the context.
58
- UsernamePattern = /[a-z0-9][a-z0-9-]*/
59
-
60
- # List of username logins that, when mentioned, link to the blog post
61
- # about @mentions instead of triggering a real mention.
62
- MentionLogins = %w[
63
- mention
64
- mentions
65
- mentioned
66
- mentioning
67
- ].freeze
59
+ USERNAME_PATTERN = /[a-z0-9][a-z0-9-]*/
68
60
 
69
61
  # Don't look for mentions in text nodes that are children of these elements
70
- IGNORE_PARENTS = %w(pre code a style script).to_set
62
+ IGNORE_PARENTS = ["pre", "code", "a", "style", "script"]
71
63
 
72
- def call
64
+ SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: IGNORE_PARENTS)
65
+
66
+ def after_initialize
73
67
  result[:mentioned_usernames] ||= []
68
+ end
74
69
 
75
- doc.search('.//text()').each do |node|
76
- content = node.to_html
77
- next unless content.include?('@')
78
- next if has_ancestor?(node, IGNORE_PARENTS)
79
- html = mention_link_filter(content, base_url, info_url, username_pattern)
80
- next if html == content
81
- node.replace(html)
82
- end
83
- doc
70
+ def selector
71
+ SELECTOR
72
+ end
73
+
74
+ def handle_text_chunk(text)
75
+ content = text.to_s
76
+ return unless content.include?("@")
77
+
78
+ html = mention_link_filter(content, base_url: base_url, username_pattern: username_pattern)
79
+ return if html == content
80
+
81
+ text.replace(html, as: :html)
84
82
  end
85
83
 
86
84
  # The URL to provide when someone @mentions a "mention" name, such
@@ -90,7 +88,7 @@ module HTML
90
88
  end
91
89
 
92
90
  def username_pattern
93
- context[:username_pattern] || UsernamePattern
91
+ context[:username_pattern] || USERNAME_PATTERN
94
92
  end
95
93
 
96
94
  # Replace user @mentions in text with links to the mentioned user's
@@ -105,35 +103,23 @@ module HTML
105
103
  #
106
104
  # Returns a string with @mentions replaced with links. All links have a
107
105
  # 'user-mention' class name attached for styling.
108
- def mention_link_filter(text, _base_url = '/', info_url = nil, username_pattern = UsernamePattern)
109
- self.class.mentioned_logins_in(text, username_pattern) do |match, login, is_mentioned|
110
- link =
111
- if is_mentioned
112
- link_to_mention_info(login, info_url)
113
- else
114
- link_to_mentioned_user(login)
115
- end
106
+ def mention_link_filter(text, base_url: "/", username_pattern: USERNAME_PATTERN)
107
+ self.class.mentioned_logins_in(text, username_pattern) do |match, login|
108
+ link = link_to_mentioned_user(base_url, login)
116
109
 
117
110
  link ? match.sub("@#{login}", link) : match
118
111
  end
119
112
  end
120
113
 
121
- def link_to_mention_info(text, info_url = nil)
122
- return "@#{text}" if info_url.nil?
123
- "<a href='#{info_url}' class='user-mention'>" \
124
- "@#{text}" \
125
- '</a>'
126
- end
127
-
128
- def link_to_mentioned_user(login)
114
+ def link_to_mentioned_user(base_url, login)
129
115
  result[:mentioned_usernames] |= [login]
130
116
 
131
117
  url = base_url.dup
132
- url << '/' unless url =~ /[\/~]\z/
118
+ url << "/" unless %r{[/~]\z}.match?(url)
133
119
 
134
- "<a href='#{url << login}' class='user-mention'>" \
120
+ "<a href=\"#{url << login}\" class=\"user-mention\">" \
135
121
  "@#{login}" \
136
- '</a>'
122
+ "</a>"
137
123
  end
138
124
  end
139
125
  end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ HTMLPipeline.require_dependency("rouge", "SyntaxHighlightFilter")
4
+
5
+ class HTMLPipeline
6
+ class NodeFilter
7
+ # HTML Filter that syntax highlights text inside code blocks.
8
+ #
9
+ # Context options:
10
+ #
11
+ # :highlight => String naming the language used to pick a lexer when the pre element has no lang attribute. Defaults to nil.
12
+ # :scope => String used as the class attribute prefix added to the pre element. Defaults to "highlight",
13
+ # which yields "highlight highlight-css" when highlighting a css code block.
14
+ #
15
+ # This filter does not write any additional information to the context hash.
16
+ class SyntaxHighlightFilter < NodeFilter
17
+ def initialize(context: {}, result: {})
18
+ super(context: context, result: result)
19
+ # TODO: test the optionality of this
20
+ @formatter = context[:formatter] || Rouge::Formatters::HTML.new
21
+ end
22
+
23
+ SELECTOR = Selma::Selector.new(match_element: "pre", match_text_within: "pre")
24
+
25
+ def selector
26
+ SELECTOR
27
+ end
28
+
29
+ def handle_element(element)
30
+ default = context[:highlight]&.to_s
31
+ @lang = element["lang"] || default
32
+
33
+ scope = context.fetch(:scope, "highlight")
34
+
35
+ element["class"] = "#{scope} #{scope}-#{@lang}" if include_lang?
36
+ end
37
+
38
+ def handle_text_chunk(text)
39
+ return if @lang.nil?
40
+ return if (lexer = lexer_for(@lang)).nil?
41
+
42
+ content = text.to_s
43
+
44
+ text.replace(highlight_with_timeout_handling(content, lexer), as: :html)
45
+ end
46
+
47
+ def highlight_with_timeout_handling(text, lexer)
48
+ Rouge.highlight(text, lexer, @formatter)
49
+ rescue Timeout::Error => _e
50
+ text
51
+ end
52
+
53
+ def lexer_for(lang)
54
+ Rouge::Lexer.find(lang)
55
+ end
56
+
57
+ def include_lang?
58
+ !@lang.nil? && !@lang.empty?
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTMLPipeline
4
+ class NodeFilter
5
+ # Generates a Table of Contents: an array of hashes containing:
6
+ # * `href`: the relative link to the header
7
+ # * `text`: the text of the header
8
+
9
+ # Examples
10
+ #
11
+ # TocPipeline =
12
+ # HTMLPipeline.new [
13
+ # HTMLPipeline::NodeFilter::TableOfContentsFilter
14
+ # ]
15
+ # # => #<HTMLPipeline:0x007fc13c4528d8...>
16
+ # orig = %(<h1>Ice cube</h1><p>is not for the pop chart</p>)
17
+ # # => "<h1>Ice cube</h1><p>is not for the pop chart</p>"
18
+ # result = {}
19
+ # # => {}
20
+ # TocPipeline.call(orig, {}, result)
21
+ # # => {:toc=> ...}
22
+ # result[:toc]
23
+ # # => [{:href=>"#ice-cube", :text=>"Ice cube"}]
24
+ # result[:output].to_s
25
+ # # => "<h1>\n<a id=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..."
26
+ class TableOfContentsFilter < NodeFilter
27
+ SELECTOR = Selma::Selector.new(
28
+ match_element: "h1 a[href], h2 a[href], h3 a[href], h4 a[href], h5 a[href], h6 a[href]",
29
+ match_text_within: "h1, h2, h3, h4, h5, h6",
30
+ )
31
+
32
+ def selector
33
+ SELECTOR
34
+ end
35
+
36
+ # The icon that will be placed next to an anchored rendered markdown header
37
+ def anchor_html
38
+ @context[:anchor_html] || %(<span aria-hidden="true" class="anchor"></span>)
39
+ end
40
+
41
+ # The class that will be attached on the anchored rendered markdown header
42
+ def classes
43
+ context[:classes] || "anchor"
44
+ end
45
+
46
+ def after_initialize
47
+ result[:toc] = []
48
+ end
49
+
50
+ def handle_element(element)
51
+ header_href = element["href"]
52
+
53
+ return unless header_href.start_with?("#")
54
+
55
+ header_id = header_href[1..-1]
56
+
57
+ element["id"] = header_id
58
+ element["class"] = classes
59
+
60
+ element.set_inner_content(anchor_html, as: :html)
61
+
62
+ result[:toc] << { href: header_href }
63
+ end
64
+
65
+ def handle_text_chunk(text)
66
+ result[:toc].last[:text] = text.to_s
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ class HTMLPipeline
6
+ class NodeFilter
7
+ # HTML filter that replaces @org/team mentions with links. Mentions within
8
+ # <pre>, <code>, <a>, <style>, and <script> elements are ignored.
9
+ #
10
+ # Context options:
11
+ # :base_url - Used to construct links to team profile pages for each
12
+ # mention.
13
+ # :team_pattern - Used to provide a custom regular expression to
14
+ # identify team names
15
+ #
16
+ class TeamMentionFilter < NodeFilter
17
+ class << self
18
+ # Public: Find @org/team mentions in text. See
19
+ # TeamMentionFilter#team_mention_link_filter.
20
+ #
21
+ # TeamMentionFilter.mentioned_teams_in(text) do |match, org, team|
22
+ # "<a href=...>#{team}</a>"
23
+ # end
24
+ #
25
+ # text - String text to search.
26
+ #
27
+ # Yields the String match, org name, and team name. The yield's
28
+ # return replaces the match in the original text.
29
+ #
30
+ # Returns a String replaced with the return of the block.
31
+ def mentioned_teams_in(text, team_pattern = TEAM_PATTERN)
32
+ text.gsub(team_pattern) do |match|
33
+ org = Regexp.last_match(1)
34
+ team = Regexp.last_match(2)
35
+ yield match, org, team
36
+ end
37
+ end
38
+ end
39
+
40
+ # Default pattern used to extract team names from text. The value can be
41
+ # overridden by providing the team_pattern variable in the context. To
42
+ # properly link the mention, it should capture the org and team as /@(1)\/(2)/.
43
+ TEAM_PATTERN = %r{
44
+ (?<=^|\W) # beginning of string or non-word char
45
+ @([a-z0-9][a-z0-9-]*) # @organization
46
+ (?:/|&\#47;?) # dividing slash
47
+ ([a-z0-9][a-z0-9\-_]*) # team
48
+ \b
49
+ }ix
50
+
51
+ # Don't look for mentions in text nodes that are children of these elements
52
+ IGNORE_PARENTS = ["pre", "code", "a", "style", "script"]
53
+
54
+ SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: IGNORE_PARENTS)
55
+
56
+ def after_initialize
57
+ result[:mentioned_teams] = []
58
+ end
59
+
60
+ def selector
61
+ SELECTOR
62
+ end
63
+
64
+ def handle_text_chunk(text)
65
+ content = text.to_s
66
+ return unless content.include?("@")
67
+
68
+ text.replace(mention_link_filter(content, base_url: base_url, team_pattern: team_pattern), as: :html)
69
+ end
70
+
71
+ def team_pattern
72
+ context[:team_pattern] || TEAM_PATTERN
73
+ end
74
+
75
+ # Replace @org/team mentions in text with links to the mentioned team's
76
+ # page.
77
+ #
78
+ # text - String text to replace @mention team names in.
79
+ # base_url - The base URL used to construct team page URLs.
80
+ # team_pattern - Regular expression used to identify teams in text
81
+ #
82
+ # Returns a string with @team mentions replaced with links. All links have a
83
+ # 'team-mention' class name attached for styling.
84
+ def mention_link_filter(text, base_url: "/", team_pattern: TEAM_PATTERN)
85
+ self.class.mentioned_teams_in(text, team_pattern) do |match, org, team|
86
+ link = link_to_mentioned_team(base_url, org, team)
87
+ seperator = %r{/|&\#47;?}
88
+
89
+ link ? match.sub(/@#{org}#{seperator}#{team}/, link) : match
90
+ end
91
+ end
92
+
93
+ def link_to_mentioned_team(base_url, org, team)
94
+ result[:mentioned_teams] |= [team]
95
+
96
+ url = base_url.dup
97
+ url << "/" unless %r{[/~]\z}.match?(url)
98
+
99
+ "<a href=\"#{url << org}/#{team}\" class=\"team-mention\">" \
100
+ "@#{org}/#{team}" \
101
+ "</a>"
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "selma"
4
+
5
+ class HTMLPipeline
6
+ class NodeFilter < Filter
7
+ def initialize(context: {}, result: {})
8
+ super(context: context, result: {})
9
+ send(:after_initialize) if respond_to?(:after_initialize)
10
+ end
11
+
12
+ # The String representation of the document.
13
+ def html
14
+ raise InvalidDocumentException if @html.nil? && @doc.nil?
15
+
16
+ @html || doc.to_html
17
+ end
18
+
19
+ def reset!
20
+ result = {} # rubocop:disable Lint/UselessAssignment
21
+ send(:after_initialize) if respond_to?(:after_initialize)
22
+ end
23
+
24
+ class << self
25
+ def call(html, context: {}, result: {})
26
+ node_filter = new(context: context, result: result)
27
+ Selma::Rewriter.new(sanitizer: nil, handlers: [node_filter]).rewrite(html)
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,188 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTMLPipeline
4
+ # A special filter with sanitization routines and allowlists. This class defines
5
+ # what HTML is allowed in user provided content and fixes up issues with
6
+ # unbalanced tags and whatnot.
7
+ #
8
+ # See the Selma docs for more information on the underlying library:
9
+ #
10
+ # https://github.com/gjtorikian/selma/#readme
11
+ #
12
+ # This filter does not write additional information to the context.
13
+ class SanitizationFilter
14
+ VALID_PROTOCOLS = Selma::Sanitizer::Config::VALID_PROTOCOLS.dup
15
+
16
+ # The main sanitization allowlist. Only these elements and attributes are
17
+ # allowed through by default.
18
+ DEFAULT_CONFIG = Selma::Sanitizer::Config.freeze_config({
19
+ elements: [
20
+ "h1",
21
+ "h2",
22
+ "h3",
23
+ "h4",
24
+ "h5",
25
+ "h6",
26
+ "br",
27
+ "b",
28
+ "i",
29
+ "strong",
30
+ "em",
31
+ "a",
32
+ "pre",
33
+ "code",
34
+ "img",
35
+ "tt",
36
+ "div",
37
+ "ins",
38
+ "del",
39
+ "sup",
40
+ "sub",
41
+ "p",
42
+ "picture",
43
+ "ol",
44
+ "ul",
45
+ "table",
46
+ "thead",
47
+ "tbody",
48
+ "tfoot",
49
+ "blockquote",
50
+ "dl",
51
+ "dt",
52
+ "dd",
53
+ "kbd",
54
+ "q",
55
+ "samp",
56
+ "var",
57
+ "hr",
58
+ "ruby",
59
+ "rt",
60
+ "rp",
61
+ "li",
62
+ "tr",
63
+ "td",
64
+ "th",
65
+ "s",
66
+ "strike",
67
+ "summary",
68
+ "details",
69
+ "caption",
70
+ "figure",
71
+ "figcaption",
72
+ "abbr",
73
+ "bdo",
74
+ "cite",
75
+ "dfn",
76
+ "mark",
77
+ "small",
78
+ "source",
79
+ "span",
80
+ "time",
81
+ "wbr",
82
+ ],
83
+
84
+ attributes: {
85
+ "a" => ["href"],
86
+ "img" => ["src", "longdesc", "loading", "alt"],
87
+ "div" => ["itemscope", "itemtype"],
88
+ "blockquote" => ["cite"],
89
+ "del" => ["cite"],
90
+ "ins" => ["cite"],
91
+ "q" => ["cite"],
92
+ "source" => ["srcset"],
93
+ all: [
94
+ "abbr",
95
+ "accept",
96
+ "accept-charset",
97
+ "accesskey",
98
+ "action",
99
+ "align",
100
+ "alt",
101
+ "aria-describedby",
102
+ "aria-hidden",
103
+ "aria-label",
104
+ "aria-labelledby",
105
+ "axis",
106
+ "border",
107
+ "char",
108
+ "charoff",
109
+ "charset",
110
+ "checked",
111
+ "clear",
112
+ "cols",
113
+ "colspan",
114
+ "compact",
115
+ "coords",
116
+ "datetime",
117
+ "dir",
118
+ "disabled",
119
+ "enctype",
120
+ "for",
121
+ "frame",
122
+ "headers",
123
+ "height",
124
+ "hreflang",
125
+ "hspace",
126
+ "id",
127
+ "ismap",
128
+ "label",
129
+ "lang",
130
+ "maxlength",
131
+ "media",
132
+ "method",
133
+ "multiple",
134
+ "name",
135
+ "nohref",
136
+ "noshade",
137
+ "nowrap",
138
+ "open",
139
+ "progress",
140
+ "prompt",
141
+ "readonly",
142
+ "rel",
143
+ "rev",
144
+ "role",
145
+ "rows",
146
+ "rowspan",
147
+ "rules",
148
+ "scope",
149
+ "selected",
150
+ "shape",
151
+ "size",
152
+ "span",
153
+ "start",
154
+ "summary",
155
+ "tabindex",
156
+ "title",
157
+ "type",
158
+ "usemap",
159
+ "valign",
160
+ "value",
161
+ "width",
162
+ "itemprop",
163
+ ],
164
+ },
165
+ protocols: {
166
+ "a" => { "href" => Selma::Sanitizer::Config::VALID_PROTOCOLS }.freeze,
167
+ "blockquote" => { "cite" => ["http", "https", :relative].freeze },
168
+ "del" => { "cite" => ["http", "https", :relative].freeze },
169
+ "ins" => { "cite" => ["http", "https", :relative].freeze },
170
+ "q" => { "cite" => ["http", "https", :relative].freeze },
171
+ "img" => {
172
+ "src" => ["http", "https", :relative].freeze,
173
+ "longdesc" => ["http", "https", :relative].freeze,
174
+ },
175
+ },
176
+ })
177
+
178
+ class << self
179
+ def call(html, config)
180
+ raise ArgumentError, "html must be a String, not #{html.class}" unless html.is_a?(String)
181
+ raise ArgumentError, "config must be a Hash, not #{config.class}" unless config.is_a?(Hash)
182
+
183
+ sanitization_config = Selma::Sanitizer.new(config)
184
+ Selma::Rewriter.new(sanitizer: sanitization_config).rewrite(html)
185
+ end
186
+ end
187
+ end
188
+ end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module HTML
4
- class Pipeline
3
+ class HTMLPipeline
4
+ class TextFilter
5
5
  # Text filter that converts an image's URL into an <img> tag.
6
6
  # For example, it will convert
7
7
  # http://example.com/test.jpg
@@ -10,7 +10,7 @@ module HTML
10
10
 
11
11
  class ImageFilter < TextFilter
12
12
  def call
13
- @text.gsub(/(https|http)?:\/\/.+\.(jpg|jpeg|bmp|gif|png)(\?\S+)?/i) do |match|
13
+ @text.gsub(%r{(https|http)?://.+\.(jpg|jpeg|bmp|gif|png)(\?\S+)?}i) do |match|
14
14
  %(<img src="#{match}" alt=""/>)
15
15
  end
16
16
  end
@@ -1,14 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- HTML::Pipeline.require_dependency('escape_utils', 'PlainTextInputFilter')
4
-
5
- module HTML
6
- class Pipeline
3
+ class HTMLPipeline
4
+ class TextFilter
7
5
  # Simple filter for plain text input. HTML escapes the text input and wraps it
8
6
  # in a div.
9
7
  class PlainTextInputFilter < TextFilter
10
8
  def call
11
- "<div>#{CGI.escape_html(@text)}</div>"
9
+ "<div>#{CGI.escapeHTML(@text)}</div>"
12
10
  end
13
11
  end
14
12
  end