loofah 2.19.0 → 2.23.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  module XML # :nodoc:
4
5
  #
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  module XML # :nodoc:
4
5
  #
@@ -8,15 +9,10 @@ module Loofah
8
9
  #
9
10
  class DocumentFragment < Nokogiri::XML::DocumentFragment
10
11
  class << self
11
- #
12
- # Overridden Nokogiri::XML::DocumentFragment
13
- # constructor. Applications should use Loofah.fragment to
14
- # parse a fragment.
15
- #
16
12
  def parse(tags)
17
13
  doc = Loofah::XML::Document.new
18
14
  doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
19
- self.new(doc, tags)
15
+ new(doc, tags)
20
16
  end
21
17
  end
22
18
  end
data/lib/loofah.rb CHANGED
@@ -1,8 +1,21 @@
1
1
  # frozen_string_literal: true
2
- $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
3
2
 
4
3
  require "nokogiri"
5
4
 
5
+ module Loofah
6
+ class << self
7
+ def html5_support?
8
+ # Note that Loofah can only support HTML5 in Nokogiri >= 1.14.0 because it requires the
9
+ # subclassing fix from https://github.com/sparklemotion/nokogiri/pull/2534
10
+ return @html5_support if defined? @html5_support
11
+
12
+ @html5_support =
13
+ Gem::Version.new(Nokogiri::VERSION) > Gem::Version.new("1.14.0") &&
14
+ Nokogiri.uses_gumbo?
15
+ end
16
+ end
17
+ end
18
+
6
19
  require_relative "loofah/version"
7
20
  require_relative "loofah/metahelpers"
8
21
  require_relative "loofah/elements"
@@ -14,51 +27,129 @@ require_relative "loofah/html5/scrub"
14
27
  require_relative "loofah/scrubber"
15
28
  require_relative "loofah/scrubbers"
16
29
 
17
- require_relative "loofah/instance_methods"
30
+ require_relative "loofah/concerns"
18
31
  require_relative "loofah/xml/document"
19
32
  require_relative "loofah/xml/document_fragment"
20
- require_relative "loofah/html/document"
21
- require_relative "loofah/html/document_fragment"
33
+ require_relative "loofah/html4/document"
34
+ require_relative "loofah/html4/document_fragment"
35
+
36
+ if Loofah.html5_support?
37
+ require_relative "loofah/html5/document"
38
+ require_relative "loofah/html5/document_fragment"
39
+ end
22
40
 
23
41
  # == Strings and IO Objects as Input
24
42
  #
25
- # Loofah.document and Loofah.fragment accept any IO object in addition
26
- # to accepting a string. That IO object could be a file, or a socket,
27
- # or a StringIO, or anything that responds to +read+ and
28
- # +close+. Which makes it particularly easy to sanitize mass
29
- # quantities of docs.
43
+ # The following methods accept any IO object in addition to accepting a string:
44
+ #
45
+ # - Loofah.html4_document
46
+ # - Loofah.html4_fragment
47
+ # - Loofah.scrub_html4_document
48
+ # - Loofah.scrub_html4_fragment
49
+ #
50
+ # - Loofah.html5_document
51
+ # - Loofah.html5_fragment
52
+ # - Loofah.scrub_html5_document
53
+ # - Loofah.scrub_html5_fragment
54
+ #
55
+ # - Loofah.xml_document
56
+ # - Loofah.xml_fragment
57
+ # - Loofah.scrub_xml_document
58
+ # - Loofah.scrub_xml_fragment
59
+ #
60
+ # - Loofah.document
61
+ # - Loofah.fragment
62
+ # - Loofah.scrub_document
63
+ # - Loofah.scrub_fragment
64
+ #
65
+ # That IO object could be a file, or a socket, or a StringIO, or anything that responds to +read+
66
+ # and +close+.
30
67
  #
31
68
  module Loofah
69
+ # Alias for Loofah::HTML4
70
+ HTML = HTML4
71
+
32
72
  class << self
33
- # Shortcut for Loofah::HTML::Document.parse
34
- # This method accepts the same parameters as Nokogiri::HTML::Document.parse
35
- def document(*args, &block)
36
- remove_comments_before_html_element Loofah::HTML::Document.parse(*args, &block)
73
+ # Shortcut for Loofah::HTML4::Document.parse(*args, &block)
74
+ #
75
+ # This method accepts the same parameters as Nokogiri::HTML4::Document.parse
76
+ def html4_document(*args, &block)
77
+ Loofah::HTML4::Document.parse(*args, &block)
78
+ end
79
+
80
+ # Shortcut for Loofah::HTML4::DocumentFragment.parse(*args, &block)
81
+ #
82
+ # This method accepts the same parameters as Nokogiri::HTML4::DocumentFragment.parse
83
+ def html4_fragment(*args, &block)
84
+ Loofah::HTML4::DocumentFragment.parse(*args, &block)
37
85
  end
38
86
 
39
- # Shortcut for Loofah::HTML::DocumentFragment.parse
40
- # This method accepts the same parameters as Nokogiri::HTML::DocumentFragment.parse
41
- def fragment(*args, &block)
42
- Loofah::HTML::DocumentFragment.parse(*args, &block)
87
+ # Shortcut for Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
88
+ def scrub_html4_document(string_or_io, method)
89
+ Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
43
90
  end
44
91
 
45
- # Shortcut for Loofah.fragment(string_or_io).scrub!(method)
46
- def scrub_fragment(string_or_io, method)
47
- Loofah.fragment(string_or_io).scrub!(method)
92
+ # Shortcut for Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
93
+ def scrub_html4_fragment(string_or_io, method)
94
+ Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
48
95
  end
49
96
 
50
- # Shortcut for Loofah.document(string_or_io).scrub!(method)
51
- def scrub_document(string_or_io, method)
52
- Loofah.document(string_or_io).scrub!(method)
97
+ if Loofah.html5_support?
98
+ # Shortcut for Loofah::HTML5::Document.parse(*args, &block)
99
+ #
100
+ # This method accepts the same parameters as Nokogiri::HTML5::Document.parse
101
+ def html5_document(*args, &block)
102
+ Loofah::HTML5::Document.parse(*args, &block)
103
+ end
104
+
105
+ # Shortcut for Loofah::HTML5::DocumentFragment.parse(*args, &block)
106
+ #
107
+ # This method accepts the same parameters as Nokogiri::HTML5::DocumentFragment.parse
108
+ def html5_fragment(*args, &block)
109
+ Loofah::HTML5::DocumentFragment.parse(*args, &block)
110
+ end
111
+
112
+ # Shortcut for Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
113
+ def scrub_html5_document(string_or_io, method)
114
+ Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
115
+ end
116
+
117
+ # Shortcut for Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
118
+ def scrub_html5_fragment(string_or_io, method)
119
+ Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
120
+ end
121
+ else
122
+ def html5_document(*args, &block)
123
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
124
+ end
125
+
126
+ def html5_fragment(*args, &block)
127
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
128
+ end
129
+
130
+ def scrub_html5_document(string_or_io, method)
131
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
132
+ end
133
+
134
+ def scrub_html5_fragment(string_or_io, method)
135
+ raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
136
+ end
53
137
  end
54
138
 
55
- # Shortcut for Loofah::XML::Document.parse
139
+ alias_method :document, :html4_document
140
+ alias_method :fragment, :html4_fragment
141
+ alias_method :scrub_document, :scrub_html4_document
142
+ alias_method :scrub_fragment, :scrub_html4_fragment
143
+
144
+ # Shortcut for Loofah::XML::Document.parse(*args, &block)
145
+ #
56
146
  # This method accepts the same parameters as Nokogiri::XML::Document.parse
57
147
  def xml_document(*args, &block)
58
148
  Loofah::XML::Document.parse(*args, &block)
59
149
  end
60
150
 
61
- # Shortcut for Loofah::XML::DocumentFragment.parse
151
+ # Shortcut for Loofah::XML::DocumentFragment.parse(*args, &block)
152
+ #
62
153
  # This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
63
154
  def xml_fragment(*args, &block)
64
155
  Loofah::XML::DocumentFragment.parse(*args, &block)
@@ -78,23 +169,5 @@ module Loofah
78
169
  def remove_extraneous_whitespace(string)
79
170
  string.gsub(/\n\s*\n\s*\n/, "\n\n")
80
171
  end
81
-
82
- private
83
-
84
- # remove comments that exist outside of the HTML element.
85
- #
86
- # these comments are allowed by the HTML spec:
87
- #
88
- # https://www.w3.org/TR/html401/struct/global.html#h-7.1
89
- #
90
- # but are not scrubbed by Loofah because these nodes don't meet
91
- # the contract that scrubbers expect of a node (e.g., it can be
92
- # replaced, sibling and children nodes can be created).
93
- def remove_comments_before_html_element(doc)
94
- doc.children.each do |child|
95
- child.unlink if child.comment?
96
- end
97
- doc
98
- end
99
172
  end
100
173
  end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loofah
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.19.0
4
+ version: 2.23.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Dalessio
8
8
  - Bryan Helmkamp
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-09-14 00:00:00.000000000 Z
12
+ date: 2024-10-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: crass
@@ -31,124 +31,20 @@ dependencies:
31
31
  requirements:
32
32
  - - ">="
33
33
  - !ruby/object:Gem::Version
34
- version: 1.5.9
34
+ version: 1.12.0
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
39
  - - ">="
40
40
  - !ruby/object:Gem::Version
41
- version: 1.5.9
42
- - !ruby/object:Gem::Dependency
43
- name: hoe-markdown
44
- requirement: !ruby/object:Gem::Requirement
45
- requirements:
46
- - - "~>"
47
- - !ruby/object:Gem::Version
48
- version: '1.3'
49
- type: :development
50
- prerelease: false
51
- version_requirements: !ruby/object:Gem::Requirement
52
- requirements:
53
- - - "~>"
54
- - !ruby/object:Gem::Version
55
- version: '1.3'
56
- - !ruby/object:Gem::Dependency
57
- name: json
58
- requirement: !ruby/object:Gem::Requirement
59
- requirements:
60
- - - "~>"
61
- - !ruby/object:Gem::Version
62
- version: '2.2'
63
- type: :development
64
- prerelease: false
65
- version_requirements: !ruby/object:Gem::Requirement
66
- requirements:
67
- - - "~>"
68
- - !ruby/object:Gem::Version
69
- version: '2.2'
70
- - !ruby/object:Gem::Dependency
71
- name: minitest
72
- requirement: !ruby/object:Gem::Requirement
73
- requirements:
74
- - - "~>"
75
- - !ruby/object:Gem::Version
76
- version: '5.14'
77
- type: :development
78
- prerelease: false
79
- version_requirements: !ruby/object:Gem::Requirement
80
- requirements:
81
- - - "~>"
82
- - !ruby/object:Gem::Version
83
- version: '5.14'
84
- - !ruby/object:Gem::Dependency
85
- name: rake
86
- requirement: !ruby/object:Gem::Requirement
87
- requirements:
88
- - - "~>"
89
- - !ruby/object:Gem::Version
90
- version: '13.0'
91
- type: :development
92
- prerelease: false
93
- version_requirements: !ruby/object:Gem::Requirement
94
- requirements:
95
- - - "~>"
96
- - !ruby/object:Gem::Version
97
- version: '13.0'
98
- - !ruby/object:Gem::Dependency
99
- name: rdoc
100
- requirement: !ruby/object:Gem::Requirement
101
- requirements:
102
- - - ">="
103
- - !ruby/object:Gem::Version
104
- version: '4.0'
105
- - - "<"
106
- - !ruby/object:Gem::Version
107
- version: '7'
108
- type: :development
109
- prerelease: false
110
- version_requirements: !ruby/object:Gem::Requirement
111
- requirements:
112
- - - ">="
113
- - !ruby/object:Gem::Version
114
- version: '4.0'
115
- - - "<"
116
- - !ruby/object:Gem::Version
117
- version: '7'
118
- - !ruby/object:Gem::Dependency
119
- name: rr
120
- requirement: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: 1.2.0
125
- type: :development
126
- prerelease: false
127
- version_requirements: !ruby/object:Gem::Requirement
128
- requirements:
129
- - - "~>"
130
- - !ruby/object:Gem::Version
131
- version: 1.2.0
132
- - !ruby/object:Gem::Dependency
133
- name: rubocop
134
- requirement: !ruby/object:Gem::Requirement
135
- requirements:
136
- - - "~>"
137
- - !ruby/object:Gem::Version
138
- version: '1.1'
139
- type: :development
140
- prerelease: false
141
- version_requirements: !ruby/object:Gem::Requirement
142
- requirements:
143
- - - "~>"
144
- - !ruby/object:Gem::Version
145
- version: '1.1'
146
- description: |-
147
- Loofah is a general library for manipulating and transforming HTML/XML documents and fragments, built on top of Nokogiri.
41
+ version: 1.12.0
42
+ description: |
43
+ Loofah is a general library for manipulating and transforming HTML/XML documents and fragments,
44
+ built on top of Nokogiri.
148
45
 
149
- Loofah excels at HTML sanitization (XSS prevention). It includes some nice HTML sanitizers, which are based on HTML5lib's safelist, so it most likely won't make your codes less secure. (These statements have not been evaluated by Netexperts.)
150
-
151
- ActiveRecord extensions for sanitization are available in the [`loofah-activerecord` gem](https://github.com/flavorjones/loofah-activerecord).
46
+ Loofah also includes some HTML sanitizers based on `html5lib`'s safelist, which are a specific
47
+ application of the general transformation functionality.
152
48
  email:
153
49
  - mike.dalessio@gmail.com
154
50
  - bryan@brynary.com
@@ -161,14 +57,16 @@ files:
161
57
  - README.md
162
58
  - SECURITY.md
163
59
  - lib/loofah.rb
60
+ - lib/loofah/concerns.rb
164
61
  - lib/loofah/elements.rb
165
62
  - lib/loofah/helpers.rb
166
- - lib/loofah/html/document.rb
167
- - lib/loofah/html/document_fragment.rb
63
+ - lib/loofah/html4/document.rb
64
+ - lib/loofah/html4/document_fragment.rb
65
+ - lib/loofah/html5/document.rb
66
+ - lib/loofah/html5/document_fragment.rb
168
67
  - lib/loofah/html5/libxml2_workarounds.rb
169
68
  - lib/loofah/html5/safelist.rb
170
69
  - lib/loofah/html5/scrub.rb
171
- - lib/loofah/instance_methods.rb
172
70
  - lib/loofah/metahelpers.rb
173
71
  - lib/loofah/scrubber.rb
174
72
  - lib/loofah/scrubbers.rb
@@ -184,7 +82,7 @@ metadata:
184
82
  bug_tracker_uri: https://github.com/flavorjones/loofah/issues
185
83
  changelog_uri: https://github.com/flavorjones/loofah/blob/main/CHANGELOG.md
186
84
  documentation_uri: https://www.rubydoc.info/gems/loofah/
187
- post_install_message:
85
+ post_install_message:
188
86
  rdoc_options: []
189
87
  require_paths:
190
88
  - lib
@@ -192,16 +90,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
192
90
  requirements:
193
91
  - - ">="
194
92
  - !ruby/object:Gem::Version
195
- version: '0'
93
+ version: 2.5.0
196
94
  required_rubygems_version: !ruby/object:Gem::Requirement
197
95
  requirements:
198
96
  - - ">="
199
97
  - !ruby/object:Gem::Version
200
98
  version: '0'
201
99
  requirements: []
202
- rubygems_version: 3.3.7
203
- signing_key:
100
+ rubygems_version: 3.5.22
101
+ signing_key:
204
102
  specification_version: 4
205
103
  summary: Loofah is a general library for manipulating and transforming HTML/XML documents
206
- and fragments, built on top of Nokogiri
104
+ and fragments, built on top of Nokogiri.
207
105
  test_files: []
@@ -1,42 +0,0 @@
1
- # frozen_string_literal: true
2
- module Loofah
3
- module HTML # :nodoc:
4
- #
5
- # Subclass of Nokogiri::HTML::DocumentFragment.
6
- #
7
- # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
8
- #
9
- class DocumentFragment < Nokogiri::HTML::DocumentFragment
10
- include Loofah::TextBehavior
11
-
12
- class << self
13
- #
14
- # Overridden Nokogiri::HTML::DocumentFragment
15
- # constructor. Applications should use Loofah.fragment to
16
- # parse a fragment.
17
- #
18
- def parse(tags, encoding = nil)
19
- doc = Loofah::HTML::Document.new
20
-
21
- encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
22
- doc.encoding = encoding
23
-
24
- new(doc, tags)
25
- end
26
- end
27
-
28
- #
29
- # Returns the HTML markup contained by the fragment
30
- #
31
- def to_s
32
- serialize_root.children.to_s
33
- end
34
-
35
- alias :serialize :to_s
36
-
37
- def serialize_root
38
- at_xpath("./body") || self
39
- end
40
- end
41
- end
42
- end
@@ -1,133 +0,0 @@
1
- # frozen_string_literal: true
2
- module Loofah
3
- #
4
- # Mixes +scrub!+ into Document, DocumentFragment, Node and NodeSet.
5
- #
6
- # Traverse the document or fragment, invoking the +scrubber+ on
7
- # each node.
8
- #
9
- # +scrubber+ must either be one of the symbols representing the
10
- # built-in scrubbers (see Scrubbers), or a Scrubber instance.
11
- #
12
- # span2div = Loofah::Scrubber.new do |node|
13
- # node.name = "div" if node.name == "span"
14
- # end
15
- # Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
16
- # # => "<div>foo</div><p>bar</p>"
17
- #
18
- # or
19
- #
20
- # unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
21
- # Loofah.fragment(unsafe_html).scrub!(:strip).to_s
22
- # # => "ohai! <div>div is safe</div> "
23
- #
24
- # Note that this method is called implicitly from
25
- # Loofah.scrub_fragment and Loofah.scrub_document.
26
- #
27
- # Please see Scrubber for more information on implementation and traversal, and
28
- # README.rdoc for more example usage.
29
- #
30
- module ScrubBehavior
31
- module Node # :nodoc:
32
- def scrub!(scrubber)
33
- #
34
- # yes. this should be three separate methods. but nokogiri
35
- # decorates (or not) based on whether the module name has
36
- # already been included. and since documents get decorated
37
- # just like their constituent nodes, we need to jam all the
38
- # logic into a single module.
39
- #
40
- scrubber = ScrubBehavior.resolve_scrubber(scrubber)
41
- case self
42
- when Nokogiri::XML::Document
43
- scrubber.traverse(root) if root
44
- when Nokogiri::XML::DocumentFragment
45
- children.scrub! scrubber
46
- else
47
- scrubber.traverse(self)
48
- end
49
- self
50
- end
51
- end
52
-
53
- module NodeSet # :nodoc:
54
- def scrub!(scrubber)
55
- each { |node| node.scrub!(scrubber) }
56
- self
57
- end
58
- end
59
-
60
- def ScrubBehavior.resolve_scrubber(scrubber) # :nodoc:
61
- scrubber = Scrubbers::MAP[scrubber].new if Scrubbers::MAP[scrubber]
62
- unless scrubber.is_a?(Loofah::Scrubber)
63
- raise Loofah::ScrubberNotFound, "not a Scrubber or a scrubber name: #{scrubber.inspect}"
64
- end
65
- scrubber
66
- end
67
- end
68
-
69
- #
70
- # Overrides +text+ in HTML::Document and HTML::DocumentFragment,
71
- # and mixes in +to_text+.
72
- #
73
- module TextBehavior
74
- #
75
- # Returns a plain-text version of the markup contained by the document,
76
- # with HTML entities encoded.
77
- #
78
- # This method is significantly faster than #to_text, but isn't
79
- # clever about whitespace around block elements.
80
- #
81
- # Loofah.document("<h1>Title</h1><div>Content</div>").text
82
- # # => "TitleContent"
83
- #
84
- # By default, the returned text will have HTML entities
85
- # escaped. If you want unescaped entities, and you understand
86
- # that the result is unsafe to render in a browser, then you
87
- # can pass an argument as shown:
88
- #
89
- # frag = Loofah.fragment("&lt;script&gt;alert('EVIL');&lt;/script&gt;")
90
- # # ok for browser:
91
- # frag.text # => "&lt;script&gt;alert('EVIL');&lt;/script&gt;"
92
- # # decidedly not ok for browser:
93
- # frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
94
- #
95
- def text(options = {})
96
- result = if serialize_root
97
- serialize_root.children.reject(&:comment?).map(&:inner_text).join("")
98
- else
99
- ""
100
- end
101
- if options[:encode_special_chars] == false
102
- result # possibly dangerous if rendered in a browser
103
- else
104
- encode_special_chars result
105
- end
106
- end
107
-
108
- alias :inner_text :text
109
- alias :to_str :text
110
-
111
- #
112
- # Returns a plain-text version of the markup contained by the
113
- # fragment, with HTML entities encoded.
114
- #
115
- # This method is slower than #text, but is clever about
116
- # whitespace around block elements and line break elements.
117
- #
118
- # Loofah.document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
119
- # # => "\nTitle\n\nContent\nNext line\n"
120
- #
121
- def to_text(options = {})
122
- Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
123
- end
124
- end
125
-
126
- module DocumentDecorator # :nodoc:
127
- def initialize(*args, &block)
128
- super
129
- self.decorators(Nokogiri::XML::Node) << ScrubBehavior::Node
130
- self.decorators(Nokogiri::XML::NodeSet) << ScrubBehavior::NodeSet
131
- end
132
- end
133
- end