loofah 2.19.0 → 2.23.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +100 -0
- data/README.md +157 -114
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +78 -76
- data/lib/loofah/helpers.rb +21 -15
- data/lib/loofah/{html → html4}/document.rb +5 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +7 -6
- data/lib/loofah/html5/safelist.rb +940 -925
- data/lib/loofah/html5/scrub.rb +105 -34
- data/lib/loofah/metahelpers.rb +10 -6
- data/lib/loofah/scrubber.rb +14 -8
- data/lib/loofah/scrubbers.rb +121 -48
- data/lib/loofah/version.rb +2 -1
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +2 -6
- data/lib/loofah.rb +116 -43
- metadata +20 -122
- data/lib/loofah/html/document_fragment.rb +0 -42
- data/lib/loofah/instance_methods.rb +0 -133
data/lib/loofah/xml/document.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Loofah
|
3
4
|
module XML # :nodoc:
|
4
5
|
#
|
@@ -8,15 +9,10 @@ module Loofah
|
|
8
9
|
#
|
9
10
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
10
11
|
class << self
|
11
|
-
#
|
12
|
-
# Overridden Nokogiri::XML::DocumentFragment
|
13
|
-
# constructor. Applications should use Loofah.fragment to
|
14
|
-
# parse a fragment.
|
15
|
-
#
|
16
12
|
def parse(tags)
|
17
13
|
doc = Loofah::XML::Document.new
|
18
14
|
doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
|
19
|
-
|
15
|
+
new(doc, tags)
|
20
16
|
end
|
21
17
|
end
|
22
18
|
end
|
data/lib/loofah.rb
CHANGED
@@ -1,8 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
3
2
|
|
4
3
|
require "nokogiri"
|
5
4
|
|
5
|
+
module Loofah
|
6
|
+
class << self
|
7
|
+
def html5_support?
|
8
|
+
# Note that Loofah can only support HTML5 in Nokogiri >= 1.14.0 because it requires the
|
9
|
+
# subclassing fix from https://github.com/sparklemotion/nokogiri/pull/2534
|
10
|
+
return @html5_support if defined? @html5_support
|
11
|
+
|
12
|
+
@html5_support =
|
13
|
+
Gem::Version.new(Nokogiri::VERSION) >= Gem::Version.new("1.14.0") &&
|
14
|
+
Nokogiri.uses_gumbo?
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
6
19
|
require_relative "loofah/version"
|
7
20
|
require_relative "loofah/metahelpers"
|
8
21
|
require_relative "loofah/elements"
|
@@ -14,51 +27,129 @@ require_relative "loofah/html5/scrub"
|
|
14
27
|
require_relative "loofah/scrubber"
|
15
28
|
require_relative "loofah/scrubbers"
|
16
29
|
|
17
|
-
require_relative "loofah/instance_methods"
|
30
|
+
require_relative "loofah/concerns"
|
18
31
|
require_relative "loofah/xml/document"
|
19
32
|
require_relative "loofah/xml/document_fragment"
|
20
|
-
require_relative "loofah/html/document"
|
21
|
-
require_relative "loofah/html/document_fragment"
|
33
|
+
require_relative "loofah/html4/document"
|
34
|
+
require_relative "loofah/html4/document_fragment"
|
35
|
+
|
36
|
+
if Loofah.html5_support?
|
37
|
+
require_relative "loofah/html5/document"
|
38
|
+
require_relative "loofah/html5/document_fragment"
|
39
|
+
end
|
22
40
|
|
23
41
|
# == Strings and IO Objects as Input
|
24
42
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
43
|
+
# The following methods accept any IO object in addition to accepting a string:
|
44
|
+
#
|
45
|
+
# - Loofah.html4_document
|
46
|
+
# - Loofah.html4_fragment
|
47
|
+
# - Loofah.scrub_html4_document
|
48
|
+
# - Loofah.scrub_html4_fragment
|
49
|
+
#
|
50
|
+
# - Loofah.html5_document
|
51
|
+
# - Loofah.html5_fragment
|
52
|
+
# - Loofah.scrub_html5_document
|
53
|
+
# - Loofah.scrub_html5_fragment
|
54
|
+
#
|
55
|
+
# - Loofah.xml_document
|
56
|
+
# - Loofah.xml_fragment
|
57
|
+
# - Loofah.scrub_xml_document
|
58
|
+
# - Loofah.scrub_xml_fragment
|
59
|
+
#
|
60
|
+
# - Loofah.document
|
61
|
+
# - Loofah.fragment
|
62
|
+
# - Loofah.scrub_document
|
63
|
+
# - Loofah.scrub_fragment
|
64
|
+
#
|
65
|
+
# That IO object could be a file, or a socket, or a StringIO, or anything that responds to +read+
|
66
|
+
# and +close+.
|
30
67
|
#
|
31
68
|
module Loofah
|
69
|
+
# Alias for Loofah::HTML4
|
70
|
+
HTML = HTML4
|
71
|
+
|
32
72
|
class << self
|
33
|
-
# Shortcut for Loofah::
|
34
|
-
#
|
35
|
-
|
36
|
-
|
73
|
+
# Shortcut for Loofah::HTML4::Document.parse(*args, &block)
|
74
|
+
#
|
75
|
+
# This method accepts the same parameters as Nokogiri::HTML4::Document.parse
|
76
|
+
def html4_document(*args, &block)
|
77
|
+
Loofah::HTML4::Document.parse(*args, &block)
|
78
|
+
end
|
79
|
+
|
80
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
81
|
+
#
|
82
|
+
# This method accepts the same parameters as Nokogiri::HTML4::DocumentFragment.parse
|
83
|
+
def html4_fragment(*args, &block)
|
84
|
+
Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
37
85
|
end
|
38
86
|
|
39
|
-
# Shortcut for Loofah::
|
40
|
-
|
41
|
-
|
42
|
-
Loofah::HTML::DocumentFragment.parse(*args, &block)
|
87
|
+
# Shortcut for Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
88
|
+
def scrub_html4_document(string_or_io, method)
|
89
|
+
Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
43
90
|
end
|
44
91
|
|
45
|
-
# Shortcut for Loofah.
|
46
|
-
def
|
47
|
-
Loofah.
|
92
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
93
|
+
def scrub_html4_fragment(string_or_io, method)
|
94
|
+
Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
48
95
|
end
|
49
96
|
|
50
|
-
|
51
|
-
|
52
|
-
|
97
|
+
if Loofah.html5_support?
|
98
|
+
# Shortcut for Loofah::HTML5::Document.parse(*args, &block)
|
99
|
+
#
|
100
|
+
# This method accepts the same parameters as Nokogiri::HTML5::Document.parse
|
101
|
+
def html5_document(*args, &block)
|
102
|
+
Loofah::HTML5::Document.parse(*args, &block)
|
103
|
+
end
|
104
|
+
|
105
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
106
|
+
#
|
107
|
+
# This method accepts the same parameters as Nokogiri::HTML5::DocumentFragment.parse
|
108
|
+
def html5_fragment(*args, &block)
|
109
|
+
Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
110
|
+
end
|
111
|
+
|
112
|
+
# Shortcut for Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
113
|
+
def scrub_html5_document(string_or_io, method)
|
114
|
+
Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
115
|
+
end
|
116
|
+
|
117
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
118
|
+
def scrub_html5_fragment(string_or_io, method)
|
119
|
+
Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
120
|
+
end
|
121
|
+
else
|
122
|
+
def html5_document(*args, &block)
|
123
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
124
|
+
end
|
125
|
+
|
126
|
+
def html5_fragment(*args, &block)
|
127
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
128
|
+
end
|
129
|
+
|
130
|
+
def scrub_html5_document(string_or_io, method)
|
131
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
132
|
+
end
|
133
|
+
|
134
|
+
def scrub_html5_fragment(string_or_io, method)
|
135
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
136
|
+
end
|
53
137
|
end
|
54
138
|
|
55
|
-
|
139
|
+
alias_method :document, :html4_document
|
140
|
+
alias_method :fragment, :html4_fragment
|
141
|
+
alias_method :scrub_document, :scrub_html4_document
|
142
|
+
alias_method :scrub_fragment, :scrub_html4_fragment
|
143
|
+
|
144
|
+
# Shortcut for Loofah::XML::Document.parse(*args, &block)
|
145
|
+
#
|
56
146
|
# This method accepts the same parameters as Nokogiri::XML::Document.parse
|
57
147
|
def xml_document(*args, &block)
|
58
148
|
Loofah::XML::Document.parse(*args, &block)
|
59
149
|
end
|
60
150
|
|
61
|
-
# Shortcut for Loofah::XML::DocumentFragment.parse
|
151
|
+
# Shortcut for Loofah::XML::DocumentFragment.parse(*args, &block)
|
152
|
+
#
|
62
153
|
# This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
|
63
154
|
def xml_fragment(*args, &block)
|
64
155
|
Loofah::XML::DocumentFragment.parse(*args, &block)
|
@@ -78,23 +169,5 @@ module Loofah
|
|
78
169
|
def remove_extraneous_whitespace(string)
|
79
170
|
string.gsub(/\n\s*\n\s*\n/, "\n\n")
|
80
171
|
end
|
81
|
-
|
82
|
-
private
|
83
|
-
|
84
|
-
# remove comments that exist outside of the HTML element.
|
85
|
-
#
|
86
|
-
# these comments are allowed by the HTML spec:
|
87
|
-
#
|
88
|
-
# https://www.w3.org/TR/html401/struct/global.html#h-7.1
|
89
|
-
#
|
90
|
-
# but are not scrubbed by Loofah because these nodes don't meet
|
91
|
-
# the contract that scrubbers expect of a node (e.g., it can be
|
92
|
-
# replaced, sibling and children nodes can be created).
|
93
|
-
def remove_comments_before_html_element(doc)
|
94
|
-
doc.children.each do |child|
|
95
|
-
child.unlink if child.comment?
|
96
|
-
end
|
97
|
-
doc
|
98
|
-
end
|
99
172
|
end
|
100
173
|
end
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: loofah
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.19.0
|
4
|
+
version: 2.23.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Dalessio
|
8
8
|
- Bryan Helmkamp
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2024-10-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: crass
|
@@ -31,124 +31,20 @@ dependencies:
|
|
31
31
|
requirements:
|
32
32
|
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: 1.
|
34
|
+
version: 1.12.0
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: 1.
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
requirements:
|
46
|
-
- - "~>"
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
version: '1.3'
|
49
|
-
type: :development
|
50
|
-
prerelease: false
|
51
|
-
version_requirements: !ruby/object:Gem::Requirement
|
52
|
-
requirements:
|
53
|
-
- - "~>"
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: '1.3'
|
56
|
-
- !ruby/object:Gem::Dependency
|
57
|
-
name: json
|
58
|
-
requirement: !ruby/object:Gem::Requirement
|
59
|
-
requirements:
|
60
|
-
- - "~>"
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: '2.2'
|
63
|
-
type: :development
|
64
|
-
prerelease: false
|
65
|
-
version_requirements: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
67
|
-
- - "~>"
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '2.2'
|
70
|
-
- !ruby/object:Gem::Dependency
|
71
|
-
name: minitest
|
72
|
-
requirement: !ruby/object:Gem::Requirement
|
73
|
-
requirements:
|
74
|
-
- - "~>"
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
version: '5.14'
|
77
|
-
type: :development
|
78
|
-
prerelease: false
|
79
|
-
version_requirements: !ruby/object:Gem::Requirement
|
80
|
-
requirements:
|
81
|
-
- - "~>"
|
82
|
-
- !ruby/object:Gem::Version
|
83
|
-
version: '5.14'
|
84
|
-
- !ruby/object:Gem::Dependency
|
85
|
-
name: rake
|
86
|
-
requirement: !ruby/object:Gem::Requirement
|
87
|
-
requirements:
|
88
|
-
- - "~>"
|
89
|
-
- !ruby/object:Gem::Version
|
90
|
-
version: '13.0'
|
91
|
-
type: :development
|
92
|
-
prerelease: false
|
93
|
-
version_requirements: !ruby/object:Gem::Requirement
|
94
|
-
requirements:
|
95
|
-
- - "~>"
|
96
|
-
- !ruby/object:Gem::Version
|
97
|
-
version: '13.0'
|
98
|
-
- !ruby/object:Gem::Dependency
|
99
|
-
name: rdoc
|
100
|
-
requirement: !ruby/object:Gem::Requirement
|
101
|
-
requirements:
|
102
|
-
- - ">="
|
103
|
-
- !ruby/object:Gem::Version
|
104
|
-
version: '4.0'
|
105
|
-
- - "<"
|
106
|
-
- !ruby/object:Gem::Version
|
107
|
-
version: '7'
|
108
|
-
type: :development
|
109
|
-
prerelease: false
|
110
|
-
version_requirements: !ruby/object:Gem::Requirement
|
111
|
-
requirements:
|
112
|
-
- - ">="
|
113
|
-
- !ruby/object:Gem::Version
|
114
|
-
version: '4.0'
|
115
|
-
- - "<"
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '7'
|
118
|
-
- !ruby/object:Gem::Dependency
|
119
|
-
name: rr
|
120
|
-
requirement: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - "~>"
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: 1.2.0
|
125
|
-
type: :development
|
126
|
-
prerelease: false
|
127
|
-
version_requirements: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - "~>"
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: 1.2.0
|
132
|
-
- !ruby/object:Gem::Dependency
|
133
|
-
name: rubocop
|
134
|
-
requirement: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - "~>"
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '1.1'
|
139
|
-
type: :development
|
140
|
-
prerelease: false
|
141
|
-
version_requirements: !ruby/object:Gem::Requirement
|
142
|
-
requirements:
|
143
|
-
- - "~>"
|
144
|
-
- !ruby/object:Gem::Version
|
145
|
-
version: '1.1'
|
146
|
-
description: |-
|
147
|
-
Loofah is a general library for manipulating and transforming HTML/XML documents and fragments, built on top of Nokogiri.
|
41
|
+
version: 1.12.0
|
42
|
+
description: |
|
43
|
+
Loofah is a general library for manipulating and transforming HTML/XML documents and fragments,
|
44
|
+
built on top of Nokogiri.
|
148
45
|
|
149
|
-
Loofah
|
150
|
-
|
151
|
-
ActiveRecord extensions for sanitization are available in the [`loofah-activerecord` gem](https://github.com/flavorjones/loofah-activerecord).
|
46
|
+
Loofah also includes some HTML sanitizers based on `html5lib`'s safelist, which are a specific
|
47
|
+
application of the general transformation functionality.
|
152
48
|
email:
|
153
49
|
- mike.dalessio@gmail.com
|
154
50
|
- bryan@brynary.com
|
@@ -161,14 +57,16 @@ files:
|
|
161
57
|
- README.md
|
162
58
|
- SECURITY.md
|
163
59
|
- lib/loofah.rb
|
60
|
+
- lib/loofah/concerns.rb
|
164
61
|
- lib/loofah/elements.rb
|
165
62
|
- lib/loofah/helpers.rb
|
166
|
-
- lib/loofah/html/document.rb
|
167
|
-
- lib/loofah/html/document_fragment.rb
|
63
|
+
- lib/loofah/html4/document.rb
|
64
|
+
- lib/loofah/html4/document_fragment.rb
|
65
|
+
- lib/loofah/html5/document.rb
|
66
|
+
- lib/loofah/html5/document_fragment.rb
|
168
67
|
- lib/loofah/html5/libxml2_workarounds.rb
|
169
68
|
- lib/loofah/html5/safelist.rb
|
170
69
|
- lib/loofah/html5/scrub.rb
|
171
|
-
- lib/loofah/instance_methods.rb
|
172
70
|
- lib/loofah/metahelpers.rb
|
173
71
|
- lib/loofah/scrubber.rb
|
174
72
|
- lib/loofah/scrubbers.rb
|
@@ -184,7 +82,7 @@ metadata:
|
|
184
82
|
bug_tracker_uri: https://github.com/flavorjones/loofah/issues
|
185
83
|
changelog_uri: https://github.com/flavorjones/loofah/blob/main/CHANGELOG.md
|
186
84
|
documentation_uri: https://www.rubydoc.info/gems/loofah/
|
187
|
-
post_install_message:
|
85
|
+
post_install_message:
|
188
86
|
rdoc_options: []
|
189
87
|
require_paths:
|
190
88
|
- lib
|
@@ -192,16 +90,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
192
90
|
requirements:
|
193
91
|
- - ">="
|
194
92
|
- !ruby/object:Gem::Version
|
195
|
-
version:
|
93
|
+
version: 2.5.0
|
196
94
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
197
95
|
requirements:
|
198
96
|
- - ">="
|
199
97
|
- !ruby/object:Gem::Version
|
200
98
|
version: '0'
|
201
99
|
requirements: []
|
202
|
-
rubygems_version: 3.
|
203
|
-
signing_key:
|
100
|
+
rubygems_version: 3.5.22
|
101
|
+
signing_key:
|
204
102
|
specification_version: 4
|
205
103
|
summary: Loofah is a general library for manipulating and transforming HTML/XML documents
|
206
|
-
and fragments, built on top of Nokogiri
|
104
|
+
and fragments, built on top of Nokogiri.
|
207
105
|
test_files: []
|
@@ -1,42 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
module Loofah
|
3
|
-
module HTML # :nodoc:
|
4
|
-
#
|
5
|
-
# Subclass of Nokogiri::HTML::DocumentFragment.
|
6
|
-
#
|
7
|
-
# See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
|
8
|
-
#
|
9
|
-
class DocumentFragment < Nokogiri::HTML::DocumentFragment
|
10
|
-
include Loofah::TextBehavior
|
11
|
-
|
12
|
-
class << self
|
13
|
-
#
|
14
|
-
# Overridden Nokogiri::HTML::DocumentFragment
|
15
|
-
# constructor. Applications should use Loofah.fragment to
|
16
|
-
# parse a fragment.
|
17
|
-
#
|
18
|
-
def parse(tags, encoding = nil)
|
19
|
-
doc = Loofah::HTML::Document.new
|
20
|
-
|
21
|
-
encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
|
22
|
-
doc.encoding = encoding
|
23
|
-
|
24
|
-
new(doc, tags)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
#
|
29
|
-
# Returns the HTML markup contained by the fragment
|
30
|
-
#
|
31
|
-
def to_s
|
32
|
-
serialize_root.children.to_s
|
33
|
-
end
|
34
|
-
|
35
|
-
alias :serialize :to_s
|
36
|
-
|
37
|
-
def serialize_root
|
38
|
-
at_xpath("./body") || self
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
@@ -1,133 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
module Loofah
|
3
|
-
#
|
4
|
-
# Mixes +scrub!+ into Document, DocumentFragment, Node and NodeSet.
|
5
|
-
#
|
6
|
-
# Traverse the document or fragment, invoking the +scrubber+ on
|
7
|
-
# each node.
|
8
|
-
#
|
9
|
-
# +scrubber+ must either be one of the symbols representing the
|
10
|
-
# built-in scrubbers (see Scrubbers), or a Scrubber instance.
|
11
|
-
#
|
12
|
-
# span2div = Loofah::Scrubber.new do |node|
|
13
|
-
# node.name = "div" if node.name == "span"
|
14
|
-
# end
|
15
|
-
# Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
|
16
|
-
# # => "<div>foo</div><p>bar</p>"
|
17
|
-
#
|
18
|
-
# or
|
19
|
-
#
|
20
|
-
# unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
|
21
|
-
# Loofah.fragment(unsafe_html).scrub!(:strip).to_s
|
22
|
-
# # => "ohai! <div>div is safe</div> "
|
23
|
-
#
|
24
|
-
# Note that this method is called implicitly from
|
25
|
-
# Loofah.scrub_fragment and Loofah.scrub_document.
|
26
|
-
#
|
27
|
-
# Please see Scrubber for more information on implementation and traversal, and
|
28
|
-
# README.rdoc for more example usage.
|
29
|
-
#
|
30
|
-
module ScrubBehavior
|
31
|
-
module Node # :nodoc:
|
32
|
-
def scrub!(scrubber)
|
33
|
-
#
|
34
|
-
# yes. this should be three separate methods. but nokogiri
|
35
|
-
# decorates (or not) based on whether the module name has
|
36
|
-
# already been included. and since documents get decorated
|
37
|
-
# just like their constituent nodes, we need to jam all the
|
38
|
-
# logic into a single module.
|
39
|
-
#
|
40
|
-
scrubber = ScrubBehavior.resolve_scrubber(scrubber)
|
41
|
-
case self
|
42
|
-
when Nokogiri::XML::Document
|
43
|
-
scrubber.traverse(root) if root
|
44
|
-
when Nokogiri::XML::DocumentFragment
|
45
|
-
children.scrub! scrubber
|
46
|
-
else
|
47
|
-
scrubber.traverse(self)
|
48
|
-
end
|
49
|
-
self
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
module NodeSet # :nodoc:
|
54
|
-
def scrub!(scrubber)
|
55
|
-
each { |node| node.scrub!(scrubber) }
|
56
|
-
self
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def ScrubBehavior.resolve_scrubber(scrubber) # :nodoc:
|
61
|
-
scrubber = Scrubbers::MAP[scrubber].new if Scrubbers::MAP[scrubber]
|
62
|
-
unless scrubber.is_a?(Loofah::Scrubber)
|
63
|
-
raise Loofah::ScrubberNotFound, "not a Scrubber or a scrubber name: #{scrubber.inspect}"
|
64
|
-
end
|
65
|
-
scrubber
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
#
|
70
|
-
# Overrides +text+ in HTML::Document and HTML::DocumentFragment,
|
71
|
-
# and mixes in +to_text+.
|
72
|
-
#
|
73
|
-
module TextBehavior
|
74
|
-
#
|
75
|
-
# Returns a plain-text version of the markup contained by the document,
|
76
|
-
# with HTML entities encoded.
|
77
|
-
#
|
78
|
-
# This method is significantly faster than #to_text, but isn't
|
79
|
-
# clever about whitespace around block elements.
|
80
|
-
#
|
81
|
-
# Loofah.document("<h1>Title</h1><div>Content</div>").text
|
82
|
-
# # => "TitleContent"
|
83
|
-
#
|
84
|
-
# By default, the returned text will have HTML entities
|
85
|
-
# escaped. If you want unescaped entities, and you understand
|
86
|
-
# that the result is unsafe to render in a browser, then you
|
87
|
-
# can pass an argument as shown:
|
88
|
-
#
|
89
|
-
# frag = Loofah.fragment("<script>alert('EVIL');</script>")
|
90
|
-
# # ok for browser:
|
91
|
-
# frag.text # => "<script>alert('EVIL');</script>"
|
92
|
-
# # decidedly not ok for browser:
|
93
|
-
# frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
|
94
|
-
#
|
95
|
-
def text(options = {})
|
96
|
-
result = if serialize_root
|
97
|
-
serialize_root.children.reject(&:comment?).map(&:inner_text).join("")
|
98
|
-
else
|
99
|
-
""
|
100
|
-
end
|
101
|
-
if options[:encode_special_chars] == false
|
102
|
-
result # possibly dangerous if rendered in a browser
|
103
|
-
else
|
104
|
-
encode_special_chars result
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
alias :inner_text :text
|
109
|
-
alias :to_str :text
|
110
|
-
|
111
|
-
#
|
112
|
-
# Returns a plain-text version of the markup contained by the
|
113
|
-
# fragment, with HTML entities encoded.
|
114
|
-
#
|
115
|
-
# This method is slower than #text, but is clever about
|
116
|
-
# whitespace around block elements and line break elements.
|
117
|
-
#
|
118
|
-
# Loofah.document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
|
119
|
-
# # => "\nTitle\n\nContent\nNext line\n"
|
120
|
-
#
|
121
|
-
def to_text(options = {})
|
122
|
-
Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
module DocumentDecorator # :nodoc:
|
127
|
-
def initialize(*args, &block)
|
128
|
-
super
|
129
|
-
self.decorators(Nokogiri::XML::Node) << ScrubBehavior::Node
|
130
|
-
self.decorators(Nokogiri::XML::NodeSet) << ScrubBehavior::NodeSet
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|