loofah 2.19.1 → 2.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +102 -0
- data/README.md +161 -115
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +78 -76
- data/lib/loofah/helpers.rb +21 -15
- data/lib/loofah/{html → html4}/document.rb +5 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +7 -6
- data/lib/loofah/html5/safelist.rb +940 -924
- data/lib/loofah/html5/scrub.rb +36 -35
- data/lib/loofah/metahelpers.rb +10 -6
- data/lib/loofah/scrubber.rb +10 -8
- data/lib/loofah/scrubbers.rb +174 -43
- data/lib/loofah/version.rb +2 -1
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +2 -6
- data/lib/loofah.rb +116 -43
- metadata +18 -122
- data/lib/loofah/html/document_fragment.rb +0 -42
- data/lib/loofah/instance_methods.rb +0 -133
data/lib/loofah/xml/document.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Loofah
|
3
4
|
module XML # :nodoc:
|
4
5
|
#
|
@@ -8,15 +9,10 @@ module Loofah
|
|
8
9
|
#
|
9
10
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
10
11
|
class << self
|
11
|
-
#
|
12
|
-
# Overridden Nokogiri::XML::DocumentFragment
|
13
|
-
# constructor. Applications should use Loofah.fragment to
|
14
|
-
# parse a fragment.
|
15
|
-
#
|
16
12
|
def parse(tags)
|
17
13
|
doc = Loofah::XML::Document.new
|
18
14
|
doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
|
19
|
-
|
15
|
+
new(doc, tags)
|
20
16
|
end
|
21
17
|
end
|
22
18
|
end
|
data/lib/loofah.rb
CHANGED
@@ -1,8 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
3
2
|
|
4
3
|
require "nokogiri"
|
5
4
|
|
5
|
+
module Loofah
|
6
|
+
class << self
|
7
|
+
def html5_support?
|
8
|
+
# Note that Loofah can only support HTML5 in Nokogiri >= 1.14.0 because it requires the
|
9
|
+
# subclassing fix from https://github.com/sparklemotion/nokogiri/pull/2534
|
10
|
+
return @html5_support if defined? @html5_support
|
11
|
+
|
12
|
+
@html5_support =
|
13
|
+
Gem::Version.new(Nokogiri::VERSION) > Gem::Version.new("1.14.0") &&
|
14
|
+
Nokogiri.uses_gumbo?
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
6
19
|
require_relative "loofah/version"
|
7
20
|
require_relative "loofah/metahelpers"
|
8
21
|
require_relative "loofah/elements"
|
@@ -14,51 +27,129 @@ require_relative "loofah/html5/scrub"
|
|
14
27
|
require_relative "loofah/scrubber"
|
15
28
|
require_relative "loofah/scrubbers"
|
16
29
|
|
17
|
-
require_relative "loofah/
|
30
|
+
require_relative "loofah/concerns"
|
18
31
|
require_relative "loofah/xml/document"
|
19
32
|
require_relative "loofah/xml/document_fragment"
|
20
|
-
require_relative "loofah/
|
21
|
-
require_relative "loofah/
|
33
|
+
require_relative "loofah/html4/document"
|
34
|
+
require_relative "loofah/html4/document_fragment"
|
35
|
+
|
36
|
+
if Loofah.html5_support?
|
37
|
+
require_relative "loofah/html5/document"
|
38
|
+
require_relative "loofah/html5/document_fragment"
|
39
|
+
end
|
22
40
|
|
23
41
|
# == Strings and IO Objects as Input
|
24
42
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
43
|
+
# The following methods accept any IO object in addition to accepting a string:
|
44
|
+
#
|
45
|
+
# - Loofah.html4_document
|
46
|
+
# - Loofah.html4_fragment
|
47
|
+
# - Loofah.scrub_html4_document
|
48
|
+
# - Loofah.scrub_html4_fragment
|
49
|
+
#
|
50
|
+
# - Loofah.html5_document
|
51
|
+
# - Loofah.html5_fragment
|
52
|
+
# - Loofah.scrub_html5_document
|
53
|
+
# - Loofah.scrub_html5_fragment
|
54
|
+
#
|
55
|
+
# - Loofah.xml_document
|
56
|
+
# - Loofah.xml_fragment
|
57
|
+
# - Loofah.scrub_xml_document
|
58
|
+
# - Loofah.scrub_xml_fragment
|
59
|
+
#
|
60
|
+
# - Loofah.document
|
61
|
+
# - Loofah.fragment
|
62
|
+
# - Loofah.scrub_document
|
63
|
+
# - Loofah.scrub_fragment
|
64
|
+
#
|
65
|
+
# That IO object could be a file, or a socket, or a StringIO, or anything that responds to +read+
|
66
|
+
# and +close+.
|
30
67
|
#
|
31
68
|
module Loofah
|
69
|
+
# Alias for Loofah::HTML4
|
70
|
+
HTML = HTML4
|
71
|
+
|
32
72
|
class << self
|
33
|
-
# Shortcut for Loofah::
|
34
|
-
#
|
35
|
-
|
36
|
-
|
73
|
+
# Shortcut for Loofah::HTML4::Document.parse(*args, &block)
|
74
|
+
#
|
75
|
+
# This method accepts the same parameters as Nokogiri::HTML4::Document.parse
|
76
|
+
def html4_document(*args, &block)
|
77
|
+
Loofah::HTML4::Document.parse(*args, &block)
|
78
|
+
end
|
79
|
+
|
80
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
81
|
+
#
|
82
|
+
# This method accepts the same parameters as Nokogiri::HTML4::DocumentFragment.parse
|
83
|
+
def html4_fragment(*args, &block)
|
84
|
+
Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
37
85
|
end
|
38
86
|
|
39
|
-
# Shortcut for Loofah::
|
40
|
-
|
41
|
-
|
42
|
-
Loofah::HTML::DocumentFragment.parse(*args, &block)
|
87
|
+
# Shortcut for Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
88
|
+
def scrub_html4_document(string_or_io, method)
|
89
|
+
Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
43
90
|
end
|
44
91
|
|
45
|
-
# Shortcut for Loofah.
|
46
|
-
def
|
47
|
-
Loofah.
|
92
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
93
|
+
def scrub_html4_fragment(string_or_io, method)
|
94
|
+
Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
48
95
|
end
|
49
96
|
|
50
|
-
|
51
|
-
|
52
|
-
|
97
|
+
if Loofah.html5_support?
|
98
|
+
# Shortcut for Loofah::HTML5::Document.parse(*args, &block)
|
99
|
+
#
|
100
|
+
# This method accepts the same parameters as Nokogiri::HTML5::Document.parse
|
101
|
+
def html5_document(*args, &block)
|
102
|
+
Loofah::HTML5::Document.parse(*args, &block)
|
103
|
+
end
|
104
|
+
|
105
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
106
|
+
#
|
107
|
+
# This method accepts the same parameters as Nokogiri::HTML5::DocumentFragment.parse
|
108
|
+
def html5_fragment(*args, &block)
|
109
|
+
Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
110
|
+
end
|
111
|
+
|
112
|
+
# Shortcut for Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
113
|
+
def scrub_html5_document(string_or_io, method)
|
114
|
+
Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
115
|
+
end
|
116
|
+
|
117
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
118
|
+
def scrub_html5_fragment(string_or_io, method)
|
119
|
+
Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
120
|
+
end
|
121
|
+
else
|
122
|
+
def html5_document(*args, &block)
|
123
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
124
|
+
end
|
125
|
+
|
126
|
+
def html5_fragment(*args, &block)
|
127
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
128
|
+
end
|
129
|
+
|
130
|
+
def scrub_html5_document(string_or_io, method)
|
131
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
132
|
+
end
|
133
|
+
|
134
|
+
def scrub_html5_fragment(string_or_io, method)
|
135
|
+
raise NotImplementedError, "Loofah::HTML5 is not supported by your version of Nokogiri"
|
136
|
+
end
|
53
137
|
end
|
54
138
|
|
55
|
-
|
139
|
+
alias_method :document, :html4_document
|
140
|
+
alias_method :fragment, :html4_fragment
|
141
|
+
alias_method :scrub_document, :scrub_html4_document
|
142
|
+
alias_method :scrub_fragment, :scrub_html4_fragment
|
143
|
+
|
144
|
+
# Shortcut for Loofah::XML::Document.parse(*args, &block)
|
145
|
+
#
|
56
146
|
# This method accepts the same parameters as Nokogiri::XML::Document.parse
|
57
147
|
def xml_document(*args, &block)
|
58
148
|
Loofah::XML::Document.parse(*args, &block)
|
59
149
|
end
|
60
150
|
|
61
|
-
# Shortcut for Loofah::XML::DocumentFragment.parse
|
151
|
+
# Shortcut for Loofah::XML::DocumentFragment.parse(*args, &block)
|
152
|
+
#
|
62
153
|
# This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
|
63
154
|
def xml_fragment(*args, &block)
|
64
155
|
Loofah::XML::DocumentFragment.parse(*args, &block)
|
@@ -78,23 +169,5 @@ module Loofah
|
|
78
169
|
def remove_extraneous_whitespace(string)
|
79
170
|
string.gsub(/\n\s*\n\s*\n/, "\n\n")
|
80
171
|
end
|
81
|
-
|
82
|
-
private
|
83
|
-
|
84
|
-
# remove comments that exist outside of the HTML element.
|
85
|
-
#
|
86
|
-
# these comments are allowed by the HTML spec:
|
87
|
-
#
|
88
|
-
# https://www.w3.org/TR/html401/struct/global.html#h-7.1
|
89
|
-
#
|
90
|
-
# but are not scrubbed by Loofah because these nodes don't meet
|
91
|
-
# the contract that scrubbers expect of a node (e.g., it can be
|
92
|
-
# replaced, sibling and children nodes can be created).
|
93
|
-
def remove_comments_before_html_element(doc)
|
94
|
-
doc.children.each do |child|
|
95
|
-
child.unlink if child.comment?
|
96
|
-
end
|
97
|
-
doc
|
98
|
-
end
|
99
172
|
end
|
100
173
|
end
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: loofah
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.24.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Dalessio
|
8
8
|
- Bryan Helmkamp
|
9
|
-
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2025-01-01 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: crass
|
@@ -31,124 +30,20 @@ dependencies:
|
|
31
30
|
requirements:
|
32
31
|
- - ">="
|
33
32
|
- !ruby/object:Gem::Version
|
34
|
-
version: 1.
|
33
|
+
version: 1.12.0
|
35
34
|
type: :runtime
|
36
35
|
prerelease: false
|
37
36
|
version_requirements: !ruby/object:Gem::Requirement
|
38
37
|
requirements:
|
39
38
|
- - ">="
|
40
39
|
- !ruby/object:Gem::Version
|
41
|
-
version: 1.
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
requirements:
|
46
|
-
- - "~>"
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
version: '1.3'
|
49
|
-
type: :development
|
50
|
-
prerelease: false
|
51
|
-
version_requirements: !ruby/object:Gem::Requirement
|
52
|
-
requirements:
|
53
|
-
- - "~>"
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: '1.3'
|
56
|
-
- !ruby/object:Gem::Dependency
|
57
|
-
name: json
|
58
|
-
requirement: !ruby/object:Gem::Requirement
|
59
|
-
requirements:
|
60
|
-
- - "~>"
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: '2.2'
|
63
|
-
type: :development
|
64
|
-
prerelease: false
|
65
|
-
version_requirements: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
67
|
-
- - "~>"
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '2.2'
|
70
|
-
- !ruby/object:Gem::Dependency
|
71
|
-
name: minitest
|
72
|
-
requirement: !ruby/object:Gem::Requirement
|
73
|
-
requirements:
|
74
|
-
- - "~>"
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
version: '5.14'
|
77
|
-
type: :development
|
78
|
-
prerelease: false
|
79
|
-
version_requirements: !ruby/object:Gem::Requirement
|
80
|
-
requirements:
|
81
|
-
- - "~>"
|
82
|
-
- !ruby/object:Gem::Version
|
83
|
-
version: '5.14'
|
84
|
-
- !ruby/object:Gem::Dependency
|
85
|
-
name: rake
|
86
|
-
requirement: !ruby/object:Gem::Requirement
|
87
|
-
requirements:
|
88
|
-
- - "~>"
|
89
|
-
- !ruby/object:Gem::Version
|
90
|
-
version: '13.0'
|
91
|
-
type: :development
|
92
|
-
prerelease: false
|
93
|
-
version_requirements: !ruby/object:Gem::Requirement
|
94
|
-
requirements:
|
95
|
-
- - "~>"
|
96
|
-
- !ruby/object:Gem::Version
|
97
|
-
version: '13.0'
|
98
|
-
- !ruby/object:Gem::Dependency
|
99
|
-
name: rdoc
|
100
|
-
requirement: !ruby/object:Gem::Requirement
|
101
|
-
requirements:
|
102
|
-
- - ">="
|
103
|
-
- !ruby/object:Gem::Version
|
104
|
-
version: '4.0'
|
105
|
-
- - "<"
|
106
|
-
- !ruby/object:Gem::Version
|
107
|
-
version: '7'
|
108
|
-
type: :development
|
109
|
-
prerelease: false
|
110
|
-
version_requirements: !ruby/object:Gem::Requirement
|
111
|
-
requirements:
|
112
|
-
- - ">="
|
113
|
-
- !ruby/object:Gem::Version
|
114
|
-
version: '4.0'
|
115
|
-
- - "<"
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '7'
|
118
|
-
- !ruby/object:Gem::Dependency
|
119
|
-
name: rr
|
120
|
-
requirement: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - "~>"
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: 1.2.0
|
125
|
-
type: :development
|
126
|
-
prerelease: false
|
127
|
-
version_requirements: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - "~>"
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: 1.2.0
|
132
|
-
- !ruby/object:Gem::Dependency
|
133
|
-
name: rubocop
|
134
|
-
requirement: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - "~>"
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '1.1'
|
139
|
-
type: :development
|
140
|
-
prerelease: false
|
141
|
-
version_requirements: !ruby/object:Gem::Requirement
|
142
|
-
requirements:
|
143
|
-
- - "~>"
|
144
|
-
- !ruby/object:Gem::Version
|
145
|
-
version: '1.1'
|
146
|
-
description: |-
|
147
|
-
Loofah is a general library for manipulating and transforming HTML/XML documents and fragments, built on top of Nokogiri.
|
40
|
+
version: 1.12.0
|
41
|
+
description: |
|
42
|
+
Loofah is a general library for manipulating and transforming HTML/XML documents and fragments,
|
43
|
+
built on top of Nokogiri.
|
148
44
|
|
149
|
-
Loofah
|
150
|
-
|
151
|
-
ActiveRecord extensions for sanitization are available in the [`loofah-activerecord` gem](https://github.com/flavorjones/loofah-activerecord).
|
45
|
+
Loofah also includes some HTML sanitizers based on `html5lib`'s safelist, which are a specific
|
46
|
+
application of the general transformation functionality.
|
152
47
|
email:
|
153
48
|
- mike.dalessio@gmail.com
|
154
49
|
- bryan@brynary.com
|
@@ -161,14 +56,16 @@ files:
|
|
161
56
|
- README.md
|
162
57
|
- SECURITY.md
|
163
58
|
- lib/loofah.rb
|
59
|
+
- lib/loofah/concerns.rb
|
164
60
|
- lib/loofah/elements.rb
|
165
61
|
- lib/loofah/helpers.rb
|
166
|
-
- lib/loofah/
|
167
|
-
- lib/loofah/
|
62
|
+
- lib/loofah/html4/document.rb
|
63
|
+
- lib/loofah/html4/document_fragment.rb
|
64
|
+
- lib/loofah/html5/document.rb
|
65
|
+
- lib/loofah/html5/document_fragment.rb
|
168
66
|
- lib/loofah/html5/libxml2_workarounds.rb
|
169
67
|
- lib/loofah/html5/safelist.rb
|
170
68
|
- lib/loofah/html5/scrub.rb
|
171
|
-
- lib/loofah/instance_methods.rb
|
172
69
|
- lib/loofah/metahelpers.rb
|
173
70
|
- lib/loofah/scrubber.rb
|
174
71
|
- lib/loofah/scrubbers.rb
|
@@ -184,7 +81,7 @@ metadata:
|
|
184
81
|
bug_tracker_uri: https://github.com/flavorjones/loofah/issues
|
185
82
|
changelog_uri: https://github.com/flavorjones/loofah/blob/main/CHANGELOG.md
|
186
83
|
documentation_uri: https://www.rubydoc.info/gems/loofah/
|
187
|
-
|
84
|
+
funding_uri: https://github.com/sponsors/flavorjones
|
188
85
|
rdoc_options: []
|
189
86
|
require_paths:
|
190
87
|
- lib
|
@@ -192,16 +89,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
192
89
|
requirements:
|
193
90
|
- - ">="
|
194
91
|
- !ruby/object:Gem::Version
|
195
|
-
version:
|
92
|
+
version: 2.5.0
|
196
93
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
197
94
|
requirements:
|
198
95
|
- - ">="
|
199
96
|
- !ruby/object:Gem::Version
|
200
97
|
version: '0'
|
201
98
|
requirements: []
|
202
|
-
rubygems_version: 3.
|
203
|
-
signing_key:
|
99
|
+
rubygems_version: 3.6.2
|
204
100
|
specification_version: 4
|
205
101
|
summary: Loofah is a general library for manipulating and transforming HTML/XML documents
|
206
|
-
and fragments, built on top of Nokogiri
|
102
|
+
and fragments, built on top of Nokogiri.
|
207
103
|
test_files: []
|
@@ -1,42 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
module Loofah
|
3
|
-
module HTML # :nodoc:
|
4
|
-
#
|
5
|
-
# Subclass of Nokogiri::HTML::DocumentFragment.
|
6
|
-
#
|
7
|
-
# See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
|
8
|
-
#
|
9
|
-
class DocumentFragment < Nokogiri::HTML::DocumentFragment
|
10
|
-
include Loofah::TextBehavior
|
11
|
-
|
12
|
-
class << self
|
13
|
-
#
|
14
|
-
# Overridden Nokogiri::HTML::DocumentFragment
|
15
|
-
# constructor. Applications should use Loofah.fragment to
|
16
|
-
# parse a fragment.
|
17
|
-
#
|
18
|
-
def parse(tags, encoding = nil)
|
19
|
-
doc = Loofah::HTML::Document.new
|
20
|
-
|
21
|
-
encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
|
22
|
-
doc.encoding = encoding
|
23
|
-
|
24
|
-
new(doc, tags)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
#
|
29
|
-
# Returns the HTML markup contained by the fragment
|
30
|
-
#
|
31
|
-
def to_s
|
32
|
-
serialize_root.children.to_s
|
33
|
-
end
|
34
|
-
|
35
|
-
alias :serialize :to_s
|
36
|
-
|
37
|
-
def serialize_root
|
38
|
-
at_xpath("./body") || self
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
@@ -1,133 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
module Loofah
|
3
|
-
#
|
4
|
-
# Mixes +scrub!+ into Document, DocumentFragment, Node and NodeSet.
|
5
|
-
#
|
6
|
-
# Traverse the document or fragment, invoking the +scrubber+ on
|
7
|
-
# each node.
|
8
|
-
#
|
9
|
-
# +scrubber+ must either be one of the symbols representing the
|
10
|
-
# built-in scrubbers (see Scrubbers), or a Scrubber instance.
|
11
|
-
#
|
12
|
-
# span2div = Loofah::Scrubber.new do |node|
|
13
|
-
# node.name = "div" if node.name == "span"
|
14
|
-
# end
|
15
|
-
# Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
|
16
|
-
# # => "<div>foo</div><p>bar</p>"
|
17
|
-
#
|
18
|
-
# or
|
19
|
-
#
|
20
|
-
# unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
|
21
|
-
# Loofah.fragment(unsafe_html).scrub!(:strip).to_s
|
22
|
-
# # => "ohai! <div>div is safe</div> "
|
23
|
-
#
|
24
|
-
# Note that this method is called implicitly from
|
25
|
-
# Loofah.scrub_fragment and Loofah.scrub_document.
|
26
|
-
#
|
27
|
-
# Please see Scrubber for more information on implementation and traversal, and
|
28
|
-
# README.rdoc for more example usage.
|
29
|
-
#
|
30
|
-
module ScrubBehavior
|
31
|
-
module Node # :nodoc:
|
32
|
-
def scrub!(scrubber)
|
33
|
-
#
|
34
|
-
# yes. this should be three separate methods. but nokogiri
|
35
|
-
# decorates (or not) based on whether the module name has
|
36
|
-
# already been included. and since documents get decorated
|
37
|
-
# just like their constituent nodes, we need to jam all the
|
38
|
-
# logic into a single module.
|
39
|
-
#
|
40
|
-
scrubber = ScrubBehavior.resolve_scrubber(scrubber)
|
41
|
-
case self
|
42
|
-
when Nokogiri::XML::Document
|
43
|
-
scrubber.traverse(root) if root
|
44
|
-
when Nokogiri::XML::DocumentFragment
|
45
|
-
children.scrub! scrubber
|
46
|
-
else
|
47
|
-
scrubber.traverse(self)
|
48
|
-
end
|
49
|
-
self
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
module NodeSet # :nodoc:
|
54
|
-
def scrub!(scrubber)
|
55
|
-
each { |node| node.scrub!(scrubber) }
|
56
|
-
self
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def ScrubBehavior.resolve_scrubber(scrubber) # :nodoc:
|
61
|
-
scrubber = Scrubbers::MAP[scrubber].new if Scrubbers::MAP[scrubber]
|
62
|
-
unless scrubber.is_a?(Loofah::Scrubber)
|
63
|
-
raise Loofah::ScrubberNotFound, "not a Scrubber or a scrubber name: #{scrubber.inspect}"
|
64
|
-
end
|
65
|
-
scrubber
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
#
|
70
|
-
# Overrides +text+ in HTML::Document and HTML::DocumentFragment,
|
71
|
-
# and mixes in +to_text+.
|
72
|
-
#
|
73
|
-
module TextBehavior
|
74
|
-
#
|
75
|
-
# Returns a plain-text version of the markup contained by the document,
|
76
|
-
# with HTML entities encoded.
|
77
|
-
#
|
78
|
-
# This method is significantly faster than #to_text, but isn't
|
79
|
-
# clever about whitespace around block elements.
|
80
|
-
#
|
81
|
-
# Loofah.document("<h1>Title</h1><div>Content</div>").text
|
82
|
-
# # => "TitleContent"
|
83
|
-
#
|
84
|
-
# By default, the returned text will have HTML entities
|
85
|
-
# escaped. If you want unescaped entities, and you understand
|
86
|
-
# that the result is unsafe to render in a browser, then you
|
87
|
-
# can pass an argument as shown:
|
88
|
-
#
|
89
|
-
# frag = Loofah.fragment("<script>alert('EVIL');</script>")
|
90
|
-
# # ok for browser:
|
91
|
-
# frag.text # => "<script>alert('EVIL');</script>"
|
92
|
-
# # decidedly not ok for browser:
|
93
|
-
# frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
|
94
|
-
#
|
95
|
-
def text(options = {})
|
96
|
-
result = if serialize_root
|
97
|
-
serialize_root.children.reject(&:comment?).map(&:inner_text).join("")
|
98
|
-
else
|
99
|
-
""
|
100
|
-
end
|
101
|
-
if options[:encode_special_chars] == false
|
102
|
-
result # possibly dangerous if rendered in a browser
|
103
|
-
else
|
104
|
-
encode_special_chars result
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
alias :inner_text :text
|
109
|
-
alias :to_str :text
|
110
|
-
|
111
|
-
#
|
112
|
-
# Returns a plain-text version of the markup contained by the
|
113
|
-
# fragment, with HTML entities encoded.
|
114
|
-
#
|
115
|
-
# This method is slower than #text, but is clever about
|
116
|
-
# whitespace around block elements and line break elements.
|
117
|
-
#
|
118
|
-
# Loofah.document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
|
119
|
-
# # => "\nTitle\n\nContent\nNext line\n"
|
120
|
-
#
|
121
|
-
def to_text(options = {})
|
122
|
-
Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
module DocumentDecorator # :nodoc:
|
127
|
-
def initialize(*args, &block)
|
128
|
-
super
|
129
|
-
self.decorators(Nokogiri::XML::Node) << ScrubBehavior::Node
|
130
|
-
self.decorators(Nokogiri::XML::NodeSet) << ScrubBehavior::NodeSet
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|