loofah 2.20.0 → 2.21.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +32 -0
- data/README.md +97 -106
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +78 -76
- data/lib/loofah/helpers.rb +21 -15
- data/lib/loofah/{html → html4}/document.rb +5 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +7 -6
- data/lib/loofah/html5/safelist.rb +937 -936
- data/lib/loofah/html5/scrub.rb +31 -31
- data/lib/loofah/metahelpers.rb +10 -6
- data/lib/loofah/scrubber.rb +10 -8
- data/lib/loofah/scrubbers.rb +52 -43
- data/lib/loofah/version.rb +2 -1
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +2 -6
- data/lib/loofah.rb +119 -43
- metadata +15 -103
- data/lib/loofah/html/document_fragment.rb +0 -42
- data/lib/loofah/instance_methods.rb +0 -133
data/lib/loofah.rb
CHANGED
@@ -1,8 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
3
2
|
|
4
3
|
require "nokogiri"
|
5
4
|
|
5
|
+
module Loofah
|
6
|
+
class << self
|
7
|
+
def html5_support?
|
8
|
+
# Note that Loofah can only support HTML5 in Nokogiri >= 1.14.0 because it requires the
|
9
|
+
# subclassing fix from https://github.com/sparklemotion/nokogiri/pull/2534
|
10
|
+
unless @html5_support_set
|
11
|
+
@html5_support = (
|
12
|
+
Gem::Version.new(Nokogiri::VERSION) > Gem::Version.new("1.14.0") &&
|
13
|
+
Nokogiri.uses_gumbo?
|
14
|
+
)
|
15
|
+
@html5_support_set = true
|
16
|
+
end
|
17
|
+
@html5_support
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
6
22
|
require_relative "loofah/version"
|
7
23
|
require_relative "loofah/metahelpers"
|
8
24
|
require_relative "loofah/elements"
|
@@ -14,51 +30,129 @@ require_relative "loofah/html5/scrub"
|
|
14
30
|
require_relative "loofah/scrubber"
|
15
31
|
require_relative "loofah/scrubbers"
|
16
32
|
|
17
|
-
require_relative "loofah/instance_methods"
|
33
|
+
require_relative "loofah/concerns"
|
18
34
|
require_relative "loofah/xml/document"
|
19
35
|
require_relative "loofah/xml/document_fragment"
|
20
|
-
require_relative "loofah/html/document"
|
21
|
-
require_relative "loofah/html/document_fragment"
|
36
|
+
require_relative "loofah/html4/document"
|
37
|
+
require_relative "loofah/html4/document_fragment"
|
38
|
+
|
39
|
+
if Nokogiri.respond_to?(:uses_gumbo?) && Nokogiri.uses_gumbo?
|
40
|
+
require_relative "loofah/html5/document"
|
41
|
+
require_relative "loofah/html5/document_fragment"
|
42
|
+
end
|
22
43
|
|
23
44
|
# == Strings and IO Objects as Input
|
24
45
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
46
|
+
# The following methods accept any IO object in addition to accepting a string:
|
47
|
+
#
|
48
|
+
# - Loofah.html4_document
|
49
|
+
# - Loofah.html4_fragment
|
50
|
+
# - Loofah.scrub_html4_document
|
51
|
+
# - Loofah.scrub_html4_fragment
|
52
|
+
#
|
53
|
+
# - Loofah.html5_document
|
54
|
+
# - Loofah.html5_fragment
|
55
|
+
# - Loofah.scrub_html5_document
|
56
|
+
# - Loofah.scrub_html5_fragment
|
57
|
+
#
|
58
|
+
# - Loofah.xml_document
|
59
|
+
# - Loofah.xml_fragment
|
60
|
+
# - Loofah.scrub_xml_document
|
61
|
+
# - Loofah.scrub_xml_fragment
|
62
|
+
#
|
63
|
+
# - Loofah.document
|
64
|
+
# - Loofah.fragment
|
65
|
+
# - Loofah.scrub_document
|
66
|
+
# - Loofah.scrub_fragment
|
67
|
+
#
|
68
|
+
# That IO object could be a file, or a socket, or a StringIO, or anything that responds to +read+
|
69
|
+
# and +close+.
|
30
70
|
#
|
31
71
|
module Loofah
|
72
|
+
# Alias for Loofah::HTML4
|
73
|
+
HTML = HTML4
|
74
|
+
|
32
75
|
class << self
|
33
|
-
# Shortcut for Loofah::HTML::Document.parse
|
34
|
-
#
|
35
|
-
|
36
|
-
|
76
|
+
# Shortcut for Loofah::HTML4::Document.parse(*args, &block)
|
77
|
+
#
|
78
|
+
# This method accepts the same parameters as Nokogiri::HTML4::Document.parse
|
79
|
+
def html4_document(*args, &block)
|
80
|
+
Loofah::HTML4::Document.parse(*args, &block)
|
37
81
|
end
|
38
82
|
|
39
|
-
# Shortcut for Loofah::HTML::DocumentFragment.parse
|
40
|
-
#
|
41
|
-
|
42
|
-
|
83
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
84
|
+
#
|
85
|
+
# This method accepts the same parameters as Nokogiri::HTML4::DocumentFragment.parse
|
86
|
+
def html4_fragment(*args, &block)
|
87
|
+
Loofah::HTML4::DocumentFragment.parse(*args, &block)
|
43
88
|
end
|
44
89
|
|
45
|
-
# Shortcut for Loofah.
|
46
|
-
def
|
47
|
-
Loofah.
|
90
|
+
# Shortcut for Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
91
|
+
def scrub_html4_document(string_or_io, method)
|
92
|
+
Loofah::HTML4::Document.parse(string_or_io).scrub!(method)
|
48
93
|
end
|
49
94
|
|
50
|
-
# Shortcut for Loofah.
|
51
|
-
def
|
52
|
-
Loofah.
|
95
|
+
# Shortcut for Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
96
|
+
def scrub_html4_fragment(string_or_io, method)
|
97
|
+
Loofah::HTML4::DocumentFragment.parse(string_or_io).scrub!(method)
|
53
98
|
end
|
54
99
|
|
55
|
-
|
100
|
+
if Loofah.html5_support?
|
101
|
+
# Shortcut for Loofah::HTML5::Document.parse(*args, &block)
|
102
|
+
#
|
103
|
+
# This method accepts the same parameters as Nokogiri::HTML5::Document.parse
|
104
|
+
def html5_document(*args, &block)
|
105
|
+
Loofah::HTML5::Document.parse(*args, &block)
|
106
|
+
end
|
107
|
+
|
108
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
109
|
+
#
|
110
|
+
# This method accepts the same parameters as Nokogiri::HTML5::DocumentFragment.parse
|
111
|
+
def html5_fragment(*args, &block)
|
112
|
+
Loofah::HTML5::DocumentFragment.parse(*args, &block)
|
113
|
+
end
|
114
|
+
|
115
|
+
# Shortcut for Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
116
|
+
def scrub_html5_document(string_or_io, method)
|
117
|
+
Loofah::HTML5::Document.parse(string_or_io).scrub!(method)
|
118
|
+
end
|
119
|
+
|
120
|
+
# Shortcut for Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
121
|
+
def scrub_html5_fragment(string_or_io, method)
|
122
|
+
Loofah::HTML5::DocumentFragment.parse(string_or_io).scrub!(method)
|
123
|
+
end
|
124
|
+
else
|
125
|
+
def html5_document(*args, &block)
|
126
|
+
raise NotImplementedError, "HTML5 is not supported by your version of Nokogiri"
|
127
|
+
end
|
128
|
+
|
129
|
+
def html5_fragment(*args, &block)
|
130
|
+
raise NotImplementedError, "HTML5 is not supported by your version of Nokogiri"
|
131
|
+
end
|
132
|
+
|
133
|
+
def scrub_html5_document(string_or_io, method)
|
134
|
+
raise NotImplementedError, "HTML5 is not supported by your version of Nokogiri"
|
135
|
+
end
|
136
|
+
|
137
|
+
def scrub_html5_fragment(string_or_io, method)
|
138
|
+
raise NotImplementedError, "HTML5 is not supported by your version of Nokogiri"
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
alias_method :document, :html4_document
|
143
|
+
alias_method :fragment, :html4_fragment
|
144
|
+
alias_method :scrub_document, :scrub_html4_document
|
145
|
+
alias_method :scrub_fragment, :scrub_html4_fragment
|
146
|
+
|
147
|
+
# Shortcut for Loofah::XML::Document.parse(*args, &block)
|
148
|
+
#
|
56
149
|
# This method accepts the same parameters as Nokogiri::XML::Document.parse
|
57
150
|
def xml_document(*args, &block)
|
58
151
|
Loofah::XML::Document.parse(*args, &block)
|
59
152
|
end
|
60
153
|
|
61
|
-
# Shortcut for Loofah::XML::DocumentFragment.parse
|
154
|
+
# Shortcut for Loofah::XML::DocumentFragment.parse(*args, &block)
|
155
|
+
#
|
62
156
|
# This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
|
63
157
|
def xml_fragment(*args, &block)
|
64
158
|
Loofah::XML::DocumentFragment.parse(*args, &block)
|
@@ -78,23 +172,5 @@ module Loofah
|
|
78
172
|
def remove_extraneous_whitespace(string)
|
79
173
|
string.gsub(/\n\s*\n\s*\n/, "\n\n")
|
80
174
|
end
|
81
|
-
|
82
|
-
private
|
83
|
-
|
84
|
-
# remove comments that exist outside of the HTML element.
|
85
|
-
#
|
86
|
-
# these comments are allowed by the HTML spec:
|
87
|
-
#
|
88
|
-
# https://www.w3.org/TR/html401/struct/global.html#h-7.1
|
89
|
-
#
|
90
|
-
# but are not scrubbed by Loofah because these nodes don't meet
|
91
|
-
# the contract that scrubbers expect of a node (e.g., it can be
|
92
|
-
# replaced, sibling and children nodes can be created).
|
93
|
-
def remove_comments_before_html_element(doc)
|
94
|
-
doc.children.each do |child|
|
95
|
-
child.unlink if child.comment?
|
96
|
-
end
|
97
|
-
doc
|
98
|
-
end
|
99
175
|
end
|
100
176
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: loofah
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.20.0
|
4
|
+
version: 2.21.0.rc1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Dalessio
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-04-
|
12
|
+
date: 2023-04-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: crass
|
@@ -39,102 +39,12 @@ dependencies:
|
|
39
39
|
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: 1.5.9
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
requirements:
|
46
|
-
- - "~>"
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
version: '1.3'
|
49
|
-
type: :development
|
50
|
-
prerelease: false
|
51
|
-
version_requirements: !ruby/object:Gem::Requirement
|
52
|
-
requirements:
|
53
|
-
- - "~>"
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: '1.3'
|
56
|
-
- !ruby/object:Gem::Dependency
|
57
|
-
name: json
|
58
|
-
requirement: !ruby/object:Gem::Requirement
|
59
|
-
requirements:
|
60
|
-
- - "~>"
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: '2.2'
|
63
|
-
type: :development
|
64
|
-
prerelease: false
|
65
|
-
version_requirements: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
67
|
-
- - "~>"
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '2.2'
|
70
|
-
- !ruby/object:Gem::Dependency
|
71
|
-
name: minitest
|
72
|
-
requirement: !ruby/object:Gem::Requirement
|
73
|
-
requirements:
|
74
|
-
- - "~>"
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
version: '5.14'
|
77
|
-
type: :development
|
78
|
-
prerelease: false
|
79
|
-
version_requirements: !ruby/object:Gem::Requirement
|
80
|
-
requirements:
|
81
|
-
- - "~>"
|
82
|
-
- !ruby/object:Gem::Version
|
83
|
-
version: '5.14'
|
84
|
-
- !ruby/object:Gem::Dependency
|
85
|
-
name: rake
|
86
|
-
requirement: !ruby/object:Gem::Requirement
|
87
|
-
requirements:
|
88
|
-
- - "~>"
|
89
|
-
- !ruby/object:Gem::Version
|
90
|
-
version: '13.0'
|
91
|
-
type: :development
|
92
|
-
prerelease: false
|
93
|
-
version_requirements: !ruby/object:Gem::Requirement
|
94
|
-
requirements:
|
95
|
-
- - "~>"
|
96
|
-
- !ruby/object:Gem::Version
|
97
|
-
version: '13.0'
|
98
|
-
- !ruby/object:Gem::Dependency
|
99
|
-
name: rdoc
|
100
|
-
requirement: !ruby/object:Gem::Requirement
|
101
|
-
requirements:
|
102
|
-
- - ">="
|
103
|
-
- !ruby/object:Gem::Version
|
104
|
-
version: '4.0'
|
105
|
-
- - "<"
|
106
|
-
- !ruby/object:Gem::Version
|
107
|
-
version: '7'
|
108
|
-
type: :development
|
109
|
-
prerelease: false
|
110
|
-
version_requirements: !ruby/object:Gem::Requirement
|
111
|
-
requirements:
|
112
|
-
- - ">="
|
113
|
-
- !ruby/object:Gem::Version
|
114
|
-
version: '4.0'
|
115
|
-
- - "<"
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '7'
|
118
|
-
- !ruby/object:Gem::Dependency
|
119
|
-
name: rubocop
|
120
|
-
requirement: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - "~>"
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: '1.1'
|
125
|
-
type: :development
|
126
|
-
prerelease: false
|
127
|
-
version_requirements: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - "~>"
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '1.1'
|
132
|
-
description: |-
|
133
|
-
Loofah is a general library for manipulating and transforming HTML/XML documents and fragments, built on top of Nokogiri.
|
134
|
-
|
135
|
-
Loofah excels at HTML sanitization (XSS prevention). It includes some nice HTML sanitizers, which are based on HTML5lib's safelist, so it most likely won't make your codes less secure. (These statements have not been evaluated by Netexperts.)
|
42
|
+
description: |
|
43
|
+
Loofah is a general library for manipulating and transforming HTML/XML documents and fragments,
|
44
|
+
built on top of Nokogiri.
|
136
45
|
|
137
|
-
|
46
|
+
Loofah also includes some HTML sanitizers based on `html5lib`'s safelist, which are a specific
|
47
|
+
application of the general transformation functionality.
|
138
48
|
email:
|
139
49
|
- mike.dalessio@gmail.com
|
140
50
|
- bryan@brynary.com
|
@@ -147,14 +57,16 @@ files:
|
|
147
57
|
- README.md
|
148
58
|
- SECURITY.md
|
149
59
|
- lib/loofah.rb
|
60
|
+
- lib/loofah/concerns.rb
|
150
61
|
- lib/loofah/elements.rb
|
151
62
|
- lib/loofah/helpers.rb
|
152
|
-
- lib/loofah/html/document.rb
|
153
|
-
- lib/loofah/html/document_fragment.rb
|
63
|
+
- lib/loofah/html4/document.rb
|
64
|
+
- lib/loofah/html4/document_fragment.rb
|
65
|
+
- lib/loofah/html5/document.rb
|
66
|
+
- lib/loofah/html5/document_fragment.rb
|
154
67
|
- lib/loofah/html5/libxml2_workarounds.rb
|
155
68
|
- lib/loofah/html5/safelist.rb
|
156
69
|
- lib/loofah/html5/scrub.rb
|
157
|
-
- lib/loofah/instance_methods.rb
|
158
70
|
- lib/loofah/metahelpers.rb
|
159
71
|
- lib/loofah/scrubber.rb
|
160
72
|
- lib/loofah/scrubbers.rb
|
@@ -181,13 +93,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
181
93
|
version: '0'
|
182
94
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
183
95
|
requirements:
|
184
|
-
- - ">="
|
96
|
+
- - ">"
|
185
97
|
- !ruby/object:Gem::Version
|
186
|
-
version: '0'
|
98
|
+
version: 1.3.1
|
187
99
|
requirements: []
|
188
100
|
rubygems_version: 3.4.10
|
189
101
|
signing_key:
|
190
102
|
specification_version: 4
|
191
103
|
summary: Loofah is a general library for manipulating and transforming HTML/XML documents
|
192
|
-
and fragments, built on top of Nokogiri
|
104
|
+
and fragments, built on top of Nokogiri.
|
193
105
|
test_files: []
|
@@ -1,42 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
module Loofah
|
3
|
-
module HTML # :nodoc:
|
4
|
-
#
|
5
|
-
# Subclass of Nokogiri::HTML::DocumentFragment.
|
6
|
-
#
|
7
|
-
# See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
|
8
|
-
#
|
9
|
-
class DocumentFragment < Nokogiri::HTML::DocumentFragment
|
10
|
-
include Loofah::TextBehavior
|
11
|
-
|
12
|
-
class << self
|
13
|
-
#
|
14
|
-
# Overridden Nokogiri::HTML::DocumentFragment
|
15
|
-
# constructor. Applications should use Loofah.fragment to
|
16
|
-
# parse a fragment.
|
17
|
-
#
|
18
|
-
def parse(tags, encoding = nil)
|
19
|
-
doc = Loofah::HTML::Document.new
|
20
|
-
|
21
|
-
encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
|
22
|
-
doc.encoding = encoding
|
23
|
-
|
24
|
-
new(doc, tags)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
#
|
29
|
-
# Returns the HTML markup contained by the fragment
|
30
|
-
#
|
31
|
-
def to_s
|
32
|
-
serialize_root.children.to_s
|
33
|
-
end
|
34
|
-
|
35
|
-
alias :serialize :to_s
|
36
|
-
|
37
|
-
def serialize_root
|
38
|
-
at_xpath("./body") || self
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
@@ -1,133 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
module Loofah
|
3
|
-
#
|
4
|
-
# Mixes +scrub!+ into Document, DocumentFragment, Node and NodeSet.
|
5
|
-
#
|
6
|
-
# Traverse the document or fragment, invoking the +scrubber+ on
|
7
|
-
# each node.
|
8
|
-
#
|
9
|
-
# +scrubber+ must either be one of the symbols representing the
|
10
|
-
# built-in scrubbers (see Scrubbers), or a Scrubber instance.
|
11
|
-
#
|
12
|
-
# span2div = Loofah::Scrubber.new do |node|
|
13
|
-
# node.name = "div" if node.name == "span"
|
14
|
-
# end
|
15
|
-
# Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
|
16
|
-
# # => "<div>foo</div><p>bar</p>"
|
17
|
-
#
|
18
|
-
# or
|
19
|
-
#
|
20
|
-
# unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
|
21
|
-
# Loofah.fragment(unsafe_html).scrub!(:strip).to_s
|
22
|
-
# # => "ohai! <div>div is safe</div> "
|
23
|
-
#
|
24
|
-
# Note that this method is called implicitly from
|
25
|
-
# Loofah.scrub_fragment and Loofah.scrub_document.
|
26
|
-
#
|
27
|
-
# Please see Scrubber for more information on implementation and traversal, and
|
28
|
-
# README.rdoc for more example usage.
|
29
|
-
#
|
30
|
-
module ScrubBehavior
|
31
|
-
module Node # :nodoc:
|
32
|
-
def scrub!(scrubber)
|
33
|
-
#
|
34
|
-
# yes. this should be three separate methods. but nokogiri
|
35
|
-
# decorates (or not) based on whether the module name has
|
36
|
-
# already been included. and since documents get decorated
|
37
|
-
# just like their constituent nodes, we need to jam all the
|
38
|
-
# logic into a single module.
|
39
|
-
#
|
40
|
-
scrubber = ScrubBehavior.resolve_scrubber(scrubber)
|
41
|
-
case self
|
42
|
-
when Nokogiri::XML::Document
|
43
|
-
scrubber.traverse(root) if root
|
44
|
-
when Nokogiri::XML::DocumentFragment
|
45
|
-
children.scrub! scrubber
|
46
|
-
else
|
47
|
-
scrubber.traverse(self)
|
48
|
-
end
|
49
|
-
self
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
module NodeSet # :nodoc:
|
54
|
-
def scrub!(scrubber)
|
55
|
-
each { |node| node.scrub!(scrubber) }
|
56
|
-
self
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def ScrubBehavior.resolve_scrubber(scrubber) # :nodoc:
|
61
|
-
scrubber = Scrubbers::MAP[scrubber].new if Scrubbers::MAP[scrubber]
|
62
|
-
unless scrubber.is_a?(Loofah::Scrubber)
|
63
|
-
raise Loofah::ScrubberNotFound, "not a Scrubber or a scrubber name: #{scrubber.inspect}"
|
64
|
-
end
|
65
|
-
scrubber
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
#
|
70
|
-
# Overrides +text+ in HTML::Document and HTML::DocumentFragment,
|
71
|
-
# and mixes in +to_text+.
|
72
|
-
#
|
73
|
-
module TextBehavior
|
74
|
-
#
|
75
|
-
# Returns a plain-text version of the markup contained by the document,
|
76
|
-
# with HTML entities encoded.
|
77
|
-
#
|
78
|
-
# This method is significantly faster than #to_text, but isn't
|
79
|
-
# clever about whitespace around block elements.
|
80
|
-
#
|
81
|
-
# Loofah.document("<h1>Title</h1><div>Content</div>").text
|
82
|
-
# # => "TitleContent"
|
83
|
-
#
|
84
|
-
# By default, the returned text will have HTML entities
|
85
|
-
# escaped. If you want unescaped entities, and you understand
|
86
|
-
# that the result is unsafe to render in a browser, then you
|
87
|
-
# can pass an argument as shown:
|
88
|
-
#
|
89
|
-
# frag = Loofah.fragment("<script>alert('EVIL');</script>")
|
90
|
-
# # ok for browser:
|
91
|
-
# frag.text # => "<script>alert('EVIL');</script>"
|
92
|
-
# # decidedly not ok for browser:
|
93
|
-
# frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
|
94
|
-
#
|
95
|
-
def text(options = {})
|
96
|
-
result = if serialize_root
|
97
|
-
serialize_root.children.reject(&:comment?).map(&:inner_text).join("")
|
98
|
-
else
|
99
|
-
""
|
100
|
-
end
|
101
|
-
if options[:encode_special_chars] == false
|
102
|
-
result # possibly dangerous if rendered in a browser
|
103
|
-
else
|
104
|
-
encode_special_chars result
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
alias :inner_text :text
|
109
|
-
alias :to_str :text
|
110
|
-
|
111
|
-
#
|
112
|
-
# Returns a plain-text version of the markup contained by the
|
113
|
-
# fragment, with HTML entities encoded.
|
114
|
-
#
|
115
|
-
# This method is slower than #text, but is clever about
|
116
|
-
# whitespace around block elements and line break elements.
|
117
|
-
#
|
118
|
-
# Loofah.document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
|
119
|
-
# # => "\nTitle\n\nContent\nNext line\n"
|
120
|
-
#
|
121
|
-
def to_text(options = {})
|
122
|
-
Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
module DocumentDecorator # :nodoc:
|
127
|
-
def initialize(*args, &block)
|
128
|
-
super
|
129
|
-
self.decorators(Nokogiri::XML::Node) << ScrubBehavior::Node
|
130
|
-
self.decorators(Nokogiri::XML::NodeSet) << ScrubBehavior::NodeSet
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|