loofah 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -1
- data/Gemfile +1 -1
- data/MIT-LICENSE.txt +1 -1
- data/Manifest.txt +1 -1
- data/README.md +361 -0
- data/Rakefile +1 -1
- data/lib/loofah.rb +1 -1
- data/lib/loofah/elements.rb +81 -6
- data/lib/loofah/html5/scrub.rb +2 -2
- data/lib/loofah/html5/whitelist.rb +7 -4
- data/lib/loofah/scrubbers.rb +6 -1
- data/test/html5/test_sanitizer.rb +36 -4
- data/test/integration/test_ad_hoc.rb +85 -68
- data/test/integration/test_html.rb +12 -2
- metadata +9 -22
- data/README.rdoc +0 -314
data/lib/loofah/elements.rb
CHANGED
@@ -2,13 +2,88 @@ require 'set'
|
|
2
2
|
|
3
3
|
module Loofah
|
4
4
|
module Elements
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[
|
6
|
+
address
|
7
|
+
blockquote
|
8
|
+
center
|
9
|
+
dir
|
10
|
+
div
|
11
|
+
dl
|
12
|
+
fieldset
|
13
|
+
form
|
14
|
+
h1
|
15
|
+
h2
|
16
|
+
h3
|
17
|
+
h4
|
18
|
+
h5
|
19
|
+
h6
|
20
|
+
hr
|
21
|
+
isindex
|
22
|
+
menu
|
23
|
+
noframes
|
24
|
+
noscript
|
25
|
+
ol
|
26
|
+
p
|
27
|
+
pre
|
28
|
+
table
|
29
|
+
ul
|
30
|
+
]
|
9
31
|
|
10
|
-
#
|
11
|
-
|
32
|
+
# https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
33
|
+
STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[
|
34
|
+
address
|
35
|
+
article
|
36
|
+
aside
|
37
|
+
blockquote
|
38
|
+
canvas
|
39
|
+
dd
|
40
|
+
div
|
41
|
+
dl
|
42
|
+
dt
|
43
|
+
fieldset
|
44
|
+
figcaption
|
45
|
+
figure
|
46
|
+
footer
|
47
|
+
form
|
48
|
+
h1
|
49
|
+
h2
|
50
|
+
h3
|
51
|
+
h4
|
52
|
+
h5
|
53
|
+
h6
|
54
|
+
header
|
55
|
+
hgroup
|
56
|
+
hr
|
57
|
+
li
|
58
|
+
main
|
59
|
+
nav
|
60
|
+
noscript
|
61
|
+
ol
|
62
|
+
output
|
63
|
+
p
|
64
|
+
pre
|
65
|
+
section
|
66
|
+
table
|
67
|
+
tfoot
|
68
|
+
ul
|
69
|
+
video
|
70
|
+
]
|
71
|
+
|
72
|
+
STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
|
73
|
+
|
74
|
+
# The following elements may also be considered block-level
|
75
|
+
# elements since they may contain block-level elements
|
76
|
+
LOOSE_BLOCK_LEVEL = Set.new %w[dd
|
77
|
+
dt
|
78
|
+
frameset
|
79
|
+
li
|
80
|
+
tbody
|
81
|
+
td
|
82
|
+
tfoot
|
83
|
+
th
|
84
|
+
thead
|
85
|
+
tr
|
86
|
+
]
|
12
87
|
|
13
88
|
BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
|
14
89
|
end
|
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -44,7 +44,7 @@ module Loofah
|
|
44
44
|
elsif val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[0] == 'data'
|
45
45
|
# permit only allowed data mediatypes
|
46
46
|
mediatype = val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[1]
|
47
|
-
mediatype,
|
47
|
+
mediatype, _ = mediatype.split(';')[0..1] if mediatype
|
48
48
|
if mediatype && !WhiteList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
49
49
|
attr_node.remove
|
50
50
|
next
|
@@ -79,7 +79,7 @@ module Loofah
|
|
79
79
|
style_tree.each do |node|
|
80
80
|
next unless node[:node] == :property
|
81
81
|
next if node[:children].any? do |child|
|
82
|
-
[:url, :bad_url
|
82
|
+
[:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !WhiteList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
|
83
83
|
end
|
84
84
|
name = node[:name].downcase
|
85
85
|
if WhiteList::ALLOWED_CSS_PROPERTIES.include?(name) || WhiteList::ALLOWED_SVG_PROPERTIES.include?(name)
|
@@ -51,7 +51,7 @@ module Loofah
|
|
51
51
|
caption center cite code col colgroup command datalist dd del
|
52
52
|
details dfn dir div dl dt em fieldset figcaption figure footer
|
53
53
|
font form h1 h2 h3 h4 h5 h6 header hr i img input ins kbd label
|
54
|
-
legend li map mark menu meter nav ol output optgroup option p
|
54
|
+
legend li main map mark menu meter nav ol output optgroup option p
|
55
55
|
pre q s samp section select small span strike strong sub summary
|
56
56
|
sup table tbody td textarea tfoot th thead time tr tt u ul var
|
57
57
|
video]
|
@@ -65,7 +65,7 @@ module Loofah
|
|
65
65
|
circle clipPath defs desc ellipse feGaussianBlur filter font-face
|
66
66
|
font-face-name font-face-src foreignObject
|
67
67
|
g glyph hkern linearGradient line marker mask metadata missing-glyph
|
68
|
-
mpath path polygon polyline radialGradient rect set stop svg switch
|
68
|
+
mpath path polygon polyline radialGradient rect set stop svg switch symbol
|
69
69
|
text textPath title tspan use]
|
70
70
|
|
71
71
|
ACCEPTABLE_ATTRIBUTES = Set.new %w[abbr accept accept-charset accesskey action
|
@@ -125,8 +125,8 @@ module Loofah
|
|
125
125
|
border-bottom-color border-collapse border-color border-left-color
|
126
126
|
border-right-color border-top-color clear color cursor direction
|
127
127
|
display elevation float font font-family font-size font-style
|
128
|
-
font-variant font-weight height letter-spacing line-height
|
129
|
-
pause pause-after pause-before pitch pitch-range richness speak
|
128
|
+
font-variant font-weight height letter-spacing line-height list-style-type
|
129
|
+
overflow pause pause-after pause-before pitch pitch-range richness speak
|
130
130
|
speak-header speak-numeral speak-punctuation speech-rate stress
|
131
131
|
text-align text-decoration text-indent unicode-bidi vertical-align
|
132
132
|
voice-family volume white-space width]
|
@@ -137,6 +137,8 @@ module Loofah
|
|
137
137
|
purple red right solid silver teal top transparent underline white
|
138
138
|
yellow]
|
139
139
|
|
140
|
+
ACCEPTABLE_CSS_FUNCTIONS = Set.new %w[calc rgb]
|
141
|
+
|
140
142
|
SHORTHAND_CSS_PROPERTIES = Set.new %w[background border margin padding]
|
141
143
|
|
142
144
|
ACCEPTABLE_SVG_PROPERTIES = Set.new %w[fill fill-opacity fill-rule stroke
|
@@ -155,6 +157,7 @@ module Loofah
|
|
155
157
|
ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
|
156
158
|
ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
|
157
159
|
ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
|
160
|
+
ALLOWED_CSS_FUNCTIONS = ACCEPTABLE_CSS_FUNCTIONS
|
158
161
|
ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
|
159
162
|
ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
|
160
163
|
ALLOWED_URI_DATA_MEDIATYPES = ACCEPTABLE_URI_DATA_MEDIATYPES
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -99,7 +99,12 @@ module Loofah
|
|
99
99
|
|
100
100
|
def scrub(node)
|
101
101
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
102
|
-
node.
|
102
|
+
if node.children.length == 1 && node.children.first.cdata?
|
103
|
+
sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
|
104
|
+
node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
|
105
|
+
else
|
106
|
+
node.before node.children
|
107
|
+
end
|
103
108
|
node.remove
|
104
109
|
end
|
105
110
|
end
|
@@ -20,9 +20,9 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
20
20
|
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
21
21
|
## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
|
22
22
|
sane = sanitize_html(input).gsub('"',"'")
|
23
|
-
htmloutput.gsub
|
24
|
-
xhtmloutput.gsub
|
25
|
-
rexmloutput.gsub
|
23
|
+
htmloutput = htmloutput.gsub('"',"'")
|
24
|
+
xhtmloutput = xhtmloutput.gsub('"',"'")
|
25
|
+
rexmloutput = rexmloutput.gsub('"',"'")
|
26
26
|
|
27
27
|
## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that
|
28
28
|
## it would require a lot of manual hacking to make the tests match libxml's output.
|
@@ -136,7 +136,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
136
136
|
check_sanitization(input, output, output, output)
|
137
137
|
end
|
138
138
|
end
|
139
|
-
|
139
|
+
|
140
140
|
HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
|
141
141
|
define_method "test_should_allow_data_#{data_uri_type}_uris" do
|
142
142
|
input = %(<a href="data:#{data_uri_type}">foo</a>)
|
@@ -275,6 +275,38 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
275
275
|
assert_match %r/-0.05em/, sane.inner_html
|
276
276
|
end
|
277
277
|
|
278
|
+
def test_css_function_sanitization_leaves_whitelisted_functions_calc
|
279
|
+
html = "<span style=\"width:calc(5%)\">"
|
280
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
281
|
+
assert_match %r/calc\(5%\)/, sane.inner_html
|
282
|
+
|
283
|
+
html = "<span style=\"width: calc(5%)\">"
|
284
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
285
|
+
assert_match %r/calc\(5%\)/, sane.inner_html
|
286
|
+
end
|
287
|
+
|
288
|
+
def test_css_function_sanitization_leaves_whitelisted_functions_rgb
|
289
|
+
html = '<span style="color: rgb(255, 0, 0)">'
|
290
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
291
|
+
assert_match %r/rgb\(255, 0, 0\)/, sane.inner_html
|
292
|
+
end
|
293
|
+
|
294
|
+
def test_css_function_sanitization_leaves_whitelisted_list_style_type
|
295
|
+
html = "<ol style='list-style-type:lower-greek;'></ol>"
|
296
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
297
|
+
assert_match %r/list-style-type:lower-greek/, sane.inner_html
|
298
|
+
end
|
299
|
+
|
300
|
+
def test_css_function_sanitization_strips_style_attributes_with_unsafe_functions
|
301
|
+
html = "<span style=\"width:attr(data-evil-attr)\">"
|
302
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
303
|
+
assert_match %r/<span><\/span>/, sane.inner_html
|
304
|
+
|
305
|
+
html = "<span style=\"width: attr(data-evil-attr)\">"
|
306
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
307
|
+
assert_match %r/<span><\/span>/, sane.inner_html
|
308
|
+
end
|
309
|
+
|
278
310
|
def test_issue_90_slow_regex
|
279
311
|
skip("timing tests are hard to make pass and have little regression-testing value")
|
280
312
|
|
@@ -16,66 +16,67 @@ class IntegrationTestAdHoc < Loofah::TestCase
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
-
|
20
|
-
|
19
|
+
context "tests" do
|
20
|
+
def test_removal_of_illegal_tag
|
21
|
+
html = <<-HTML
|
21
22
|
following this there should be no jim tag
|
22
23
|
<jim>jim</jim>
|
23
24
|
was there?
|
24
25
|
HTML
|
25
|
-
|
26
|
-
|
27
|
-
|
26
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
27
|
+
assert sane.xpath("//jim").empty?
|
28
|
+
end
|
28
29
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
30
|
+
def test_removal_of_illegal_attribute
|
31
|
+
html = "<p class=bar foo=bar abbr=bar />"
|
32
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
33
|
+
node = sane.xpath("//p").first
|
34
|
+
assert node.attributes['class']
|
35
|
+
assert node.attributes['abbr']
|
36
|
+
assert_nil node.attributes['foo']
|
37
|
+
end
|
37
38
|
|
38
|
-
|
39
|
-
|
39
|
+
def test_removal_of_illegal_url_in_href
|
40
|
+
html = <<-HTML
|
40
41
|
<a href='jimbo://jim.jim/'>this link should have its href removed because of illegal url</a>
|
41
42
|
<a href='http://jim.jim/'>this link should be fine</a>
|
42
43
|
HTML
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
45
|
+
nodes = sane.xpath("//a")
|
46
|
+
assert_nil nodes.first.attributes['href']
|
47
|
+
assert nodes.last.attributes['href']
|
48
|
+
end
|
48
49
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
50
|
+
def test_css_sanitization
|
51
|
+
html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
|
52
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
53
|
+
assert_match %r/#000/, sane.inner_html
|
54
|
+
refute_match %r/foo\.com/, sane.inner_html
|
55
|
+
end
|
55
56
|
|
56
|
-
|
57
|
-
|
58
|
-
|
57
|
+
def test_fragment_with_no_tags
|
58
|
+
assert_equal "This fragment has no tags.", Loofah.scrub_fragment("This fragment has no tags.", :escape).to_xml
|
59
|
+
end
|
59
60
|
|
60
|
-
|
61
|
-
|
62
|
-
|
61
|
+
def test_fragment_in_p_tag
|
62
|
+
assert_equal "<p>This fragment is in a p.</p>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>", :escape).to_xml
|
63
|
+
end
|
63
64
|
|
64
|
-
|
65
|
-
|
66
|
-
|
65
|
+
def test_fragment_in_p_tag_plus_stuff
|
66
|
+
assert_equal "<p>This fragment is in a p.</p>foo<strong>bar</strong>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>foo<strong>bar</strong>", :escape).to_xml
|
67
|
+
end
|
67
68
|
|
68
|
-
|
69
|
-
|
70
|
-
|
69
|
+
def test_fragment_with_text_nodes_leading_and_trailing
|
70
|
+
assert_equal "text<p>fragment</p>text", Loofah.scrub_fragment("text<p>fragment</p>text", :escape).to_xml
|
71
|
+
end
|
71
72
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
73
|
+
def test_whitewash_on_fragment
|
74
|
+
html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
|
75
|
+
whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
|
76
|
+
assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
|
77
|
+
end
|
77
78
|
|
78
|
-
|
79
|
+
MSWORD_HTML = <<-EOHTML
|
79
80
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
|
80
81
|
<w:WordDocument>
|
81
82
|
<w:View>Normal</w:View>
|
@@ -141,36 +142,52 @@ mso-bidi-language:#0400;}
|
|
141
142
|
<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
|
142
143
|
EOHTML
|
143
144
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
145
|
+
def test_fragment_whitewash_on_microsofty_markup
|
146
|
+
whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
|
147
|
+
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s.strip
|
148
|
+
end
|
148
149
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
150
|
+
def test_document_whitewash_on_microsofty_markup
|
151
|
+
whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
|
152
|
+
assert_match %r(<p>Foo <b>BOLD</b></p>), whitewashed.to_s
|
153
|
+
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
|
154
|
+
end
|
154
155
|
|
155
|
-
|
156
|
-
|
157
|
-
|
156
|
+
def test_return_empty_string_when_nothing_left
|
157
|
+
assert_equal "", Loofah.scrub_document('<script>test</script>', :prune).text
|
158
|
+
end
|
159
|
+
|
160
|
+
def test_nested_script_cdata_tags_should_be_scrubbed
|
161
|
+
html = "<script><script src='malicious.js'></script>"
|
162
|
+
stripped = Loofah.fragment(html).scrub!(:strip)
|
163
|
+
assert_empty stripped.xpath("//script")
|
164
|
+
refute_match("<script", stripped.to_html)
|
165
|
+
end
|
158
166
|
|
159
|
-
|
160
|
-
|
167
|
+
def test_nested_script_cdata_tags_should_be_scrubbed_2
|
168
|
+
html = "<script><script>alert('a');</script></script>"
|
169
|
+
stripped = Loofah.fragment(html).scrub!(:strip)
|
170
|
+
assert_empty stripped.xpath("//script")
|
171
|
+
refute_match("<script", stripped.to_html)
|
172
|
+
end
|
173
|
+
|
174
|
+
def test_removal_of_all_tags
|
175
|
+
html = <<-HTML
|
161
176
|
What's up <strong>doc</strong>?
|
162
177
|
HTML
|
163
|
-
|
164
|
-
|
165
|
-
|
178
|
+
stripped = Loofah.scrub_document(html, :prune).text
|
179
|
+
assert_equal %Q(What\'s up doc?).strip, stripped.strip
|
180
|
+
end
|
166
181
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
182
|
+
def test_dont_remove_whitespace
|
183
|
+
html = "Foo\nBar"
|
184
|
+
assert_equal html, Loofah.scrub_document(html, :prune).text
|
185
|
+
end
|
171
186
|
|
172
|
-
|
173
|
-
|
174
|
-
|
187
|
+
def test_dont_remove_whitespace_between_tags
|
188
|
+
html = "<p>Foo</p>\n<p>Bar</p>"
|
189
|
+
assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
|
190
|
+
end
|
175
191
|
end
|
176
192
|
end
|
193
|
+
|
@@ -19,11 +19,16 @@ class IntegrationTestHtml < Loofah::TestCase
|
|
19
19
|
end
|
20
20
|
|
21
21
|
context "#to_text" do
|
22
|
-
it "add newlines before and after block elements" do
|
22
|
+
it "add newlines before and after html4 block elements" do
|
23
23
|
html = Loofah.fragment "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
24
24
|
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
25
25
|
end
|
26
26
|
|
27
|
+
it "add newlines before and after html5 block elements" do
|
28
|
+
html = Loofah.fragment "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
29
|
+
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
30
|
+
end
|
31
|
+
|
27
32
|
it "remove extraneous whitespace" do
|
28
33
|
html = Loofah.fragment "<div>tweedle\n\n\t\n\s\nbeetle</div>"
|
29
34
|
assert_equal "\ntweedle\n\nbeetle\n", html.to_text
|
@@ -47,11 +52,16 @@ class IntegrationTestHtml < Loofah::TestCase
|
|
47
52
|
end
|
48
53
|
|
49
54
|
context "#to_text" do
|
50
|
-
it "add newlines before and after block elements" do
|
55
|
+
it "add newlines before and after html4 block elements" do
|
51
56
|
html = Loofah.document "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
52
57
|
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
53
58
|
end
|
54
59
|
|
60
|
+
it "add newlines before and after html5 block elements" do
|
61
|
+
html = Loofah.document "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
62
|
+
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
63
|
+
end
|
64
|
+
|
55
65
|
it "remove extraneous whitespace" do
|
56
66
|
html = Loofah.document "<div>tweedle\n\n\t\n\s\nbeetle</div>"
|
57
67
|
assert_equal "\ntweedle\n\nbeetle\n", html.to_text
|