loofah 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -2,7 +2,9 @@ require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
2
2
 
3
3
  class TestApi < Test::Unit::TestCase
4
4
 
5
- HTML = "<div>a</div>\n<div>b</div>"
5
+ HTML = "<div>a</div>\n<div>b</div>"
6
+ XML_FRAGMENT = "<div>a</div>\n<div>b</div>"
7
+ XML = "<root>#{XML_FRAGMENT}</root>"
6
8
 
7
9
  def test_loofah_document
8
10
  doc = Loofah.document(HTML)
@@ -14,16 +16,36 @@ class TestApi < Test::Unit::TestCase
14
16
  assert_html_fragmentish doc
15
17
  end
16
18
 
19
+ def test_loofah_xml_document
20
+ doc = Loofah.xml_document(XML)
21
+ assert_xml_documentish doc
22
+ end
23
+
24
+ def test_loofah_xml_fragment
25
+ doc = Loofah.xml_fragment(XML_FRAGMENT)
26
+ assert_xml_fragmentish doc
27
+ end
28
+
17
29
  def test_loofah_html_document_parse_method
18
30
  doc = Loofah::HTML::Document.parse(HTML)
19
31
  assert_html_documentish doc
20
32
  end
21
33
 
34
+ def test_loofah_xml_document_parse_method
35
+ doc = Loofah::XML::Document.parse(XML)
36
+ assert_xml_documentish doc
37
+ end
38
+
22
39
  def test_loofah_html_document_fragment_parse_method
23
40
  doc = Loofah::HTML::DocumentFragment.parse(HTML)
24
41
  assert_html_fragmentish doc
25
42
  end
26
43
 
44
+ def test_loofah_xml_document_fragment_parse_method
45
+ doc = Loofah::XML::DocumentFragment.parse(XML_FRAGMENT)
46
+ assert_xml_fragmentish doc
47
+ end
48
+
27
49
  def test_loofah_document_scrub!
28
50
  doc = Loofah.document(HTML).scrub!(:strip)
29
51
  assert_html_documentish doc
@@ -34,6 +56,18 @@ class TestApi < Test::Unit::TestCase
34
56
  assert_html_fragmentish doc
35
57
  end
36
58
 
59
+ def test_loofah_xml_document_scrub!
60
+ scrubber = Loofah::Scrubber.new { |node| }
61
+ doc = Loofah.xml_document(XML).scrub!(scrubber)
62
+ assert_xml_documentish doc
63
+ end
64
+
65
+ def test_loofah_xml_fragment_scrub!
66
+ scrubber = Loofah::Scrubber.new { |node| }
67
+ doc = Loofah.xml_fragment(XML_FRAGMENT).scrub!(scrubber)
68
+ assert_xml_fragmentish doc
69
+ end
70
+
37
71
  private
38
72
 
39
73
  def assert_html_documentish(doc)
@@ -48,4 +82,16 @@ class TestApi < Test::Unit::TestCase
48
82
  assert_equal HTML, doc.inner_html
49
83
  end
50
84
 
85
+ def assert_xml_documentish(doc)
86
+ assert_kind_of Nokogiri::XML::Document, doc
87
+ assert_kind_of Loofah::XML::Document, doc
88
+ assert_equal XML, doc.root.to_xml
89
+ end
90
+
91
+ def assert_xml_fragmentish(doc)
92
+ assert_kind_of Nokogiri::XML::DocumentFragment, doc
93
+ assert_kind_of Loofah::XML::DocumentFragment, doc
94
+ assert_equal XML_FRAGMENT, doc.children.to_xml
95
+ end
96
+
51
97
  end
@@ -2,124 +2,226 @@ require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
2
2
 
3
3
  class TestScrubber < Test::Unit::TestCase
4
4
 
5
- [ Loofah::HTML::Document, Loofah::HTML::DocumentFragment ].each do |klass|
6
- define_method "test_#{klass}_bad_sanitize_method" do
7
- doc = klass.parse "<p>foo</p>"
8
- assert_raises(ArgumentError) { doc.scrub! :frippery }
5
+ FRAGMENT = "<span>hello</span><span>goodbye</span>"
6
+ FRAGMENT_NODE_COUNT = 4 # span, text, span, text
7
+ FRAGMENT_NODE_STOP_TOP_DOWN = 2 # span, span
8
+ DOCUMENT = "<html><head><link></link></head><body><span>hello</span><span>goodbye</span></body></html>"
9
+ DOCUMENT_NODE_COUNT = 5 # link, span, text, span, text
10
+ DOCUMENT_NODE_STOP_TOP_DOWN = 3 # link, span, span
11
+
12
+ context "receiving a block" do
13
+ setup do
14
+ @count = 0
9
15
  end
10
- end
11
-
12
- INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
13
- INVALID_ESCAPED = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
14
- INVALID_PRUNED = "<div>quux</div>"
15
- INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"
16
-
17
- WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid>"
18
- WHITEWASH_RESULT = "<div>foo</div>"
19
-
20
- def test_document_escape_bad_tags
21
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
22
- result = doc.scrub! :escape
23
-
24
- assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
25
- assert_equal doc, result
26
- end
27
-
28
- def test_fragment_escape_bad_tags
29
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
30
- result = doc.scrub! :escape
31
-
32
- assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
33
- assert_equal doc, result
34
- end
35
-
36
- def test_document_prune_bad_tags
37
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
38
- result = doc.scrub! :prune
39
16
 
40
- assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
41
- assert_equal doc, result
42
- end
43
-
44
- def test_fragment_prune_bad_tags
45
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
46
- result = doc.scrub! :prune
47
-
48
- assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
49
- assert_equal doc, result
50
- end
51
-
52
- def test_document_strip_bad_tags
53
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
54
- result = doc.scrub! :strip
55
-
56
- assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
57
- assert_equal doc, result
58
- end
59
-
60
- def test_fragment_strip_bad_tags
61
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
62
- result = doc.scrub! :strip
17
+ context "returning CONTINUE" do
18
+ setup do
19
+ @scrubber = Loofah::Scrubber.new do |node|
20
+ @count += 1
21
+ Loofah::Scrubber::CONTINUE
22
+ end
23
+ end
24
+
25
+ should "operate properly on a fragment" do
26
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
27
+ assert_equal FRAGMENT_NODE_COUNT, @count
28
+ end
29
+
30
+ should "operate properly on a document" do
31
+ Loofah.scrub_document(DOCUMENT, @scrubber)
32
+ assert_equal DOCUMENT_NODE_COUNT, @count
33
+ end
34
+ end
63
35
 
64
- assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
65
- assert_equal doc, result
66
- end
36
+ context "returning STOP" do
37
+ setup do
38
+ @scrubber = Loofah::Scrubber.new do |node|
39
+ @count += 1
40
+ Loofah::Scrubber::STOP
41
+ end
42
+ end
43
+
44
+ should "operate as top-down on a fragment" do
45
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
46
+ assert_equal FRAGMENT_NODE_STOP_TOP_DOWN, @count
47
+ end
48
+
49
+ should "operate as top-down on a document" do
50
+ Loofah.scrub_document(DOCUMENT, @scrubber)
51
+ assert_equal DOCUMENT_NODE_STOP_TOP_DOWN, @count
52
+ end
53
+ end
67
54
 
68
- def test_document_whitewash
69
- doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
70
- result = doc.scrub! :whitewash
55
+ context "returning neither CONTINUE nor STOP" do
56
+ setup do
57
+ @scrubber = Loofah::Scrubber.new do |node|
58
+ @count += 1
59
+ end
60
+ end
61
+
62
+ should "act as if CONTINUE was returned" do
63
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
64
+ assert_equal FRAGMENT_NODE_COUNT, @count
65
+ end
66
+ end
71
67
 
72
- assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
73
- assert_equal doc, result
74
- end
68
+ context "not specifying direction" do
69
+ setup do
70
+ @scrubber = Loofah::Scrubber.new() do |node|
71
+ @count += 1
72
+ Loofah::Scrubber::STOP
73
+ end
74
+ end
75
+
76
+ should "operate as top-down on a fragment" do
77
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
78
+ assert_equal FRAGMENT_NODE_STOP_TOP_DOWN, @count
79
+ end
80
+
81
+ should "operate as top-down on a document" do
82
+ Loofah.scrub_document(DOCUMENT, @scrubber)
83
+ assert_equal DOCUMENT_NODE_STOP_TOP_DOWN, @count
84
+ end
85
+ end
75
86
 
76
- def test_fragment_whitewash
77
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
78
- result = doc.scrub! :whitewash
87
+ context "specifying top-down direction" do
88
+ setup do
89
+ @scrubber = Loofah::Scrubber.new(:direction => :top_down) do |node|
90
+ @count += 1
91
+ Loofah::Scrubber::STOP
92
+ end
93
+ end
94
+
95
+ should "operate as top-down on a fragment" do
96
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
97
+ assert_equal FRAGMENT_NODE_STOP_TOP_DOWN, @count
98
+ end
99
+
100
+ should "operate as top-down on a document" do
101
+ Loofah.scrub_document(DOCUMENT, @scrubber)
102
+ assert_equal DOCUMENT_NODE_STOP_TOP_DOWN, @count
103
+ end
104
+ end
79
105
 
80
- assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
81
- assert_equal doc, result
82
- end
106
+ context "specifying bottom-up direction" do
107
+ setup do
108
+ @scrubber = Loofah::Scrubber.new(:direction => :bottom_up) do |node|
109
+ @count += 1
110
+ end
111
+ end
112
+
113
+ should "operate as bottom-up on a fragment" do
114
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
115
+ assert_equal FRAGMENT_NODE_COUNT, @count
116
+ end
117
+
118
+ should "operate as bottom-up on a document" do
119
+ Loofah.scrub_document(DOCUMENT, @scrubber)
120
+ assert_equal DOCUMENT_NODE_COUNT, @count
121
+ end
122
+ end
83
123
 
84
- def test_fragment_shortcut
85
- mock_doc = mock
86
- Loofah.expects(:fragment).with(:string_or_io).returns(mock_doc)
87
- mock_doc.expects(:scrub!).with(:method)
124
+ context "invalid direction" do
125
+ should "raise an exception" do
126
+ assert_raises(ArgumentError) {
127
+ Loofah::Scrubber.new(:direction => :quux) { }
128
+ }
129
+ end
130
+ end
88
131
 
89
- Loofah.scrub_fragment(:string_or_io, :method)
132
+ context "given a block taking zero arguments" do
133
+ setup do
134
+ @scrubber = Loofah::Scrubber.new do
135
+ @count += 1
136
+ end
137
+ end
138
+
139
+ should "work anyway, shrug" do
140
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
141
+ assert_equal FRAGMENT_NODE_COUNT, @count
142
+ end
143
+ end
90
144
  end
91
145
 
92
- def test_document_shortcut
93
- mock_doc = mock
94
- Loofah.expects(:document).with(:string_or_io).returns(mock_doc)
95
- mock_doc.expects(:scrub!).with(:method)
146
+ context "defining a new Scrubber class" do
147
+ setup do
148
+ @klass = Class.new(Loofah::Scrubber) do
149
+ attr_accessor :count
150
+ def initialize(direction=nil)
151
+ @direction = direction
152
+ @count = 0
153
+ end
154
+ def scrub(node)
155
+ @count += 1
156
+ Loofah::Scrubber::STOP
157
+ end
158
+ end
159
+ end
96
160
 
97
- Loofah.scrub_document(:string_or_io, :method)
98
- end
161
+ context "when not specifying direction" do
162
+ setup do
163
+ @scrubber = @klass.new
164
+ assert_nil @scrubber.direction
165
+ end
166
+
167
+ should "operate as top-down on a fragment" do
168
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
169
+ assert_equal FRAGMENT_NODE_STOP_TOP_DOWN, @scrubber.count
170
+ end
171
+
172
+ should "operate as top-down on a document" do
173
+ Loofah.scrub_document(DOCUMENT, @scrubber)
174
+ assert_equal DOCUMENT_NODE_STOP_TOP_DOWN, @scrubber.count
175
+ end
176
+ end
99
177
 
100
- def test_document_to_s
101
- doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
102
- assert_not_nil doc.xpath("/html").first
103
- assert_not_nil doc.xpath("/html/head").first
104
- assert_not_nil doc.xpath("/html/body").first
178
+ context "when direction is specified as top_down" do
179
+ setup do
180
+ @scrubber = @klass.new(:top_down)
181
+ assert_equal :top_down, @scrubber.direction
182
+ end
183
+
184
+ should "operate as top-down on a fragment" do
185
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
186
+ assert_equal FRAGMENT_NODE_STOP_TOP_DOWN, @scrubber.count
187
+ end
188
+
189
+ should "operate as top-down on a document" do
190
+ Loofah.scrub_document(DOCUMENT, @scrubber)
191
+ assert_equal DOCUMENT_NODE_STOP_TOP_DOWN, @scrubber.count
192
+ end
193
+ end
105
194
 
106
- assert_contains doc.to_s, /<!DOCTYPE/
107
- assert_contains doc.to_s, /<html>/
108
- assert_contains doc.to_s, /<head>/
109
- assert_contains doc.to_s, /<body>/
195
+ context "when direction is specified as bottom_up" do
196
+ setup do
197
+ @scrubber = @klass.new(:bottom_up)
198
+ assert_equal :bottom_up, @scrubber.direction
199
+ end
200
+
201
+ should "operate as bottom-up on a fragment" do
202
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
203
+ assert_equal FRAGMENT_NODE_COUNT, @scrubber.count
204
+ end
205
+
206
+ should "operate as bottom-up on a document" do
207
+ Loofah.scrub_document(DOCUMENT, @scrubber)
208
+ assert_equal DOCUMENT_NODE_COUNT, @scrubber.count
209
+ end
210
+ end
110
211
  end
111
212
 
112
- def test_document_serialize
113
- doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
114
-
115
- assert_not_nil doc.xpath("/html").first
116
- assert_not_nil doc.xpath("/html/head").first
117
- assert_not_nil doc.xpath("/html/body").first
213
+ context "creating a new Scrubber class with no scrub method" do
214
+ setup do
215
+ @klass = Class.new(Loofah::Scrubber) do
216
+ def initialize ; end
217
+ end
218
+ @scrubber = @klass.new
219
+ end
118
220
 
119
- assert_contains doc.serialize, /<!DOCTYPE/
120
- assert_contains doc.serialize, /<html>/
121
- assert_contains doc.serialize, /<head>/
122
- assert_contains doc.serialize, /<body>/
221
+ should "raise an exception" do
222
+ assert_raises(Loofah::ScrubberNotFound) {
223
+ Loofah.scrub_fragment(FRAGMENT, @scrubber)
224
+ }
225
+ end
123
226
  end
124
-
125
227
  end
@@ -0,0 +1,144 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
2
+
3
+ class TestScrubber < Test::Unit::TestCase
4
+
5
+ [ Loofah::HTML::Document, Loofah::HTML::DocumentFragment ].each do |klass|
6
+ define_method "test_#{klass}_bad_sanitize_method" do
7
+ doc = klass.parse "<p>foo</p>"
8
+ assert_raises(Loofah::ScrubberNotFound) { doc.scrub! :frippery }
9
+ end
10
+ end
11
+
12
+ INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
13
+ INVALID_ESCAPED = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
14
+ INVALID_PRUNED = "<div>quux</div>"
15
+ INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"
16
+
17
+ WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid>"
18
+ WHITEWASH_RESULT = "<div>foo</div>"
19
+
20
+ NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
21
+ NOFOLLOW_RESULT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
22
+
23
+ def test_document_escape_bad_tags
24
+ doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
25
+ result = doc.scrub! :escape
26
+
27
+ assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
28
+ assert_equal doc, result
29
+ end
30
+
31
+ def test_fragment_escape_bad_tags
32
+ doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
33
+ result = doc.scrub! :escape
34
+
35
+ assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
36
+ assert_equal doc, result
37
+ end
38
+
39
+ def test_document_prune_bad_tags
40
+ doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
41
+ result = doc.scrub! :prune
42
+
43
+ assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
44
+ assert_equal doc, result
45
+ end
46
+
47
+ def test_fragment_prune_bad_tags
48
+ doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
49
+ result = doc.scrub! :prune
50
+
51
+ assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
52
+ assert_equal doc, result
53
+ end
54
+
55
+ def test_document_strip_bad_tags
56
+ doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
57
+ result = doc.scrub! :strip
58
+
59
+ assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
60
+ assert_equal doc, result
61
+ end
62
+
63
+ def test_fragment_strip_bad_tags
64
+ doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
65
+ result = doc.scrub! :strip
66
+
67
+ assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
68
+ assert_equal doc, result
69
+ end
70
+
71
+ def test_document_whitewash
72
+ doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
73
+ result = doc.scrub! :whitewash
74
+
75
+ assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
76
+ assert_equal doc, result
77
+ end
78
+
79
+ def test_fragment_whitewash
80
+ doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
81
+ result = doc.scrub! :whitewash
82
+
83
+ assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
84
+ assert_equal doc, result
85
+ end
86
+
87
+ def test_document_nofollow
88
+ doc = Loofah::HTML::Document.parse "<html><body>#{NOFOLLOW_FRAGMENT}</body></html>"
89
+ result = doc.scrub! :nofollow
90
+
91
+ assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
92
+ assert_equal doc, result
93
+ end
94
+
95
+ def test_fragment_nofollow
96
+ doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_FRAGMENT}</div>"
97
+ result = doc.scrub! :nofollow
98
+
99
+ assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
100
+ assert_equal doc, result
101
+ end
102
+
103
+ def test_fragment_shortcut
104
+ mock_doc = mock
105
+ Loofah.expects(:fragment).with(:string_or_io).returns(mock_doc)
106
+ mock_doc.expects(:scrub!).with(:method)
107
+
108
+ Loofah.scrub_fragment(:string_or_io, :method)
109
+ end
110
+
111
+ def test_document_shortcut
112
+ mock_doc = mock
113
+ Loofah.expects(:document).with(:string_or_io).returns(mock_doc)
114
+ mock_doc.expects(:scrub!).with(:method)
115
+
116
+ Loofah.scrub_document(:string_or_io, :method)
117
+ end
118
+
119
+ def test_document_to_s
120
+ doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
121
+ assert_not_nil doc.xpath("/html").first
122
+ assert_not_nil doc.xpath("/html/head").first
123
+ assert_not_nil doc.xpath("/html/body").first
124
+
125
+ assert_contains doc.to_s, /<!DOCTYPE/
126
+ assert_contains doc.to_s, /<html>/
127
+ assert_contains doc.to_s, /<head>/
128
+ assert_contains doc.to_s, /<body>/
129
+ end
130
+
131
+ def test_document_serialize
132
+ doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
133
+
134
+ assert_not_nil doc.xpath("/html").first
135
+ assert_not_nil doc.xpath("/html/head").first
136
+ assert_not_nil doc.xpath("/html/body").first
137
+
138
+ assert_contains doc.serialize, /<!DOCTYPE/
139
+ assert_contains doc.serialize, /<html>/
140
+ assert_contains doc.serialize, /<head>/
141
+ assert_contains doc.serialize, /<body>/
142
+ end
143
+
144
+ end