htmltools 1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,201 @@
1
+ #!/usr/bin/ruby
2
+ require 'html/tree'
3
+ require 'test/unit'
4
+
5
# Test-only reopening of the library under test: exposes two parser instance
# variables through readers and adds a short alias for TreeElement#children so
# the assertions in this file stay compact.
module HTMLTree
  class Parser
    # Readers for @currentNode and @rootNode.
    attr_reader :currentNode, :rootNode
  end

  module TreeElement
    # `ch` is shorthand for `children`.
    alias_method :ch, :children
  end
end
15
+
16
# Tests for HTMLTree::Parser: each case feeds an HTML snippet to the parser
# and checks the shape of the resulting node tree.
class HTMLTreeParserTestCase < Test::Unit::TestCase
  # The parser under test. Inside this class a bare `p` therefore refers to
  # the parser rather than Kernel#p.
  attr_reader :p

  # Creates a fresh parser before every test.
  def setup
    @p = HTMLTree::Parser.new(true, false)
  end

  # Shorthand for the parser's rootNode.
  def rn
    @p.rootNode
  end

  # Shorthand for the parser's currentNode.
  def cn
    @p.currentNode
  end

  def test_empty
    assert_equal(cn, rn)
    assert_nil(cn.parent)
    assert_equal([], rn.children)
  end

  def test_skeleton
    d = rn
    # Expected value first, so a failure message labels the sides correctly.
    assert_equal(HTMLTree::Document, d.class)
    p.feed('<html></html>')
    assert_equal(d, cn)
    assert_equal(d, rn)
    assert_equal(rn, @p.tree)
    assert_equal(@p.html, @p.tree.html_node)
    assert_equal(@p.html, rn.ch[0])
    assert_equal(1, rn.ch.size)
    assert_equal('', rn.tag)
    assert_equal('html', rn.ch[0].tag)
    assert_equal(d.html_node, rn.ch[0])
  end

  def test_reset
    p.feed('<html><head></head><body attrib1="xxx"></body></html>')
    assert_equal(1, rn.ch.size)
    assert_equal(2, rn.ch[0].ch.size)
    p.reset
    assert_equal(0, rn.ch.size)
    p.feed('<html><head></head><body attrib1="xxx"></body></html>')
    assert_equal(1, rn.ch.size)
    assert_equal(2, rn.ch[0].ch.size)
  end

  def test_skeleton2
    d = rn
    p.feed('<html><head></head><body attrib1="xxx"></body></html>')
    assert_equal(d, cn)
    assert_equal(d, rn)
    assert_equal(1, rn.ch.size)
    h = rn.ch[0] # html
    assert_equal(d.html_node, h)
    assert_equal('html', h.tag)
    assert_equal(2, h.ch.size)
    assert_equal(0, h.ch[1].ch.size)
    assert_equal(0, h.ch[0].ch.size)
    assert_equal('head', h.ch[0].tag)
    assert_equal('body', h.ch[1].tag)
    assert_equal('xxx', h.ch[1].attribute('attrib1'))
  end

  def test_empty_tag
    d = rn
    p.feed('<html><head></head><body attrib1="xxx"><br></body></html>')
    assert_equal(d, cn)
    h = rn.ch[0]
    assert_equal('html', h.tag)
    assert_equal(2, h.ch.size)
    assert_equal(0, h.ch[0].ch.size)
    assert_equal(1, h.ch[1].ch.size)
    assert_equal('head', h.ch[0].tag)
    assert_equal('body', h.ch[1].tag)
    assert_equal('br', h.ch[1].ch[0].tag)
  end

  def test_no_end_tag
    p.feed("<html><body>Foo<br />bar</body></html>")
    h = rn.ch[0]
    assert_equal('html', h.tag)
    assert_equal('body', h.ch[0].tag)
    assert_equal('br', h.ch[0].ch[1].tag)
    assert_equal({}, h.ch[0].ch[1].attributes)
    assert_equal([], h.ch[0].ch[1].ch)
    assert_equal('Foo', h.ch[0].ch[0].content)
    assert_equal('bar', h.ch[0].ch[2].content)
  end

  def test_content
    d = rn
    p.feed('<html><head></head><body attrib1="xxx"><p>stuff</p></body></html>')
    assert_equal(d, rn)
    assert_equal(d, cn)
    h = rn.ch[0]
    assert_equal('html', h.tag)
    assert_equal(2, h.ch.size) # html => head, body
    assert_equal(0, h.ch[0].ch.size) # head =>
    assert_equal(1, h.ch[1].ch.size) # body => p
    assert_equal(1, h.ch[1].ch[0].ch.size) # p => stuff
    assert_equal('head', h.ch[0].tag)
    assert_equal('body', h.ch[1].tag)
    assert_equal('p', h.ch[1].ch[0].tag)
    data = h.ch[1].ch[0].ch[0] # html/body/p/<data>
    assert_equal(true, data.data?)
    assert_equal('', data.tag)
    assert_equal('stuff', data.to_s)
    assert_equal({}, data.attributes)
  end

  def test_unclosed_li
    p.feed('<html><body><ul><li>Item 1<li>Item 2<li>Item 3</ul></body></html>')

    html = rn.ch[0]
    assert_equal('html', html.tag)

    ul = html.ch[0].ch[0]
    assert_equal('ul', ul.tag)

    assert_equal(3, ul.ch.size)
  end

  def test_partial_file
    p.feed("<ul><li>test</li><li>test test</li></ul>")
    # The first child of the root is the <ul> itself.
    ul = rn.ch[0]
    assert_equal('ul', ul.tag)
    assert_equal(2, ul.ch.size)
    assert_equal('li', ul.ch[0].tag)
    assert_equal('li', ul.ch[1].tag)
  end

  def test_break_nesting
    p.feed('<HTML><BODY><p><ul><LI></ul><p></BODY></HTML>')

    expected = tree("html",
                    tree("body", tree("p", tree("ul", tree("li"))), tree("p")))
    expected.assert_matches(p.html)
  end

  def test_meta
    p.feed('<html><head><META NAME="robots" CONTENT="noindex,follow"></head><body></body></html>')

    expected = tree("html",
                    tree("head", tree("meta")),
                    tree("body"))

    expected.assert_matches(p.html)
  end

  # Expected-tree description: a tag name plus an ordered list of child
  # VerificationTrees, comparable against a parsed node via assert_matches.
  class VerificationTree
    include Test::Unit::Assertions

    attr_reader :tag

    def initialize(tag)
      @tag = tag
      @children = []
    end

    # Replaces the expected children and returns self so calls can be chained.
    def add_children(children)
      @children = children
      self
    end

    # Asserts that +tree+ has this node's tag and that its elements carry the
    # same tags, in the same order, as the expected children; then recurses
    # into each child. tree.path is used as the failure message.
    def assert_matches(tree)
      assert_equal(@tag, tree.tag)

      # Expected tags first, actual tags second.
      assert_equal(@children.collect { |node| node.tag },
                   tree.elements.collect { |node| node.tag },
                   tree.path)
      assert_children_matches(@children, tree.elements)
    end

    # Pairs each expected child with the parsed node at the same index and
    # asserts that they match.
    def assert_children_matches(children, treechildren)
      children.each_with_index do |child, index|
        child.assert_matches(treechildren[index])
      end
    end
  end

  # Builds a VerificationTree for +tag+ with the given child trees.
  def tree(tag, *children)
    VerificationTree.new(tag).add_children(children)
  end
end
@@ -0,0 +1,160 @@
1
+ # $Id: tc_source-parser.rb,v 1.3 2006/07/24 09:28:19 Philip Dorrell Exp $
2
+
3
+ require 'html/stparser'
4
+ require 'test/unit'
5
+
6
# SGML parser subclass used by the tests below. It accumulates everything fed
# to it and forwards each parser event to the owning test case; all events
# except warnings carry the slice of accumulated source selected by src_range.
class TestSourceParser < HTML::SGMLParser
  attr_reader :test_case

  def initialize(verbose, test_case)
    super(verbose)
    @test_case = test_case
    @fulldata = ""
  end

  # Appends +data+ to the accumulated source, then lets the superclass parse it.
  def feed(data)
    @fulldata += data
    super(data)
  end

  # Portion of the accumulated source indexed by src_range.
  def last_src
    @fulldata[src_range]
  end

  def warn(msg)
    test_case.callback('warn', msg)
  end

  def handle_starttag(tag, method, attrs)
    test_case.callback('starttag', last_src)
  end

  def unknown_starttag(tag, attrs)
    test_case.callback('starttag', last_src)
  end

  def handle_endtag(tag, method)
    test_case.callback('endtag', last_src)
  end

  def unknown_endtag(tag)
    test_case.callback('endtag', last_src)
  end

  def handle_charref(name)
    test_case.callback('charref', last_src)
  end

  def handle_entityref(name)
    test_case.callback('entityref', last_src)
  end

  def handle_data(data)
    test_case.callback('data', last_src)
  end

  def handle_comment(data)
    test_case.callback('comment', last_src)
  end

  def handle_special(data)
    test_case.callback('special', last_src)
  end
end
35
+
36
# Feeds HTML to a TestSourceParser and checks the sequence of
# [event, source-text] callbacks it reports.
class SourceParserTestCase < Test::Unit::TestCase
  attr_reader :parser, :callbacks

  def setup
    @parser = TestSourceParser.new(true, self)
    @callbacks = []
  end

  # Called by the parser for every event; stores the arguments as one entry.
  def callback(*stuff)
    @callbacks << stuff
  end

  # Clears the recorded callbacks, runs the given block, and returns whatever
  # was recorded while it ran.
  def callbacks_from
    @callbacks = []
    yield
    # show_callbacks
    @callbacks
  end

  # Debugging aid: prints the first two fields of every recorded callback.
  def show_callbacks
    puts "Callbacks: "
    @callbacks.each { |kind, text| puts " [#{kind}: \"#{text}\"]" }
  end

  def test_empty_html
    cbs = callbacks_from do
      parser.feed('<html>')
    end
    assert_equal(1, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(['starttag', '<html>'], cbs.first)

    cbs = callbacks_from do
      parser.feed('</html>')
    end
    assert_equal(1, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(['endtag', '</html>'], cbs.first)
  end

  def test_attribs
    cbs = callbacks_from do
      parser.feed('<html><body bgcolor="#ffffff" width="123"><p>Fred</p></body></html>')
    end
    assert_equal(7, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(['starttag', '<html>'], cbs.first)
    assert_equal(['starttag', '<body bgcolor="#ffffff" width="123">'], cbs[1])
    assert_equal(['data', "Fred"], cbs[3])
    assert_equal(['endtag', '</body>'], cbs[5])
  end

  # FIXME: should we insert <p> tags here?
  def test_no_para_tags_in_body
    cbs = callbacks_from do
      parser.feed('<html><body>Fred</body></html>')
    end
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(['endtag', '</body>'], cbs[3])
  end

  def test_empty_tag
    cbs = callbacks_from do
      parser.feed('<html><body><img src="whatever"></body></html>')
    end
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["starttag", '<img src="whatever">'], cbs[2])
  end

  def test_data
    cbs = callbacks_from do
      parser.feed('<html><body><p>Data1</p><br><p>More_data</p></body></html>')
    end
    assert_equal(11, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["data", "Data1"], cbs[3])
    assert_equal(["data", "More_data"], cbs[7])
  end

  def test_whitespace_stripping
    cbs = callbacks_from do
      parser.feed('<html><body><p> Data1 ')
    end
    assert_equal(4, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["data", " Data1 "], cbs[3])

    cbs = callbacks_from do
      parser.feed('</p><p> Data2 </p></body></html>')
    end
    assert_equal(6, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["data", " Data2 "], cbs[2])
  end

  def test_script
    # Heredoc body and terminator stay at column 0 so the fed text is unchanged.
    source = <<EOS
<html><body>
<script type="text/javascript" language="Java_script">
<!--
var page_name = "Page_view_item";
//-->
</script>
</body></html>
EOS
    cbs = callbacks_from do
      parser.feed(source)
    end
    assert_equal(12, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["starttag", '<script type="text/javascript" language="Java_script">'], cbs[3])
    assert_equal(["endtag", "</script>"], cbs[7])
  end

  def test_unknown_character
    cbs = callbacks_from do
      parser.feed('<html><body>&#12345;</body></html>')
    end
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["charref", "&#12345;"], cbs[2])
  end

  def test_unknown_entity
    cbs = callbacks_from do
      parser.feed('<html><body>&fred;</body></html>')
    end
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["entityref", "&fred;"], cbs[2])
  end

  def test_comment
    cbs = callbacks_from do
      parser.feed('<html><body><!-- comment here --></body></html>')
    end
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["comment", "<!-- comment here -->"], cbs[2])
  end

  # TODO: is this right (w/the !)?
  def test_special
    # Heredoc body and terminator stay at column 0 so the fed text is unchanged.
    source = <<EOS
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><body></body></html>
EOS
    cbs = callbacks_from do
      parser.feed(source)
    end
    assert_equal(7, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["special", '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'], cbs.first)
  end
end
@@ -0,0 +1,196 @@
1
+ # Test cases for html-tree.rb
2
+ # Copyright (C) 2002 Ned Konz <ned@bike-nomad.com>
3
+ # License: Ruby's
4
+ # $Id: tc_stacking-parser.rb,v 1.4 2004/02/10 21:36:31 jhannes Exp $
5
+
6
+ require 'html/stparser'
7
+ require 'test/unit'
8
+
9
# StackingParser subclass used by the tests below: every handler forwards its
# event name and arguments to the owning test case's callback method.
class TestStackingParser < HTML::StackingParser
  attr_reader :test_case

  def initialize(verbose, test_case)
    super(verbose)
    @test_case = test_case
  end

  def warn(msg)
    test_case.callback('warn', msg)
  end

  # Runs the superclass handler first, then reports the comment.
  def handle_comment(data)
    super
    test_case.callback('comment', data)
  end

  def handle_cdata(data)
    test_case.callback('data', data)
  end

  def handle_start_tag(tag, attrs)
    test_case.callback('start_tag', tag, attrs)
  end

  def handle_end_tag(tag)
    test_case.callback('end_tag', tag)
  end

  def handle_empty_tag(tag, attrs)
    test_case.callback('empty_tag', tag, attrs)
  end

  def handle_unknown_tag(tag, attrs)
    test_case.callback('unknown_tag', tag, attrs)
  end

  def handle_missing_end_tag(tag)
    test_case.callback('missing_end_tag', tag)
  end

  def handle_extra_end_tag(tag)
    test_case.callback('extra_end_tag', tag)
  end

  def handle_script(data)
    test_case.callback('script', data)
  end

  def handle_unknown_character(name)
    test_case.callback('unknown_character', name)
  end

  def handle_unknown_entity(name)
    test_case.callback('unknown_entity', name)
  end

  def handle_special(data)
    test_case.callback('special', data)
  end
end
31
+
32
# Feeds HTML to a TestStackingParser and checks both the callbacks it reports
# and the state of its tag stack.
class StackingParserTestCase < Test::Unit::TestCase
  attr_reader :parser, :callbacks

  def setup
    @parser = TestStackingParser.new(true, self)
    @callbacks = []
  end

  # Called by the parser for every event; stores the arguments as one entry.
  def callback(*stuff)
    @callbacks << stuff
  end

  # Clears the recorded callbacks, runs the given block, and returns whatever
  # was recorded while it ran.
  def callbacks_from
    @callbacks = []
    yield
    @callbacks
  end

  def test_empty_stack
    # test stack empty at first
    assert_stack_empty
    # test last_tag and parent_tag don't blow up with empty stack
    assert_equal('html', parser.last_tag)
    assert_equal('html', parser.parent_tag)
  end

  def test_empty_html
    cbs = callbacks_from { parser.feed('<html>') }
    assert_stack_not_empty
    assert_equal('html', parser.last_tag)
    assert_equal('html', parser.parent_tag)
    assert_equal(1, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(['start_tag', 'html', []], cbs[0])

    cbs = callbacks_from { parser.feed('</html>') }
    assert_stack_empty
    assert_equal(1, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(['end_tag', 'html'], cbs[0])
  end

  def test_attribs
    cbs = callbacks_from {
      parser.feed('<html><body bgcolor="#ffffff" width="123"><p>Fred</p></body></html>')
    }
    assert_stack_empty
    assert_equal(7, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(['start_tag', 'html', []], cbs[0])
    assert_equal(["start_tag", "body", [["bgcolor", "#ffffff"], ["width", "123"]]], cbs[1])
    assert_equal(['data', "Fred"], cbs[3])
  end

  # FIXME: should we insert <p> tags here?
  def test_no_para_tags_in_body
    cbs = callbacks_from { parser.feed('<html><body>Fred</body></html>') }
    assert_stack_empty
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(['data', 'Fred'], cbs[2])
  end

  def test_empty_tag
    cbs = callbacks_from { parser.feed('<html><body><img src="whatever"></body></html>') }
    assert_stack_empty
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["empty_tag", "img", [["src", "whatever"]]], cbs[2])
  end

  def test_unknown_tag
    cbs = callbacks_from { parser.feed('<html><body><froobzle a="b"></froobzle></body></html>') }
    assert_stack_empty
    assert_equal(6, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["unknown_tag", "froobzle", [["a", "b"]]], cbs[2])
  end

  def test_missing_end_tag
    cbs = callbacks_from { parser.feed('<html><body><div></body></html>') }
    assert_stack_empty
    assert_equal(6, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["missing_end_tag", "div"], cbs[3])
  end

  def test_extra_end_tag
    cbs = callbacks_from { parser.feed('<html><body></body></body></html>') }
    assert_stack_empty
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["extra_end_tag", "body"], cbs[3])
  end

  def test_data
    cbs = callbacks_from { parser.feed('<html><body><p>Data1</p><br><p>More_data</p></body></html>') }
    assert_stack_empty
    assert_equal(11, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["data", "Data1"], cbs[3])
    assert_equal(["data", "More_data"], cbs[7])
  end

  def test_whitespace_stripping
    parser.strip_whitespace = false
    cbs = callbacks_from { parser.feed('<html><body><p> Data1 ') }
    assert_equal(4, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["data", " Data1 "], cbs[3])

    parser.strip_whitespace = true
    cbs = callbacks_from { parser.feed('</p><p> Data2 </p></body></html>') }
    assert_equal(6, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["data", "Data2"], cbs[2])
    assert_stack_empty
  end

  def test_script
    parser.strip_whitespace = true
    # Heredoc body and terminator stay at column 0 so the fed text is unchanged.
    source = <<EOS
<html><body>
<script type="text/javascript" language="Java_script">
<!--
var page_name = "Page_view_item";
//-->
</script>
</body></html>
EOS
    cbs = callbacks_from { parser.feed(source) }
    # Comparing against [] already implies the stack is empty, so no separate
    # empty? assertion is needed here.
    assert_equal([], parser.stack)
    assert_equal(7, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["start_tag", "script",
                  [["type", "text/javascript"], ["language", "Java_script"]]], cbs[2])
    assert_equal(["script", "\n<!--\nvar page_name = \"Page_view_item\";\n//-->\n"], cbs[3])
    assert_equal(["end_tag", "script"], cbs[4])
  end

  def test_unknown_character
    cbs = callbacks_from { parser.feed('<html><body>&#12345;</body></html>') }
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["unknown_character", "12345"], cbs[2])
  end

  def test_unknown_entity
    cbs = callbacks_from { parser.feed('<html><body>&fred;</body></html>') }
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["unknown_entity", "fred"], cbs[2])
  end

  def test_comment
    parser.strip_whitespace = true
    cbs = callbacks_from { parser.feed('<html><body><!-- comment here --></body></html>') }
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["comment", "comment here"], cbs[2])
  end

  # TODO: is this right (w/the !)?
  def test_special
    parser.strip_whitespace = true
    # Heredoc body and terminator stay at column 0 so the fed text is unchanged.
    source = <<EOS
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><body></body></html>
EOS
    cbs = callbacks_from { parser.feed(source) }
    assert_equal(5, cbs.size, "cbs is #{cbs.inspect}")
    assert_equal(["special", '!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"'], cbs[0])
  end

  private

  # Single place for the "stack is empty" check, so every test asserts it the
  # same way and a failure shows what was left on the stack.
  def assert_stack_empty
    assert(parser.stack.empty?, "expected empty stack, got #{parser.stack.inspect}")
  end

  # Counterpart of assert_stack_empty for the case where tags are still open.
  def assert_stack_not_empty
    assert(!parser.stack.empty?, "expected a non-empty stack")
  end
end
195
+
196
# Put $stdout in sync mode so writes are not buffered by Ruby.
+ $stdout.sync = true