libxml-ruby 4.1.2 → 5.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY +17 -0
- data/README.rdoc +7 -16
- data/ext/libxml/ruby_libxml.h +43 -44
- data/ext/libxml/ruby_xml.c +0 -343
- data/ext/libxml/ruby_xml.h +9 -10
- data/ext/libxml/ruby_xml_attributes.h +2 -0
- data/ext/libxml/ruby_xml_document.c +6 -6
- data/ext/libxml/ruby_xml_document.h +11 -11
- data/ext/libxml/ruby_xml_dtd.c +85 -79
- data/ext/libxml/ruby_xml_encoding.h +20 -18
- data/ext/libxml/ruby_xml_error.c +9 -6
- data/ext/libxml/ruby_xml_error.h +2 -2
- data/ext/libxml/ruby_xml_html_parser_context.c +35 -21
- data/ext/libxml/ruby_xml_namespace.c +0 -3
- data/ext/libxml/ruby_xml_node.c +1394 -1398
- data/ext/libxml/ruby_xml_parser.h +1 -1
- data/ext/libxml/ruby_xml_parser_context.c +47 -39
- data/ext/libxml/ruby_xml_parser_options.c +9 -1
- data/ext/libxml/ruby_xml_parser_options.h +1 -1
- data/ext/libxml/ruby_xml_reader.c +15 -16
- data/ext/libxml/ruby_xml_sax2_handler.c +1 -1
- data/ext/libxml/ruby_xml_sax_parser.c +1 -9
- data/ext/libxml/ruby_xml_schema.c +4 -4
- data/ext/libxml/ruby_xml_version.h +5 -5
- data/ext/libxml/ruby_xml_writer.c +8 -8
- data/ext/libxml/ruby_xml_xpath.c +1 -1
- data/ext/libxml/ruby_xml_xpath_context.c +2 -2
- data/ext/libxml/ruby_xml_xpath_expression.c +1 -1
- data/lib/libxml/document.rb +15 -15
- data/lib/libxml/html_parser.rb +23 -23
- data/lib/libxml/parser.rb +26 -24
- data/test/test.rb +5 -0
- data/test/test_document.rb +8 -0
- data/test/test_document_write.rb +1 -4
- data/test/test_dtd.rb +5 -8
- data/test/test_encoding.rb +1 -4
- data/test/test_helper.rb +9 -2
- data/test/test_html_parser.rb +162 -162
- data/test/test_namespace.rb +1 -3
- data/test/test_node.rb +1 -3
- data/test/test_node_write.rb +1 -4
- data/test/test_parser.rb +26 -17
- data/test/test_reader.rb +4 -4
- data/test/test_sax_parser.rb +1 -1
- data/test/test_xml.rb +0 -99
- metadata +3 -2
data/lib/libxml/html_parser.rb
CHANGED
@@ -5,31 +5,31 @@ module LibXML
|
|
5
5
|
class HTMLParser
|
6
6
|
# call-seq:
|
7
7
|
# XML::HTMLParser.file(path) -> XML::HTMLParser
|
8
|
-
# XML::HTMLParser.file(path, :
|
9
|
-
#
|
8
|
+
# XML::HTMLParser.file(path, encoding: XML::Encoding::UTF_8,
|
9
|
+
# options: XML::HTMLParser::Options::NOENT) -> XML::HTMLParser
|
10
10
|
#
|
11
11
|
# Creates a new parser by parsing the specified file or uri.
|
12
12
|
#
|
13
|
-
#
|
14
|
-
# parsing is performed. Valid options are:
|
13
|
+
# Parameters:
|
15
14
|
#
|
15
|
+
# path - Path to file to parse
|
16
16
|
# encoding - The document encoding, defaults to nil. Valid values
|
17
17
|
# are the encoding constants defined on XML::Encoding.
|
18
18
|
# options - Parser options. Valid values are the constants defined on
|
19
19
|
# XML::HTMLParser::Options. Multiple options can be combined
|
20
20
|
# by using Bitwise OR (|).
|
21
|
-
def self.file(path, options
|
21
|
+
def self.file(path, encoding: nil, options: nil)
|
22
22
|
context = XML::HTMLParser::Context.file(path)
|
23
|
-
context.encoding =
|
24
|
-
context.options = options
|
23
|
+
context.encoding = encoding if encoding
|
24
|
+
context.options = options if options
|
25
25
|
self.new(context)
|
26
26
|
end
|
27
27
|
|
28
28
|
# call-seq:
|
29
29
|
# XML::HTMLParser.io(io) -> XML::HTMLParser
|
30
|
-
# XML::HTMLParser.io(io, :
|
31
|
-
#
|
32
|
-
#
|
30
|
+
# XML::HTMLParser.io(io, encoding: XML::Encoding::UTF_8,
|
31
|
+
# options: XML::HTMLParser::Options::NOENT,
|
32
|
+
# base_uri: "http://libxml.org") -> XML::HTMLParser
|
33
33
|
#
|
34
34
|
# Creates a new reader by parsing the specified io object.
|
35
35
|
#
|
@@ -42,36 +42,36 @@ module LibXML
|
|
42
42
|
# options - Parser options. Valid values are the constants defined on
|
43
43
|
# XML::HTMLParser::Options. Multiple options can be combined
|
44
44
|
# by using Bitwise OR (|).
|
45
|
-
def self.io(io, options
|
45
|
+
def self.io(io, base_uri: nil, encoding: nil, options: nil)
|
46
46
|
context = XML::HTMLParser::Context.io(io)
|
47
|
-
context.base_uri =
|
48
|
-
context.encoding =
|
49
|
-
context.options = options
|
47
|
+
context.base_uri = base_uri if base_uri
|
48
|
+
context.encoding = encoding if encoding
|
49
|
+
context.options = options if options
|
50
50
|
self.new(context)
|
51
51
|
end
|
52
52
|
|
53
53
|
# call-seq:
|
54
54
|
# XML::HTMLParser.string(string)
|
55
|
-
# XML::HTMLParser.string(string, :
|
56
|
-
#
|
57
|
-
#
|
55
|
+
# XML::HTMLParser.string(string, encoding: XML::Encoding::UTF_8,
|
56
|
+
# options: XML::HTMLParser::Options::NOENT,
|
57
|
+
# base_uri: "http://libxml.org") -> XML::HTMLParser
|
58
58
|
#
|
59
59
|
# Creates a new parser by parsing the specified string.
|
60
60
|
#
|
61
|
-
#
|
62
|
-
# parsing is performed. Valid options are:
|
61
|
+
# Parameters:
|
63
62
|
#
|
63
|
+
# string - String to parse
|
64
64
|
# base_uri - The base url for the parsed document.
|
65
65
|
# encoding - The document encoding, defaults to nil. Valid values
|
66
66
|
# are the encoding constants defined on XML::Encoding.
|
67
67
|
# options - Parser options. Valid values are the constants defined on
|
68
68
|
# XML::HTMLParser::Options. Multiple options can be combined
|
69
69
|
# by using Bitwise OR (|).
|
70
|
-
def self.string(string, options
|
70
|
+
def self.string(string, base_uri: nil, encoding: nil, options: nil)
|
71
71
|
context = XML::HTMLParser::Context.string(string)
|
72
|
-
context.base_uri =
|
73
|
-
context.encoding =
|
74
|
-
context.options = options
|
72
|
+
context.base_uri = base_uri if base_uri
|
73
|
+
context.encoding = encoding if encoding
|
74
|
+
context.options = options if options
|
75
75
|
self.new(context)
|
76
76
|
end
|
77
77
|
|
data/lib/libxml/parser.rb
CHANGED
@@ -18,31 +18,33 @@ module LibXML
|
|
18
18
|
|
19
19
|
# call-seq:
|
20
20
|
# XML::Parser.file(path) -> XML::Parser
|
21
|
-
# XML::Parser.file(path, :
|
22
|
-
# :
|
21
|
+
# XML::Parser.file(path, encoding: XML::Encoding::UTF_8,
|
22
|
+
# options: XML::Parser::Options::NOENT) -> XML::Parser
|
23
23
|
#
|
24
24
|
# Creates a new parser for the specified file or uri.
|
25
25
|
#
|
26
|
-
#
|
27
|
-
# parsing is performed. Valid options are:
|
26
|
+
# Parameters:
|
28
27
|
#
|
28
|
+
# path - Path to file
|
29
|
+
# base_uri - The base url for the parsed document.
|
29
30
|
# encoding - The document encoding, defaults to nil. Valid values
|
30
31
|
# are the encoding constants defined on XML::Encoding.
|
31
32
|
# options - Parser options. Valid values are the constants defined on
|
32
33
|
# XML::Parser::Options. Multiple options can be combined
|
33
34
|
# by using Bitwise OR (|).
|
34
|
-
def self.file(path, options
|
35
|
+
def self.file(path, base_uri: nil, encoding: nil, options: nil)
|
35
36
|
context = XML::Parser::Context.file(path)
|
36
|
-
context.
|
37
|
-
context.
|
37
|
+
context.base_uri = base_uri if base_uri
|
38
|
+
context.encoding = encoding if encoding
|
39
|
+
context.options = options if options
|
38
40
|
self.new(context)
|
39
41
|
end
|
40
42
|
|
41
43
|
# call-seq:
|
42
44
|
# XML::Parser.io(io) -> XML::Parser
|
43
|
-
# XML::Parser.io(io, :
|
44
|
-
# :
|
45
|
-
# :
|
45
|
+
# XML::Parser.io(io, encoding: XML::Encoding::UTF_8,
|
46
|
+
# options: XML::Parser::Options::NOENT,
|
47
|
+
# base_uri: "http://libxml.org") -> XML::Parser
|
46
48
|
#
|
47
49
|
# Creates a new parser for the specified io object.
|
48
50
|
#
|
@@ -55,36 +57,36 @@ module LibXML
|
|
55
57
|
# options - Parser options. Valid values are the constants defined on
|
56
58
|
# XML::Parser::Options. Multiple options can be combined
|
57
59
|
# by using Bitwise OR (|).
|
58
|
-
def self.io(io, options
|
60
|
+
def self.io(io, base_uri: nil, encoding: nil, options: nil)
|
59
61
|
context = XML::Parser::Context.io(io)
|
60
|
-
context.base_uri =
|
61
|
-
context.encoding =
|
62
|
-
context.options = options
|
62
|
+
context.base_uri = base_uri if base_uri
|
63
|
+
context.encoding = encoding if encoding
|
64
|
+
context.options = options if options
|
63
65
|
self.new(context)
|
64
66
|
end
|
65
67
|
|
66
68
|
# call-seq:
|
67
69
|
# XML::Parser.string(string)
|
68
|
-
# XML::Parser.string(string, :
|
69
|
-
# :
|
70
|
-
# :
|
70
|
+
# XML::Parser.string(string, encoding: XML::Encoding::UTF_8,
|
71
|
+
# options: XML::Parser::Options::NOENT,
|
72
|
+
# base_uri: "http://libxml.org") -> XML::Parser
|
71
73
|
#
|
72
74
|
# Creates a new parser by parsing the specified string.
|
73
75
|
#
|
74
|
-
#
|
75
|
-
# parsing is performed. Valid options are:
|
76
|
+
# Parameters:
|
76
77
|
#
|
78
|
+
# string - The string to parse
|
77
79
|
# base_uri - The base url for the parsed document.
|
78
80
|
# encoding - The document encoding, defaults to nil. Valid values
|
79
81
|
# are the encoding constants defined on XML::Encoding.
|
80
82
|
# options - Parser options. Valid values are the constants defined on
|
81
|
-
# XML::Parser::Options.
|
83
|
+
# XML::Parser::Options. Multiple options can be combined
|
82
84
|
# by using Bitwise OR (|).
|
83
|
-
def self.string(string, options
|
85
|
+
def self.string(string, base_uri: nil, encoding: nil, options: nil)
|
84
86
|
context = XML::Parser::Context.string(string)
|
85
|
-
context.base_uri =
|
86
|
-
context.encoding =
|
87
|
-
context.options = options
|
87
|
+
context.base_uri = base_uri if base_uri
|
88
|
+
context.encoding = encoding if encoding
|
89
|
+
context.options = options if options
|
88
90
|
self.new(context)
|
89
91
|
end
|
90
92
|
|
data/test/test.rb
ADDED
data/test/test_document.rb
CHANGED
@@ -129,4 +129,12 @@ class TestDocument < Minitest::Test
|
|
129
129
|
file = File.join(File.dirname(__FILE__), 'model/atom.xml')
|
130
130
|
schema_document = LibXML::XML::Document.file(file, options: LibXML::XML::Parser::Options::NONET)
|
131
131
|
end
|
132
|
+
|
133
|
+
def test_io
|
134
|
+
File.open(File.join(File.dirname(__FILE__), 'model/rubynet.xml')) do |io|
|
135
|
+
doc = LibXML::XML::Document.io(io)
|
136
|
+
assert_instance_of(LibXML::XML::Document, doc)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
132
140
|
end
|
data/test/test_document_write.rb
CHANGED
@@ -7,14 +7,11 @@ class TestDocumentWrite < Minitest::Test
|
|
7
7
|
def setup
|
8
8
|
@file_name = "model/bands.utf-8.xml"
|
9
9
|
|
10
|
-
# Strip spaces to make testing easier
|
11
|
-
LibXML::XML.default_keep_blanks = false
|
12
10
|
file = File.join(File.dirname(__FILE__), @file_name)
|
13
|
-
@doc = LibXML::XML::Document.file(file)
|
11
|
+
@doc = LibXML::XML::Document.file(file, options: LibXML::XML::Parser::Options::NOBLANKS)
|
14
12
|
end
|
15
13
|
|
16
14
|
def teardown
|
17
|
-
LibXML::XML.default_keep_blanks = true
|
18
15
|
@doc = nil
|
19
16
|
end
|
20
17
|
|
data/test/test_dtd.rb
CHANGED
@@ -31,13 +31,13 @@ class TestDtd < Minitest::Test
|
|
31
31
|
end
|
32
32
|
|
33
33
|
def test_internal_subset
|
34
|
-
xhtml_dtd = LibXML::XML::Dtd.new
|
34
|
+
xhtml_dtd = LibXML::XML::Dtd.new("-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", nil, nil, true)
|
35
35
|
assert xhtml_dtd.name.nil?
|
36
36
|
assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id
|
37
37
|
assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri
|
38
38
|
assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id
|
39
39
|
|
40
|
-
xhtml_dtd = LibXML::XML::Dtd.new
|
40
|
+
xhtml_dtd = LibXML::XML::Dtd.new("-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "xhtml1", nil, true)
|
41
41
|
assert_equal "xhtml1", xhtml_dtd.name
|
42
42
|
assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id
|
43
43
|
assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri
|
@@ -45,13 +45,13 @@ class TestDtd < Minitest::Test
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def test_external_subset
|
48
|
-
xhtml_dtd = LibXML::XML::Dtd.new
|
48
|
+
xhtml_dtd = LibXML::XML::Dtd.new("-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", nil)
|
49
49
|
assert xhtml_dtd.name.nil?
|
50
50
|
assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id
|
51
51
|
assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri
|
52
52
|
assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id
|
53
53
|
|
54
|
-
xhtml_dtd = LibXML::XML::Dtd.new
|
54
|
+
xhtml_dtd = LibXML::XML::Dtd.new("-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "xhtml1")
|
55
55
|
assert_equal "xhtml1", xhtml_dtd.name
|
56
56
|
assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id
|
57
57
|
assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri
|
@@ -106,13 +106,11 @@ class TestDtd < Minitest::Test
|
|
106
106
|
errors << error
|
107
107
|
end
|
108
108
|
|
109
|
-
LibXML::XML.default_load_external_dtd = false
|
110
109
|
LibXML::XML::Parser.string(xml).parse
|
111
110
|
assert_equal(0, errors.length)
|
112
111
|
|
113
112
|
errors.clear
|
114
|
-
LibXML::XML.
|
115
|
-
LibXML::XML::Parser.string(xml).parse
|
113
|
+
LibXML::XML::Parser.string(xml, options: LibXML::XML::Parser::Options::DTDLOAD).parse
|
116
114
|
assert_equal(1, errors.length)
|
117
115
|
assert_equal("Warning: failed to load external entity \"test.dtd\" at :1.",
|
118
116
|
errors[0].to_s)
|
@@ -123,7 +121,6 @@ class TestDtd < Minitest::Test
|
|
123
121
|
assert_equal("Warning: failed to load external entity \"test.dtd\" at :1.",
|
124
122
|
errors[0].to_s)
|
125
123
|
ensure
|
126
|
-
LibXML::XML.default_load_external_dtd = false
|
127
124
|
LibXML::XML::Error.reset_handler
|
128
125
|
end
|
129
126
|
end
|
data/test/test_encoding.rb
CHANGED
@@ -38,10 +38,7 @@ class TestEncoding < Minitest::Test
|
|
38
38
|
@encoding = encoding
|
39
39
|
file = file_for_encoding(encoding)
|
40
40
|
|
41
|
-
|
42
|
-
LibXML::XML.default_keep_blanks = false
|
43
|
-
@doc = LibXML::XML::Document.file(file)
|
44
|
-
LibXML::XML.default_keep_blanks = true
|
41
|
+
@doc = LibXML::XML::Document.file(file, options: LibXML::XML::Parser::Options::NOBLANKS)
|
45
42
|
end
|
46
43
|
|
47
44
|
def test_encoding
|
data/test/test_helper.rb
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
3
|
# To make testing/debugging easier, test within this source tree versus an installed gem
|
4
|
-
|
5
4
|
require 'bundler/setup'
|
6
|
-
|
5
|
+
|
6
|
+
# Add ext directory to load path to make it easier to test locally built extensions
|
7
|
+
ext_path = File.expand_path(File.join(__dir__, '..', 'ext', 'libxml'))
|
8
|
+
$LOAD_PATH.unshift(File.expand_path(ext_path))
|
9
|
+
|
10
|
+
# Now load code
|
7
11
|
require 'libxml-ruby'
|
8
12
|
|
9
13
|
def windows?
|
@@ -11,3 +15,6 @@ def windows?
|
|
11
15
|
end
|
12
16
|
|
13
17
|
STDOUT.write "\nlibxml2: #{LibXML::XML::LIBXML_VERSION}\n#{RUBY_DESCRIPTION}\n\n"
|
18
|
+
|
19
|
+
require 'minitest/autorun'
|
20
|
+
|
data/test/test_html_parser.rb
CHANGED
@@ -1,162 +1,162 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
|
-
require_relative './test_helper'
|
4
|
-
require 'stringio'
|
5
|
-
|
6
|
-
class HTMLParserTest < Minitest::Test
|
7
|
-
def html_file
|
8
|
-
File.expand_path(File.join(File.dirname(__FILE__), 'model/ruby-lang.html'))
|
9
|
-
end
|
10
|
-
|
11
|
-
# ----- Sources ------
|
12
|
-
def test_file
|
13
|
-
xp = LibXML::XML::HTMLParser.file(html_file)
|
14
|
-
assert_instance_of(LibXML::XML::HTMLParser, xp)
|
15
|
-
doc = xp.parse
|
16
|
-
refute_nil(doc)
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_noexistent_file
|
20
|
-
error = assert_raises(LibXML::XML::Error) do
|
21
|
-
LibXML::XML::HTMLParser.file('i_dont_exist.xml')
|
22
|
-
end
|
23
|
-
|
24
|
-
assert_equal('Warning: failed to load external entity "i_dont_exist.xml".', error.to_s)
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_nil_file
|
28
|
-
error = assert_raises(TypeError) do
|
29
|
-
LibXML::XML::HTMLParser.file(nil)
|
30
|
-
end
|
31
|
-
|
32
|
-
assert_match(/nil into String/, error.to_s)
|
33
|
-
end
|
34
|
-
|
35
|
-
def test_io
|
36
|
-
File.open(html_file) do |io|
|
37
|
-
xp = LibXML::XML::HTMLParser.io(io)
|
38
|
-
assert_instance_of(LibXML::XML::HTMLParser, xp)
|
39
|
-
|
40
|
-
doc = xp.parse
|
41
|
-
assert_instance_of(LibXML::XML::Document, doc)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_io_gc
|
46
|
-
# Test that the reader keeps a reference
|
47
|
-
# to the io object
|
48
|
-
file = File.open(html_file)
|
49
|
-
parser = LibXML::XML::HTMLParser.io(file)
|
50
|
-
file = nil
|
51
|
-
GC.start
|
52
|
-
assert(parser.parse)
|
53
|
-
end
|
54
|
-
|
55
|
-
def test_nil_io
|
56
|
-
error = assert_raises(TypeError) do
|
57
|
-
LibXML::XML::HTMLParser.io(nil)
|
58
|
-
end
|
59
|
-
|
60
|
-
assert_equal("Must pass in an IO object", error.to_s)
|
61
|
-
end
|
62
|
-
|
63
|
-
def test_string_io
|
64
|
-
data = File.read(html_file)
|
65
|
-
io = StringIO.new(data)
|
66
|
-
xp = LibXML::XML::HTMLParser.io(io)
|
67
|
-
assert_instance_of(LibXML::XML::HTMLParser, xp)
|
68
|
-
|
69
|
-
doc = xp.parse
|
70
|
-
assert_instance_of(LibXML::XML::Document, doc)
|
71
|
-
end
|
72
|
-
|
73
|
-
def test_string
|
74
|
-
str = '<html><body><p>hi</p></body></html>'
|
75
|
-
xp = LibXML::XML::HTMLParser.string(str)
|
76
|
-
|
77
|
-
assert_instance_of(LibXML::XML::HTMLParser, xp)
|
78
|
-
assert_instance_of(LibXML::XML::HTMLParser, xp)
|
79
|
-
|
80
|
-
doc = xp.parse
|
81
|
-
assert_instance_of(LibXML::XML::Document, doc)
|
82
|
-
end
|
83
|
-
|
84
|
-
def test_nil_string
|
85
|
-
error = assert_raises(TypeError) do
|
86
|
-
LibXML::XML::HTMLParser.string(nil)
|
87
|
-
end
|
88
|
-
|
89
|
-
assert_equal("wrong argument type nil (expected String)", error.to_s)
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_parse
|
93
|
-
html = <<-EOS
|
94
|
-
<html>
|
95
|
-
<head>
|
96
|
-
<meta name=keywords content=nasty>
|
97
|
-
</head>
|
98
|
-
<body>Hello<br>World</html>
|
99
|
-
EOS
|
100
|
-
|
101
|
-
parser = LibXML::XML::HTMLParser.string(html, :options => LibXML::XML::HTMLParser::Options::NOBLANKS)
|
102
|
-
doc = parser.parse
|
103
|
-
assert_instance_of LibXML::XML::Document, doc
|
104
|
-
|
105
|
-
root = doc.root
|
106
|
-
assert_instance_of LibXML::XML::Node, root
|
107
|
-
assert_equal 'html', root.name
|
108
|
-
|
109
|
-
head = root.child
|
110
|
-
assert_instance_of LibXML::XML::Node, head
|
111
|
-
assert_equal 'head', head.name
|
112
|
-
|
113
|
-
meta = head.child
|
114
|
-
assert_instance_of LibXML::XML::Node, meta
|
115
|
-
assert_equal 'meta', meta.name
|
116
|
-
assert_equal 'keywords', meta[:name]
|
117
|
-
assert_equal 'nasty', meta[:content]
|
118
|
-
|
119
|
-
body = head.next
|
120
|
-
assert_instance_of LibXML::XML::Node, body
|
121
|
-
assert_equal 'body', body.name
|
122
|
-
|
123
|
-
hello = body.child
|
124
|
-
# It appears that some versions of libxml2 add a layer of <p>
|
125
|
-
# can't figure out why or how, so this skips it if present
|
126
|
-
hello = hello.child if hello.name == "p"
|
127
|
-
|
128
|
-
assert_instance_of LibXML::XML::Node, hello
|
129
|
-
assert_equal 'Hello', hello.content
|
130
|
-
|
131
|
-
br = hello.next
|
132
|
-
assert_instance_of LibXML::XML::Node, br
|
133
|
-
assert_equal 'br', br.name
|
134
|
-
|
135
|
-
world = br.next
|
136
|
-
assert_instance_of LibXML::XML::Node, world
|
137
|
-
assert_equal 'World', world.content
|
138
|
-
end
|
139
|
-
|
140
|
-
def test_no_implied
|
141
|
-
html = "hello world"
|
142
|
-
parser = LibXML::XML::HTMLParser.string(html, :options => LibXML::XML::HTMLParser::Options::NOIMPLIED)
|
143
|
-
doc = parser.parse
|
144
|
-
assert_equal("<p>#{html}</p>", doc.root.to_s)
|
145
|
-
end
|
146
|
-
|
147
|
-
def test_comment
|
148
|
-
doc = LibXML::XML::HTMLParser.string('<!-- stuff -->', :options => LibXML::XML::HTMLParser::Options::NOIMPLIED |
|
149
|
-
LibXML::XML::HTMLParser::Options::NOERROR |
|
150
|
-
LibXML::XML::HTMLParser::Options::NOWARNING |
|
151
|
-
LibXML::XML::HTMLParser::Options::RECOVER |
|
152
|
-
LibXML::XML::HTMLParser::Options::NONET)
|
153
|
-
assert(doc)
|
154
|
-
end
|
155
|
-
|
156
|
-
def test_open_many_files
|
157
|
-
file = File.expand_path(File.join(File.dirname(__FILE__), 'model/ruby-lang.html'))
|
158
|
-
1000.times do
|
159
|
-
LibXML::XML::HTMLParser.file(file).parse
|
160
|
-
end
|
161
|
-
end
|
162
|
-
end
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require_relative './test_helper'
|
4
|
+
require 'stringio'
|
5
|
+
|
6
|
+
class HTMLParserTest < Minitest::Test
|
7
|
+
def html_file
|
8
|
+
File.expand_path(File.join(File.dirname(__FILE__), 'model/ruby-lang.html'))
|
9
|
+
end
|
10
|
+
|
11
|
+
# ----- Sources ------
|
12
|
+
def test_file
|
13
|
+
xp = LibXML::XML::HTMLParser.file(html_file)
|
14
|
+
assert_instance_of(LibXML::XML::HTMLParser, xp)
|
15
|
+
doc = xp.parse
|
16
|
+
refute_nil(doc)
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_noexistent_file
|
20
|
+
error = assert_raises(LibXML::XML::Error) do
|
21
|
+
LibXML::XML::HTMLParser.file('i_dont_exist.xml')
|
22
|
+
end
|
23
|
+
|
24
|
+
assert_equal('Warning: failed to load external entity "i_dont_exist.xml".', error.to_s)
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_nil_file
|
28
|
+
error = assert_raises(TypeError) do
|
29
|
+
LibXML::XML::HTMLParser.file(nil)
|
30
|
+
end
|
31
|
+
|
32
|
+
assert_match(/nil into String/, error.to_s)
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_io
|
36
|
+
File.open(html_file) do |io|
|
37
|
+
xp = LibXML::XML::HTMLParser.io(io)
|
38
|
+
assert_instance_of(LibXML::XML::HTMLParser, xp)
|
39
|
+
|
40
|
+
doc = xp.parse
|
41
|
+
assert_instance_of(LibXML::XML::Document, doc)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_io_gc
|
46
|
+
# Test that the reader keeps a reference
|
47
|
+
# to the io object
|
48
|
+
file = File.open(html_file)
|
49
|
+
parser = LibXML::XML::HTMLParser.io(file)
|
50
|
+
file = nil
|
51
|
+
GC.start
|
52
|
+
assert(parser.parse)
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_nil_io
|
56
|
+
error = assert_raises(TypeError) do
|
57
|
+
LibXML::XML::HTMLParser.io(nil)
|
58
|
+
end
|
59
|
+
|
60
|
+
assert_equal("Must pass in an IO object", error.to_s)
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_string_io
|
64
|
+
data = File.read(html_file)
|
65
|
+
io = StringIO.new(data)
|
66
|
+
xp = LibXML::XML::HTMLParser.io(io)
|
67
|
+
assert_instance_of(LibXML::XML::HTMLParser, xp)
|
68
|
+
|
69
|
+
doc = xp.parse
|
70
|
+
assert_instance_of(LibXML::XML::Document, doc)
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_string
|
74
|
+
str = '<html><body><p>hi</p></body></html>'
|
75
|
+
xp = LibXML::XML::HTMLParser.string(str)
|
76
|
+
|
77
|
+
assert_instance_of(LibXML::XML::HTMLParser, xp)
|
78
|
+
assert_instance_of(LibXML::XML::HTMLParser, xp)
|
79
|
+
|
80
|
+
doc = xp.parse
|
81
|
+
assert_instance_of(LibXML::XML::Document, doc)
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_nil_string
|
85
|
+
error = assert_raises(TypeError) do
|
86
|
+
LibXML::XML::HTMLParser.string(nil)
|
87
|
+
end
|
88
|
+
|
89
|
+
assert_equal("wrong argument type nil (expected String)", error.to_s)
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_parse
|
93
|
+
html = <<-EOS
|
94
|
+
<html>
|
95
|
+
<head>
|
96
|
+
<meta name=keywords content=nasty>
|
97
|
+
</head>
|
98
|
+
<body>Hello<br>World</html>
|
99
|
+
EOS
|
100
|
+
|
101
|
+
parser = LibXML::XML::HTMLParser.string(html, :options => LibXML::XML::HTMLParser::Options::NOBLANKS)
|
102
|
+
doc = parser.parse
|
103
|
+
assert_instance_of LibXML::XML::Document, doc
|
104
|
+
|
105
|
+
root = doc.root
|
106
|
+
assert_instance_of LibXML::XML::Node, root
|
107
|
+
assert_equal 'html', root.name
|
108
|
+
|
109
|
+
head = root.child
|
110
|
+
assert_instance_of LibXML::XML::Node, head
|
111
|
+
assert_equal 'head', head.name
|
112
|
+
|
113
|
+
meta = head.child
|
114
|
+
assert_instance_of LibXML::XML::Node, meta
|
115
|
+
assert_equal 'meta', meta.name
|
116
|
+
assert_equal 'keywords', meta[:name]
|
117
|
+
assert_equal 'nasty', meta[:content]
|
118
|
+
|
119
|
+
body = head.next
|
120
|
+
assert_instance_of LibXML::XML::Node, body
|
121
|
+
assert_equal 'body', body.name
|
122
|
+
|
123
|
+
hello = body.child
|
124
|
+
# It appears that some versions of libxml2 add a layer of <p>
|
125
|
+
# can't figure out why or how, so this skips it if present
|
126
|
+
hello = hello.child if hello.name == "p"
|
127
|
+
|
128
|
+
assert_instance_of LibXML::XML::Node, hello
|
129
|
+
assert_equal 'Hello', hello.content
|
130
|
+
|
131
|
+
br = hello.next
|
132
|
+
assert_instance_of LibXML::XML::Node, br
|
133
|
+
assert_equal 'br', br.name
|
134
|
+
|
135
|
+
world = br.next
|
136
|
+
assert_instance_of LibXML::XML::Node, world
|
137
|
+
assert_equal 'World', world.content
|
138
|
+
end
|
139
|
+
|
140
|
+
def test_no_implied
|
141
|
+
html = "hello world"
|
142
|
+
parser = LibXML::XML::HTMLParser.string(html, :options => LibXML::XML::HTMLParser::Options::NOIMPLIED)
|
143
|
+
doc = parser.parse
|
144
|
+
assert_equal("<p>#{html}</p>", doc.root.to_s)
|
145
|
+
end
|
146
|
+
|
147
|
+
def test_comment
|
148
|
+
doc = LibXML::XML::HTMLParser.string('<!-- stuff -->', :options => LibXML::XML::HTMLParser::Options::NOIMPLIED |
|
149
|
+
LibXML::XML::HTMLParser::Options::NOERROR |
|
150
|
+
LibXML::XML::HTMLParser::Options::NOWARNING |
|
151
|
+
LibXML::XML::HTMLParser::Options::RECOVER |
|
152
|
+
LibXML::XML::HTMLParser::Options::NONET)
|
153
|
+
assert(doc)
|
154
|
+
end
|
155
|
+
|
156
|
+
def test_open_many_files
|
157
|
+
file = File.expand_path(File.join(File.dirname(__FILE__), 'model/ruby-lang.html'))
|
158
|
+
1000.times do
|
159
|
+
LibXML::XML::HTMLParser.file(file).parse
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
data/test/test_namespace.rb
CHANGED
@@ -37,9 +37,7 @@ class TestNS < Minitest::Test
|
|
37
37
|
def test_duplicate_ns
|
38
38
|
node = LibXML::XML::Node.new('foo')
|
39
39
|
LibXML::XML::Namespace.new(node, 'myname', 'http://www.mynamespace.com')
|
40
|
-
|
41
|
-
LibXML::XML::Namespace.new(node, 'myname', 'http://www.mynamespace.com')
|
42
|
-
end
|
40
|
+
LibXML::XML::Namespace.new(node, 'myname', 'http://www.mynamespace.com')
|
43
41
|
end
|
44
42
|
|
45
43
|
def test_eql
|