libxml-ruby 4.1.2 → 5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY +17 -0
  3. data/README.rdoc +7 -16
  4. data/ext/libxml/ruby_libxml.h +43 -44
  5. data/ext/libxml/ruby_xml.c +0 -343
  6. data/ext/libxml/ruby_xml.h +9 -10
  7. data/ext/libxml/ruby_xml_attributes.h +2 -0
  8. data/ext/libxml/ruby_xml_document.c +6 -6
  9. data/ext/libxml/ruby_xml_document.h +11 -11
  10. data/ext/libxml/ruby_xml_dtd.c +85 -79
  11. data/ext/libxml/ruby_xml_encoding.h +20 -18
  12. data/ext/libxml/ruby_xml_error.c +9 -6
  13. data/ext/libxml/ruby_xml_error.h +2 -2
  14. data/ext/libxml/ruby_xml_html_parser_context.c +35 -21
  15. data/ext/libxml/ruby_xml_namespace.c +0 -3
  16. data/ext/libxml/ruby_xml_node.c +1394 -1398
  17. data/ext/libxml/ruby_xml_parser.h +1 -1
  18. data/ext/libxml/ruby_xml_parser_context.c +47 -39
  19. data/ext/libxml/ruby_xml_parser_options.c +9 -1
  20. data/ext/libxml/ruby_xml_parser_options.h +1 -1
  21. data/ext/libxml/ruby_xml_reader.c +15 -16
  22. data/ext/libxml/ruby_xml_sax2_handler.c +1 -1
  23. data/ext/libxml/ruby_xml_sax_parser.c +1 -9
  24. data/ext/libxml/ruby_xml_schema.c +4 -4
  25. data/ext/libxml/ruby_xml_version.h +5 -5
  26. data/ext/libxml/ruby_xml_writer.c +8 -8
  27. data/ext/libxml/ruby_xml_xpath.c +1 -1
  28. data/ext/libxml/ruby_xml_xpath_context.c +2 -2
  29. data/ext/libxml/ruby_xml_xpath_expression.c +1 -1
  30. data/lib/libxml/document.rb +15 -15
  31. data/lib/libxml/html_parser.rb +23 -23
  32. data/lib/libxml/parser.rb +26 -24
  33. data/test/test.rb +5 -0
  34. data/test/test_document.rb +8 -0
  35. data/test/test_document_write.rb +1 -4
  36. data/test/test_dtd.rb +5 -8
  37. data/test/test_encoding.rb +1 -4
  38. data/test/test_helper.rb +9 -2
  39. data/test/test_html_parser.rb +162 -162
  40. data/test/test_namespace.rb +1 -3
  41. data/test/test_node.rb +1 -3
  42. data/test/test_node_write.rb +1 -4
  43. data/test/test_parser.rb +26 -17
  44. data/test/test_reader.rb +4 -4
  45. data/test/test_sax_parser.rb +1 -1
  46. data/test/test_xml.rb +0 -99
  47. metadata +3 -2
@@ -5,31 +5,31 @@ module LibXML
5
5
  class HTMLParser
6
6
  # call-seq:
7
7
  # XML::HTMLParser.file(path) -> XML::HTMLParser
8
- # XML::HTMLParser.file(path, :encoding => XML::Encoding::UTF_8,
9
- # :options => XML::HTMLParser::Options::NOENT) -> XML::HTMLParser
8
+ # XML::HTMLParser.file(path, encoding: XML::Encoding::UTF_8,
9
+ # options: XML::HTMLParser::Options::NOENT) -> XML::HTMLParser
10
10
  #
11
11
  # Creates a new parser by parsing the specified file or uri.
12
12
  #
13
- # You may provide an optional hash table to control how the
14
- # parsing is performed. Valid options are:
13
+ # Parameters:
15
14
  #
15
+ # path - Path to file to parse
16
16
  # encoding - The document encoding, defaults to nil. Valid values
17
17
  # are the encoding constants defined on XML::Encoding.
18
18
  # options - Parser options. Valid values are the constants defined on
19
19
  # XML::HTMLParser::Options. Multiple options can be combined
20
20
  # by using Bitwise OR (|).
21
- def self.file(path, options = {})
21
+ def self.file(path, encoding: nil, options: nil)
22
22
  context = XML::HTMLParser::Context.file(path)
23
- context.encoding = options[:encoding] if options[:encoding]
24
- context.options = options[:options] if options[:options]
23
+ context.encoding = encoding if encoding
24
+ context.options = options if options
25
25
  self.new(context)
26
26
  end
27
27
 
28
28
  # call-seq:
29
29
  # XML::HTMLParser.io(io) -> XML::HTMLParser
30
- # XML::HTMLParser.io(io, :encoding => XML::Encoding::UTF_8,
31
- # :options => XML::HTMLParser::Options::NOENT
32
- # :base_uri="http://libxml.org") -> XML::HTMLParser
30
+ # XML::HTMLParser.io(io, encoding: XML::Encoding::UTF_8,
31
+ # options: XML::HTMLParser::Options::NOENT,
32
+ # base_uri: "http://libxml.org") -> XML::HTMLParser
33
33
  #
34
34
  # Creates a new parser by parsing the specified io object.
35
35
  #
@@ -42,36 +42,36 @@ module LibXML
42
42
  # options - Parser options. Valid values are the constants defined on
43
43
  # XML::HTMLParser::Options. Multiple options can be combined
44
44
  # by using Bitwise OR (|).
45
- def self.io(io, options = {})
45
+ def self.io(io, base_uri: nil, encoding: nil, options: nil)
46
46
  context = XML::HTMLParser::Context.io(io)
47
- context.base_uri = options[:base_uri] if options[:base_uri]
48
- context.encoding = options[:encoding] if options[:encoding]
49
- context.options = options[:options] if options[:options]
47
+ context.base_uri = base_uri if base_uri
48
+ context.encoding = encoding if encoding
49
+ context.options = options if options
50
50
  self.new(context)
51
51
  end
52
52
 
53
53
  # call-seq:
54
54
  # XML::HTMLParser.string(string)
55
- # XML::HTMLParser.string(string, :encoding => XML::Encoding::UTF_8,
56
- # :options => XML::HTMLParser::Options::NOENT
57
- # :base_uri="http://libxml.org") -> XML::HTMLParser
55
+ # XML::HTMLParser.string(string, encoding: XML::Encoding::UTF_8,
56
+ # options: XML::HTMLParser::Options::NOENT,
57
+ # base_uri: "http://libxml.org") -> XML::HTMLParser
58
58
  #
59
59
  # Creates a new parser by parsing the specified string.
60
60
  #
61
- # You may provide an optional hash table to control how the
62
- # parsing is performed. Valid options are:
61
+ # Parameters:
63
62
  #
63
+ # string - String to parse
64
64
  # base_uri - The base url for the parsed document.
65
65
  # encoding - The document encoding, defaults to nil. Valid values
66
66
  # are the encoding constants defined on XML::Encoding.
67
67
  # options - Parser options. Valid values are the constants defined on
68
68
  # XML::HTMLParser::Options. Multiple options can be combined
69
69
  # by using Bitwise OR (|).
70
- def self.string(string, options = {})
70
+ def self.string(string, base_uri: nil, encoding: nil, options: nil)
71
71
  context = XML::HTMLParser::Context.string(string)
72
- context.base_uri = options[:base_uri] if options[:base_uri]
73
- context.encoding = options[:encoding] if options[:encoding]
74
- context.options = options[:options] if options[:options]
72
+ context.base_uri = base_uri if base_uri
73
+ context.encoding = encoding if encoding
74
+ context.options = options if options
75
75
  self.new(context)
76
76
  end
77
77
 
data/lib/libxml/parser.rb CHANGED
@@ -18,31 +18,33 @@ module LibXML
18
18
 
19
19
  # call-seq:
20
20
  # XML::Parser.file(path) -> XML::Parser
21
- # XML::Parser.file(path, :encoding => XML::Encoding::UTF_8,
22
- # :options => XML::Parser::Options::NOENT) -> XML::Parser
21
+ # XML::Parser.file(path, encoding: XML::Encoding::UTF_8,
22
+ # options: XML::Parser::Options::NOENT) -> XML::Parser
23
23
  #
24
24
  # Creates a new parser for the specified file or uri.
25
25
  #
26
- # You may provide an optional hash table to control how the
27
- # parsing is performed. Valid options are:
26
+ # Parameters:
28
27
  #
28
+ # path - Path to file
29
+ # base_uri - The base url for the parsed document.
29
30
  # encoding - The document encoding, defaults to nil. Valid values
30
31
  # are the encoding constants defined on XML::Encoding.
31
32
  # options - Parser options. Valid values are the constants defined on
32
33
  # XML::Parser::Options. Multiple options can be combined
33
34
  # by using Bitwise OR (|).
34
- def self.file(path, options = {})
35
+ def self.file(path, base_uri: nil, encoding: nil, options: nil)
35
36
  context = XML::Parser::Context.file(path)
36
- context.encoding = options[:encoding] if options[:encoding]
37
- context.options = options[:options] if options[:options]
37
+ context.base_uri = base_uri if base_uri
38
+ context.encoding = encoding if encoding
39
+ context.options = options if options
38
40
  self.new(context)
39
41
  end
40
42
 
41
43
  # call-seq:
42
44
  # XML::Parser.io(io) -> XML::Parser
43
- # XML::Parser.io(io, :encoding => XML::Encoding::UTF_8,
44
- # :options => XML::Parser::Options::NOENT
45
- # :base_uri="http://libxml.org") -> XML::Parser
45
+ # XML::Parser.io(io, encoding: XML::Encoding::UTF_8,
46
+ # options: XML::Parser::Options::NOENT,
47
+ # base_uri: "http://libxml.org") -> XML::Parser
46
48
  #
47
49
  # Creates a new parser for the specified io object.
48
50
  #
@@ -55,36 +57,36 @@ module LibXML
55
57
  # options - Parser options. Valid values are the constants defined on
56
58
  # XML::Parser::Options. Multiple options can be combined
57
59
  # by using Bitwise OR (|).
58
- def self.io(io, options = {})
60
+ def self.io(io, base_uri: nil, encoding: nil, options: nil)
59
61
  context = XML::Parser::Context.io(io)
60
- context.base_uri = options[:base_uri] if options[:base_uri]
61
- context.encoding = options[:encoding] if options[:encoding]
62
- context.options = options[:options] if options[:options]
62
+ context.base_uri = base_uri if base_uri
63
+ context.encoding = encoding if encoding
64
+ context.options = options if options
63
65
  self.new(context)
64
66
  end
65
67
 
66
68
  # call-seq:
67
69
  # XML::Parser.string(string)
68
- # XML::Parser.string(string, :encoding => XML::Encoding::UTF_8,
69
- # :options => XML::Parser::Options::NOENT
70
- # :base_uri="http://libxml.org") -> XML::Parser
70
+ # XML::Parser.string(string, encoding: XML::Encoding::UTF_8,
71
+ # options: XML::Parser::Options::NOENT,
72
+ # base_uri: "http://libxml.org") -> XML::Parser
71
73
  #
72
74
  # Creates a new parser by parsing the specified string.
73
75
  #
74
- # You may provide an optional hash table to control how the
75
- # parsing is performed. Valid options are:
76
+ # Parameters:
76
77
  #
78
+ # string - The string to parse
77
79
  # base_uri - The base url for the parsed document.
78
80
  # encoding - The document encoding, defaults to nil. Valid values
79
81
  # are the encoding constants defined on XML::Encoding.
80
82
  # options - Parser options. Valid values are the constants defined on
81
- # XML::Parser::Options. Mutliple options can be combined
83
+ # XML::Parser::Options. Multiple options can be combined
82
84
  # by using Bitwise OR (|).
83
- def self.string(string, options = {})
85
+ def self.string(string, base_uri: nil, encoding: nil, options: nil)
84
86
  context = XML::Parser::Context.string(string)
85
- context.base_uri = options[:base_uri] if options[:base_uri]
86
- context.encoding = options[:encoding] if options[:encoding]
87
- context.options = options[:options] if options[:options]
87
+ context.base_uri = base_uri if base_uri
88
+ context.encoding = encoding if encoding
89
+ context.options = options if options
88
90
  self.new(context)
89
91
  end
90
92
 
data/test/test.rb ADDED
@@ -0,0 +1,5 @@
1
+ begin
2
+ File.open("/does/not/exist")
3
+ rescue => e
4
+ puts e
5
+ end
@@ -129,4 +129,12 @@ class TestDocument < Minitest::Test
129
129
  file = File.join(File.dirname(__FILE__), 'model/atom.xml')
130
130
  schema_document = LibXML::XML::Document.file(file, options: LibXML::XML::Parser::Options::NONET)
131
131
  end
132
+
133
+ def test_io
134
+ File.open(File.join(File.dirname(__FILE__), 'model/rubynet.xml')) do |io|
135
+ doc = LibXML::XML::Document.io(io)
136
+ assert_instance_of(LibXML::XML::Document, doc)
137
+ end
138
+ end
139
+
132
140
  end
@@ -7,14 +7,11 @@ class TestDocumentWrite < Minitest::Test
7
7
  def setup
8
8
  @file_name = "model/bands.utf-8.xml"
9
9
 
10
- # Strip spaces to make testing easier
11
- LibXML::XML.default_keep_blanks = false
12
10
  file = File.join(File.dirname(__FILE__), @file_name)
13
- @doc = LibXML::XML::Document.file(file)
11
+ @doc = LibXML::XML::Document.file(file, options: LibXML::XML::Parser::Options::NOBLANKS)
14
12
  end
15
13
 
16
14
  def teardown
17
- LibXML::XML.default_keep_blanks = true
18
15
  @doc = nil
19
16
  end
20
17
 
data/test/test_dtd.rb CHANGED
@@ -31,13 +31,13 @@ class TestDtd < Minitest::Test
31
31
  end
32
32
 
33
33
  def test_internal_subset
34
- xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", nil, nil, true
34
+ xhtml_dtd = LibXML::XML::Dtd.new("-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", nil, nil, true)
35
35
  assert xhtml_dtd.name.nil?
36
36
  assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id
37
37
  assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri
38
38
  assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id
39
39
 
40
- xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "xhtml1", nil, true
40
+ xhtml_dtd = LibXML::XML::Dtd.new("-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "xhtml1", nil, true)
41
41
  assert_equal "xhtml1", xhtml_dtd.name
42
42
  assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id
43
43
  assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri
@@ -45,13 +45,13 @@ class TestDtd < Minitest::Test
45
45
  end
46
46
 
47
47
  def test_external_subset
48
- xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", nil
48
+ xhtml_dtd = LibXML::XML::Dtd.new("-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", nil)
49
49
  assert xhtml_dtd.name.nil?
50
50
  assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id
51
51
  assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri
52
52
  assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id
53
53
 
54
- xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "xhtml1"
54
+ xhtml_dtd = LibXML::XML::Dtd.new("-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "xhtml1")
55
55
  assert_equal "xhtml1", xhtml_dtd.name
56
56
  assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id
57
57
  assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri
@@ -106,13 +106,11 @@ class TestDtd < Minitest::Test
106
106
  errors << error
107
107
  end
108
108
 
109
- LibXML::XML.default_load_external_dtd = false
110
109
  LibXML::XML::Parser.string(xml).parse
111
110
  assert_equal(0, errors.length)
112
111
 
113
112
  errors.clear
114
- LibXML::XML.default_load_external_dtd = true
115
- LibXML::XML::Parser.string(xml).parse
113
+ LibXML::XML::Parser.string(xml, options: LibXML::XML::Parser::Options::DTDLOAD).parse
116
114
  assert_equal(1, errors.length)
117
115
  assert_equal("Warning: failed to load external entity \"test.dtd\" at :1.",
118
116
  errors[0].to_s)
@@ -123,7 +121,6 @@ class TestDtd < Minitest::Test
123
121
  assert_equal("Warning: failed to load external entity \"test.dtd\" at :1.",
124
122
  errors[0].to_s)
125
123
  ensure
126
- LibXML::XML.default_load_external_dtd = false
127
124
  LibXML::XML::Error.reset_handler
128
125
  end
129
126
  end
@@ -38,10 +38,7 @@ class TestEncoding < Minitest::Test
38
38
  @encoding = encoding
39
39
  file = file_for_encoding(encoding)
40
40
 
41
- # Strip spaces to make testing easier
42
- LibXML::XML.default_keep_blanks = false
43
- @doc = LibXML::XML::Document.file(file)
44
- LibXML::XML.default_keep_blanks = true
41
+ @doc = LibXML::XML::Document.file(file, options: LibXML::XML::Parser::Options::NOBLANKS)
45
42
  end
46
43
 
47
44
  def test_encoding
data/test/test_helper.rb CHANGED
@@ -1,9 +1,13 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  # To make testing/debugging easier, test within this source tree versus an installed gem
4
-
5
4
  require 'bundler/setup'
6
- require 'minitest/autorun'
5
+
6
+ # Add ext directory to load path to make it easier to test locally built extensions
7
+ ext_path = File.expand_path(File.join(__dir__, '..', 'ext', 'libxml'))
8
+ $LOAD_PATH.unshift(File.expand_path(ext_path))
9
+
10
+ # Now load code
7
11
  require 'libxml-ruby'
8
12
 
9
13
  def windows?
@@ -11,3 +15,6 @@ def windows?
11
15
  end
12
16
 
13
17
  STDOUT.write "\nlibxml2: #{LibXML::XML::LIBXML_VERSION}\n#{RUBY_DESCRIPTION}\n\n"
18
+
19
+ require 'minitest/autorun'
20
+
@@ -1,162 +1,162 @@
1
- # encoding: UTF-8
2
-
3
- require_relative './test_helper'
4
- require 'stringio'
5
-
6
- class HTMLParserTest < Minitest::Test
7
- def html_file
8
- File.expand_path(File.join(File.dirname(__FILE__), 'model/ruby-lang.html'))
9
- end
10
-
11
- # ----- Sources ------
12
- def test_file
13
- xp = LibXML::XML::HTMLParser.file(html_file)
14
- assert_instance_of(LibXML::XML::HTMLParser, xp)
15
- doc = xp.parse
16
- refute_nil(doc)
17
- end
18
-
19
- def test_noexistent_file
20
- error = assert_raises(LibXML::XML::Error) do
21
- LibXML::XML::HTMLParser.file('i_dont_exist.xml')
22
- end
23
-
24
- assert_equal('Warning: failed to load external entity "i_dont_exist.xml".', error.to_s)
25
- end
26
-
27
- def test_nil_file
28
- error = assert_raises(TypeError) do
29
- LibXML::XML::HTMLParser.file(nil)
30
- end
31
-
32
- assert_match(/nil into String/, error.to_s)
33
- end
34
-
35
- def test_io
36
- File.open(html_file) do |io|
37
- xp = LibXML::XML::HTMLParser.io(io)
38
- assert_instance_of(LibXML::XML::HTMLParser, xp)
39
-
40
- doc = xp.parse
41
- assert_instance_of(LibXML::XML::Document, doc)
42
- end
43
- end
44
-
45
- def test_io_gc
46
- # Test that the reader keeps a reference
47
- # to the io object
48
- file = File.open(html_file)
49
- parser = LibXML::XML::HTMLParser.io(file)
50
- file = nil
51
- GC.start
52
- assert(parser.parse)
53
- end
54
-
55
- def test_nil_io
56
- error = assert_raises(TypeError) do
57
- LibXML::XML::HTMLParser.io(nil)
58
- end
59
-
60
- assert_equal("Must pass in an IO object", error.to_s)
61
- end
62
-
63
- def test_string_io
64
- data = File.read(html_file)
65
- io = StringIO.new(data)
66
- xp = LibXML::XML::HTMLParser.io(io)
67
- assert_instance_of(LibXML::XML::HTMLParser, xp)
68
-
69
- doc = xp.parse
70
- assert_instance_of(LibXML::XML::Document, doc)
71
- end
72
-
73
- def test_string
74
- str = '<html><body><p>hi</p></body></html>'
75
- xp = LibXML::XML::HTMLParser.string(str)
76
-
77
- assert_instance_of(LibXML::XML::HTMLParser, xp)
78
- assert_instance_of(LibXML::XML::HTMLParser, xp)
79
-
80
- doc = xp.parse
81
- assert_instance_of(LibXML::XML::Document, doc)
82
- end
83
-
84
- def test_nil_string
85
- error = assert_raises(TypeError) do
86
- LibXML::XML::HTMLParser.string(nil)
87
- end
88
-
89
- assert_equal("wrong argument type nil (expected String)", error.to_s)
90
- end
91
-
92
- def test_parse
93
- html = <<-EOS
94
- <html>
95
- <head>
96
- <meta name=keywords content=nasty>
97
- </head>
98
- <body>Hello<br>World</html>
99
- EOS
100
-
101
- parser = LibXML::XML::HTMLParser.string(html, :options => LibXML::XML::HTMLParser::Options::NOBLANKS)
102
- doc = parser.parse
103
- assert_instance_of LibXML::XML::Document, doc
104
-
105
- root = doc.root
106
- assert_instance_of LibXML::XML::Node, root
107
- assert_equal 'html', root.name
108
-
109
- head = root.child
110
- assert_instance_of LibXML::XML::Node, head
111
- assert_equal 'head', head.name
112
-
113
- meta = head.child
114
- assert_instance_of LibXML::XML::Node, meta
115
- assert_equal 'meta', meta.name
116
- assert_equal 'keywords', meta[:name]
117
- assert_equal 'nasty', meta[:content]
118
-
119
- body = head.next
120
- assert_instance_of LibXML::XML::Node, body
121
- assert_equal 'body', body.name
122
-
123
- hello = body.child
124
- # It appears that some versions of libxml2 add a layer of <p>
125
- # cant figure our why or how, so this skips it if there
126
- hello = hello.child if hello.name == "p"
127
-
128
- assert_instance_of LibXML::XML::Node, hello
129
- assert_equal 'Hello', hello.content
130
-
131
- br = hello.next
132
- assert_instance_of LibXML::XML::Node, br
133
- assert_equal 'br', br.name
134
-
135
- world = br.next
136
- assert_instance_of LibXML::XML::Node, world
137
- assert_equal 'World', world.content
138
- end
139
-
140
- def test_no_implied
141
- html = "hello world"
142
- parser = LibXML::XML::HTMLParser.string(html, :options => LibXML::XML::HTMLParser::Options::NOIMPLIED)
143
- doc = parser.parse
144
- assert_equal("<p>#{html}</p>", doc.root.to_s)
145
- end
146
-
147
- def test_comment
148
- doc = LibXML::XML::HTMLParser.string('<!-- stuff -->', :options => LibXML::XML::HTMLParser::Options::NOIMPLIED |
149
- LibXML::XML::HTMLParser::Options::NOERROR |
150
- LibXML::XML::HTMLParser::Options::NOWARNING |
151
- LibXML::XML::HTMLParser::Options::RECOVER |
152
- LibXML::XML::HTMLParser::Options::NONET)
153
- assert(doc)
154
- end
155
-
156
- def test_open_many_files
157
- file = File.expand_path(File.join(File.dirname(__FILE__), 'model/ruby-lang.html'))
158
- 1000.times do
159
- LibXML::XML::HTMLParser.file(file).parse
160
- end
161
- end
162
- end
1
+ # encoding: UTF-8
2
+
3
+ require_relative './test_helper'
4
+ require 'stringio'
5
+
6
+ class HTMLParserTest < Minitest::Test
7
+ def html_file
8
+ File.expand_path(File.join(File.dirname(__FILE__), 'model/ruby-lang.html'))
9
+ end
10
+
11
+ # ----- Sources ------
12
+ def test_file
13
+ xp = LibXML::XML::HTMLParser.file(html_file)
14
+ assert_instance_of(LibXML::XML::HTMLParser, xp)
15
+ doc = xp.parse
16
+ refute_nil(doc)
17
+ end
18
+
19
+ def test_noexistent_file
20
+ error = assert_raises(LibXML::XML::Error) do
21
+ LibXML::XML::HTMLParser.file('i_dont_exist.xml')
22
+ end
23
+
24
+ assert_equal('Warning: failed to load external entity "i_dont_exist.xml".', error.to_s)
25
+ end
26
+
27
+ def test_nil_file
28
+ error = assert_raises(TypeError) do
29
+ LibXML::XML::HTMLParser.file(nil)
30
+ end
31
+
32
+ assert_match(/nil into String/, error.to_s)
33
+ end
34
+
35
+ def test_io
36
+ File.open(html_file) do |io|
37
+ xp = LibXML::XML::HTMLParser.io(io)
38
+ assert_instance_of(LibXML::XML::HTMLParser, xp)
39
+
40
+ doc = xp.parse
41
+ assert_instance_of(LibXML::XML::Document, doc)
42
+ end
43
+ end
44
+
45
+ def test_io_gc
46
+ # Test that the reader keeps a reference
47
+ # to the io object
48
+ file = File.open(html_file)
49
+ parser = LibXML::XML::HTMLParser.io(file)
50
+ file = nil
51
+ GC.start
52
+ assert(parser.parse)
53
+ end
54
+
55
+ def test_nil_io
56
+ error = assert_raises(TypeError) do
57
+ LibXML::XML::HTMLParser.io(nil)
58
+ end
59
+
60
+ assert_equal("Must pass in an IO object", error.to_s)
61
+ end
62
+
63
+ def test_string_io
64
+ data = File.read(html_file)
65
+ io = StringIO.new(data)
66
+ xp = LibXML::XML::HTMLParser.io(io)
67
+ assert_instance_of(LibXML::XML::HTMLParser, xp)
68
+
69
+ doc = xp.parse
70
+ assert_instance_of(LibXML::XML::Document, doc)
71
+ end
72
+
73
+ def test_string
74
+ str = '<html><body><p>hi</p></body></html>'
75
+ xp = LibXML::XML::HTMLParser.string(str)
76
+
77
+ assert_instance_of(LibXML::XML::HTMLParser, xp)
78
+ assert_instance_of(LibXML::XML::HTMLParser, xp)
79
+
80
+ doc = xp.parse
81
+ assert_instance_of(LibXML::XML::Document, doc)
82
+ end
83
+
84
+ def test_nil_string
85
+ error = assert_raises(TypeError) do
86
+ LibXML::XML::HTMLParser.string(nil)
87
+ end
88
+
89
+ assert_equal("wrong argument type nil (expected String)", error.to_s)
90
+ end
91
+
92
+ def test_parse
93
+ html = <<-EOS
94
+ <html>
95
+ <head>
96
+ <meta name=keywords content=nasty>
97
+ </head>
98
+ <body>Hello<br>World</html>
99
+ EOS
100
+
101
+ parser = LibXML::XML::HTMLParser.string(html, :options => LibXML::XML::HTMLParser::Options::NOBLANKS)
102
+ doc = parser.parse
103
+ assert_instance_of LibXML::XML::Document, doc
104
+
105
+ root = doc.root
106
+ assert_instance_of LibXML::XML::Node, root
107
+ assert_equal 'html', root.name
108
+
109
+ head = root.child
110
+ assert_instance_of LibXML::XML::Node, head
111
+ assert_equal 'head', head.name
112
+
113
+ meta = head.child
114
+ assert_instance_of LibXML::XML::Node, meta
115
+ assert_equal 'meta', meta.name
116
+ assert_equal 'keywords', meta[:name]
117
+ assert_equal 'nasty', meta[:content]
118
+
119
+ body = head.next
120
+ assert_instance_of LibXML::XML::Node, body
121
+ assert_equal 'body', body.name
122
+
123
+ hello = body.child
124
+ # It appears that some versions of libxml2 add a layer of <p>
125
+ # can't figure out why or how, so this skips it if present
126
+ hello = hello.child if hello.name == "p"
127
+
128
+ assert_instance_of LibXML::XML::Node, hello
129
+ assert_equal 'Hello', hello.content
130
+
131
+ br = hello.next
132
+ assert_instance_of LibXML::XML::Node, br
133
+ assert_equal 'br', br.name
134
+
135
+ world = br.next
136
+ assert_instance_of LibXML::XML::Node, world
137
+ assert_equal 'World', world.content
138
+ end
139
+
140
+ def test_no_implied
141
+ html = "hello world"
142
+ parser = LibXML::XML::HTMLParser.string(html, :options => LibXML::XML::HTMLParser::Options::NOIMPLIED)
143
+ doc = parser.parse
144
+ assert_equal("<p>#{html}</p>", doc.root.to_s)
145
+ end
146
+
147
+ def test_comment
148
+ doc = LibXML::XML::HTMLParser.string('<!-- stuff -->', :options => LibXML::XML::HTMLParser::Options::NOIMPLIED |
149
+ LibXML::XML::HTMLParser::Options::NOERROR |
150
+ LibXML::XML::HTMLParser::Options::NOWARNING |
151
+ LibXML::XML::HTMLParser::Options::RECOVER |
152
+ LibXML::XML::HTMLParser::Options::NONET)
153
+ assert(doc)
154
+ end
155
+
156
+ def test_open_many_files
157
+ file = File.expand_path(File.join(File.dirname(__FILE__), 'model/ruby-lang.html'))
158
+ 1000.times do
159
+ LibXML::XML::HTMLParser.file(file).parse
160
+ end
161
+ end
162
+ end
@@ -37,9 +37,7 @@ class TestNS < Minitest::Test
37
37
  def test_duplicate_ns
38
38
  node = LibXML::XML::Node.new('foo')
39
39
  LibXML::XML::Namespace.new(node, 'myname', 'http://www.mynamespace.com')
40
- assert_raises(LibXML::XML::Error) do
41
- LibXML::XML::Namespace.new(node, 'myname', 'http://www.mynamespace.com')
42
- end
40
+ LibXML::XML::Namespace.new(node, 'myname', 'http://www.mynamespace.com')
43
41
  end
44
42
 
45
43
  def test_eql