nokogiri 1.3.1 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (62) hide show
  1. data/CHANGELOG.ja.rdoc +25 -0
  2. data/CHANGELOG.rdoc +23 -0
  3. data/Manifest.txt +5 -0
  4. data/README.ja.rdoc +5 -5
  5. data/README.rdoc +3 -3
  6. data/Rakefile +27 -23
  7. data/ext/nokogiri/extconf.rb +54 -12
  8. data/ext/nokogiri/xml_document.c +4 -1
  9. data/ext/nokogiri/xml_document.h +2 -0
  10. data/ext/nokogiri/xml_dtd.c +29 -0
  11. data/ext/nokogiri/xml_node.c +9 -1
  12. data/ext/nokogiri/xml_node_set.c +5 -1
  13. data/ext/nokogiri/xml_relax_ng.c +50 -3
  14. data/ext/nokogiri/xml_sax_parser.c +84 -77
  15. data/ext/nokogiri/xml_schema.c +52 -3
  16. data/ext/nokogiri/xml_syntax_error.c +7 -0
  17. data/ext/nokogiri/xml_syntax_error.h +1 -0
  18. data/lib/nokogiri.rb +2 -2
  19. data/lib/nokogiri/css/parser.rb +2 -2
  20. data/lib/nokogiri/ffi/io_callbacks.rb +20 -12
  21. data/lib/nokogiri/ffi/libxml.rb +8 -0
  22. data/lib/nokogiri/ffi/xml/document.rb +1 -1
  23. data/lib/nokogiri/ffi/xml/dtd.rb +22 -6
  24. data/lib/nokogiri/ffi/xml/namespace.rb +9 -7
  25. data/lib/nokogiri/ffi/xml/node.rb +4 -0
  26. data/lib/nokogiri/ffi/xml/node_set.rb +4 -1
  27. data/lib/nokogiri/ffi/xml/relax_ng.rb +35 -3
  28. data/lib/nokogiri/ffi/xml/sax/parser.rb +20 -19
  29. data/lib/nokogiri/ffi/xml/schema.rb +41 -4
  30. data/lib/nokogiri/html.rb +2 -2
  31. data/lib/nokogiri/html/document.rb +3 -3
  32. data/lib/nokogiri/version.rb +2 -2
  33. data/lib/nokogiri/xml.rb +3 -3
  34. data/lib/nokogiri/xml/document.rb +14 -4
  35. data/lib/nokogiri/xml/fragment_handler.rb +8 -0
  36. data/lib/nokogiri/xml/node.rb +1 -104
  37. data/lib/nokogiri/xml/node_set.rb +46 -6
  38. data/lib/nokogiri/xml/parse_options.rb +7 -2
  39. data/lib/nokogiri/xml/relax_ng.rb +2 -2
  40. data/lib/nokogiri/xml/sax.rb +1 -0
  41. data/lib/nokogiri/xml/sax/document.rb +4 -4
  42. data/lib/nokogiri/xml/sax/legacy_handlers.rb +65 -0
  43. data/lib/nokogiri/xml/sax/parser.rb +7 -0
  44. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  45. data/lib/nokogiri/xml/schema.rb +1 -5
  46. data/lib/xsd/xmlparser/nokogiri.rb +14 -7
  47. data/tasks/test.rb +1 -62
  48. data/test/files/bar/bar.xsd +4 -0
  49. data/test/files/foo/foo.xsd +4 -0
  50. data/test/files/snuggles.xml +3 -0
  51. data/test/files/valid_bar.xml +2 -0
  52. data/test/helper.rb +9 -8
  53. data/test/html/test_document_fragment.rb +14 -0
  54. data/test/test_reader.rb +10 -10
  55. data/test/xml/sax/test_parser.rb +77 -0
  56. data/test/xml/sax/test_push_parser.rb +11 -7
  57. data/test/xml/test_document.rb +25 -0
  58. data/test/xml/test_dtd.rb +6 -1
  59. data/test/xml/test_node.rb +7 -0
  60. data/test/xml/test_node_set.rb +19 -0
  61. data/test/xml/test_schema.rb +24 -0
  62. metadata +10 -5
@@ -30,6 +30,9 @@ module Nokogiri
30
30
  # For more information about SAX parsers, see Nokogiri::XML::SAX. Also
31
31
  # see Nokogiri::XML::SAX::Document for the available events.
32
32
  class Parser
33
+ class Attribute < Struct.new(:localname, :prefix, :uri, :value)
34
+ end
35
+
33
36
  # Encodings this parser supports
34
37
  ENCODINGS = {
35
38
  'NONE' => 0, # No char encoding detected
@@ -67,6 +70,7 @@ module Nokogiri
67
70
  def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = 'ASCII')
68
71
  @encoding = encoding
69
72
  @document = doc
73
+ @warned = false
70
74
  end
71
75
 
72
76
  ###
@@ -95,6 +99,9 @@ module Nokogiri
95
99
  raise Errno::EISDIR if File.directory?(filename)
96
100
  native_parse_file filename
97
101
  end
102
+
103
+ private
104
+ include Nokogiri::XML::SAX::LegacyHandlers
98
105
  end
99
106
  end
100
107
  end
@@ -54,6 +54,9 @@ module Nokogiri
54
54
  def finish
55
55
  write '', true
56
56
  end
57
+
58
+ private
59
+ include Nokogiri::XML::SAX::LegacyHandlers
57
60
  end
58
61
  end
59
62
  end
@@ -34,11 +34,7 @@ module Nokogiri
34
34
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
35
35
  # object.
36
36
  def self.new string_or_io
37
- if string_or_io.respond_to?(:read)
38
- string_or_io = string_or_io.read
39
- end
40
-
41
- read_memory(string_or_io)
37
+ from_document Nokogiri::XML(string_or_io)
42
38
  end
43
39
 
44
40
  ###
@@ -13,15 +13,16 @@ module XSD # :nodoc:
13
13
  # Example (using UW ITS Web Services):
14
14
  #
15
15
  # require 'rubygems'
16
- # gem 'soap4r'
17
16
  # require 'nokogiri'
18
- # require 'xsd/xmlparser/nokogiri'
17
+ # gem 'soap4r'
19
18
  # require 'defaultDriver'
20
- #
19
+ # require 'xsd/xmlparser/nokogiri'
20
+ #
21
21
  # obj = AvlPortType.new
22
- # obj.getLatestByRoute(obj.getAgencies, 8).each do |event|
23
- # ...
22
+ # obj.getLatestByRoute(obj.getAgencies.first, 8).each do |bus|
23
+ # p "#{bus.routeID}, #{bus.longitude}, #{bus.latitude}"
24
24
  # end
25
+ #
25
26
  class Nokogiri < XSD::XMLParser::Parser
26
27
  ###
27
28
  # Create a new XSD parser with +host+ and +opt+
@@ -39,7 +40,13 @@ module XSD # :nodoc:
39
40
  ###
40
41
  # Handle the start_element event with +name+ and +attrs+
41
42
  def start_element name, attrs = []
42
- super(name, Hash[*attrs])
43
+ super(name, Hash[*attrs.flatten])
44
+ end
45
+
46
+ ###
47
+ # Handle the end_element event with +name+
48
+ def end_element name
49
+ super
43
50
  end
44
51
 
45
52
  ###
@@ -55,7 +62,7 @@ module XSD # :nodoc:
55
62
  characters string
56
63
  end
57
64
 
58
- %w{ start_document end_document comment }.each do |name|
65
+ %w{ start_document start_element_namespace end_element_namespace end_document comment }.each do |name|
59
66
  class_eval %{ def #{name}(*args); end }
60
67
  end
61
68
  add_factory(self)
@@ -1,64 +1,3 @@
1
- # partial-loads-ok and undef-value-errors necessary to ignore
2
- # spurious (and eminently ignorable) warnings from the ruby
3
- # interpreter
4
- VALGRIND_BASIC_OPTS = "--num-callers=50 --error-limit=no --partial-loads-ok=yes --undef-value-errors=no"
5
-
6
- class NokogiriTestTask < Rake::TestTask
7
- def initialize *args
8
- super
9
- %w[ ext lib bin test ].each do |dir|
10
- self.libs << dir
11
- end
12
- self.test_files = FileList['test/**/test_*.rb'] +
13
- FileList['test/**/*_test.rb']
14
- self.verbose = "verbose"
15
- self.warning = true
16
- end
17
- end
18
-
19
- desc "run test suite under valgrind with basic ruby options"
20
- NokogiriTestTask.new('test:valgrind').extend(Module.new {
21
- def ruby *args
22
- run_with_env "valgrind #{VALGRIND_BASIC_OPTS} #{RUBY} #{args.join(' ')} test/test_nokogiri.rb --verbose=verbose"
23
- end
24
- })
25
-
26
- desc "run test suite under valgrind with memory-fill ruby options"
27
- NokogiriTestTask.new('test:valgrind_mem').extend(Module.new {
28
- def ruby *args
29
- run_with_env "valgrind #{VALGRIND_BASIC_OPTS} --freelist-vol=100000000 --malloc-fill=6D --free-fill=66 #{RUBY} #{args.join(' ')} test/test_nokogiri.rb --verbose=verbose"
30
- end
31
- })
32
-
33
- desc "run test suite under valgrind with memory-zero ruby options"
34
- NokogiriTestTask.new('test:valgrind_mem0').extend(Module.new {
35
- def ruby *args
36
- run_with_env "valgrind #{VALGRIND_BASIC_OPTS} --freelist-vol=100000000 --malloc-fill=00 --free-fill=00 #{RUBY} #{args.join(' ')} test/test_nokogiri.rb --verbose=verbose"
37
- end
38
- })
39
-
40
- desc "run test suite under gdb"
41
- NokogiriTestTask.new('test:gdb').extend(Module.new {
42
- def ruby *args
43
- run_with_env "gdb --args #{RUBY} #{args.join(' ')}"
44
- end
45
- })
46
-
47
- desc "test coverage"
48
- NokogiriTestTask.new('test:coverage').extend(Module.new {
49
- def ruby *args
50
- rm_rf "coverage"
51
- run_with_env "rcov -x Library -I lib:ext:test #{args.join(' ')}"
52
- end
53
- })
54
-
55
- desc "run test suite with verbose output"
56
- NokogiriTestTask.new('test:verbose').extend(Module.new {
57
- def ruby *args
58
- run_with_env "#{RUBY} #{args.join(' ')} test/test_nokogiri.rb --verbose=verbose"
59
- end
60
- })
61
-
62
1
  def run_with_env(cmd)
63
2
  cmd = "LD_LIBRARY_PATH='#{ENV['LD_LIBRARY_PATH']}' #{cmd}"
64
3
  puts "=> #{cmd}"
@@ -142,7 +81,7 @@ namespace :test do
142
81
  directories = ENV['MULTIXML2_DIR'] ? [ENV['MULTIXML2_DIR']] : Dir[File.join(MULTI_XML, 'install', '*')]
143
82
  directories.sort.reverse.each do |xml2_version|
144
83
  next unless xml2_version =~ /libxml2/
145
- extopts = "--with-xml2-include=#{xml2_version}/include/libxml2 --with-xml2-lib=#{xml2_version}/lib --with-xslt-dir=#{libxslt}"
84
+ extopts = "--with-xml2-include=#{xml2_version}/include/libxml2 --with-xml2-lib=#{xml2_version}/lib --with-xslt-dir=#{libxslt} --with-iconv-dir=/usr"
146
85
  cmd = "#{$0} clean test EXTOPTS='#{extopts}' LD_LIBRARY_PATH='#{xml2_version}/lib'"
147
86
 
148
87
  version = File.basename(xml2_version)
@@ -0,0 +1,4 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
3
+ <xsd:element name="bar"/>
4
+ </xsd:schema>
@@ -0,0 +1,4 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
3
+ <xsd:include schemaLocation="../bar/bar.xsd"/>
4
+ </xsd:schema>
@@ -0,0 +1,3 @@
1
+ <x xmlns:tenderlove='http://tenderlovemaking.com/'>
2
+ <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
3
+ </x>
@@ -0,0 +1,2 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <bar />
@@ -23,6 +23,7 @@ module Nokogiri
23
23
  PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
24
24
  ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
25
25
  ADDRESS_XML_FILE = File.join(ASSETS_DIR, 'address_book.xml')
26
+ SNUGGLES_FILE = File.join(ASSETS_DIR, 'snuggles.xml')
26
27
 
27
28
  unless RUBY_VERSION >= '1.9'
28
29
  undef :default_test
@@ -55,10 +56,10 @@ module Nokogiri
55
56
  module SAX
56
57
  class TestCase < Nokogiri::TestCase
57
58
  class Doc < XML::SAX::Document
58
- attr_reader :start_elements, :start_elements_ns, :start_document_called
59
- attr_reader :end_elements, :end_elements_ns, :end_document_called
60
- attr_reader :data, :comments, :cdata_blocks
61
- attr_reader :errors, :warnings
59
+ attr_reader :start_elements, :start_document_called
60
+ attr_reader :end_elements, :end_document_called
61
+ attr_reader :data, :comments, :cdata_blocks, :start_elements_namespace
62
+ attr_reader :errors, :warnings, :end_elements_namespace
62
63
 
63
64
  def start_document
64
65
  @start_document_called = true
@@ -85,8 +86,8 @@ module Nokogiri
85
86
  super
86
87
  end
87
88
 
88
- def start_element_ns *args
89
- (@start_elements_ns ||= []) << args
89
+ def start_element_namespace *args
90
+ (@start_elements_namespace ||= []) << args
90
91
  super
91
92
  end
92
93
 
@@ -95,8 +96,8 @@ module Nokogiri
95
96
  super
96
97
  end
97
98
 
98
- def end_element_ns *args
99
- (@end_elements_ns ||= []) << args
99
+ def end_element_namespace *args
100
+ (@end_elements_namespace ||= []) << args
100
101
  super
101
102
  end
102
103
 
@@ -92,6 +92,20 @@ module Nokogiri
92
92
  assert_equal "<span>foo<br/></span><span>bar</span>", fragment.to_xml
93
93
  end
94
94
 
95
+ def test_fragment_script_tag_with_cdata
96
+ doc = HTML::Document.new
97
+ fragment = doc.fragment("<script>var foo = 'bar';</script>")
98
+ assert_equal("<script>var foo = 'bar';</script>",
99
+ fragment.to_s)
100
+ end
101
+
102
+ def test_fragment_with_comment
103
+ doc = HTML::Document.new
104
+ fragment = doc.fragment("<p>hello<!-- your ad here --></p>")
105
+ assert_equal("<p>hello<!-- your ad here --></p>",
106
+ fragment.to_s)
107
+ end
108
+
95
109
  end
96
110
  end
97
111
  end
@@ -3,11 +3,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
3
3
 
4
4
  class TestReader < Nokogiri::TestCase
5
5
  def test_from_io_sets_io_as_source
6
- io = StringIO.new(<<-eoxml)
7
- <x xmlns:tenderlove='http://tenderlovemaking.com/'>
8
- <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
9
- </x>
10
- eoxml
6
+ io = File.open SNUGGLES_FILE
11
7
  reader = Nokogiri::XML::Reader.from_io(io)
12
8
  assert_equal io, reader.source
13
9
  end
@@ -33,11 +29,7 @@ class TestReader < Nokogiri::TestCase
33
29
  end
34
30
 
35
31
  def test_from_io
36
- io = StringIO.new(<<-eoxml)
37
- <x xmlns:tenderlove='http://tenderlovemaking.com/'>
38
- <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
39
- </x>
40
- eoxml
32
+ io = File.open SNUGGLES_FILE
41
33
  reader = Nokogiri::XML::Reader.from_io(io)
42
34
  assert_equal false, reader.default?
43
35
  assert_equal [false, false, false, false, false, false, false],
@@ -45,6 +37,14 @@ class TestReader < Nokogiri::TestCase
45
37
  end
46
38
 
47
39
  def test_io
40
+ io = File.open SNUGGLES_FILE
41
+ reader = Nokogiri::XML::Reader(io)
42
+ assert_equal false, reader.default?
43
+ assert_equal [false, false, false, false, false, false, false],
44
+ reader.map { |x| x.default? }
45
+ end
46
+
47
+ def test_string_io
48
48
  io = StringIO.new(<<-eoxml)
49
49
  <x xmlns:tenderlove='http://tenderlovemaking.com/'>
50
50
  <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
@@ -9,12 +9,89 @@ module Nokogiri
9
9
  @parser = XML::SAX::Parser.new(Doc.new)
10
10
  end
11
11
 
12
+ def test_namespace_declaration_order_is_saved
13
+ @parser.parse <<-eoxml
14
+ <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
15
+ <a foo:bar='hello' />
16
+ </root>
17
+ eoxml
18
+ assert_equal 2, @parser.document.start_elements_namespace.length
19
+ el = @parser.document.start_elements_namespace.first
20
+ namespaces = el.last
21
+ assert_equal ['foo', 'http://foo.example.com/'], namespaces.first
22
+ assert_equal [nil, 'http://example.com/'], namespaces.last
23
+ end
24
+
12
25
  def test_bad_document_calls_error_handler
13
26
  @parser.parse('<foo><bar></foo>')
14
27
  assert @parser.document.errors
15
28
  assert @parser.document.errors.length > 0
16
29
  end
17
30
 
31
+ def test_namespace_are_super_fun_to_parse
32
+ @parser.parse <<-eoxml
33
+ <root xmlns:foo='http://foo.example.com/'>
34
+ <a foo:bar='hello' />
35
+ <b xmlns:foo='http://bar.example.com/'>
36
+ <a foo:bar='hello' />
37
+ </b>
38
+ <foo:bar>hello world</foo:bar>
39
+ </root>
40
+ eoxml
41
+ assert @parser.document.start_elements_namespace.length > 0
42
+ el = @parser.document.start_elements_namespace[1]
43
+ assert_equal 'a', el.first
44
+ assert_equal 1, el[1].length
45
+
46
+ attribute = el[1].first
47
+ assert_equal 'bar', attribute.localname
48
+ assert_equal 'foo', attribute.prefix
49
+ assert_equal 'hello', attribute.value
50
+ assert_equal 'http://foo.example.com/', attribute.uri
51
+ end
52
+
53
+ def test_sax_v1_namespace_attribute_declarations
54
+ @parser.parse <<-eoxml
55
+ <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
56
+ <a foo:bar='hello' />
57
+ <b xmlns:foo='http://bar.example.com/'>
58
+ <a foo:bar='hello' />
59
+ </b>
60
+ <foo:bar>hello world</foo:bar>
61
+ </root>
62
+ eoxml
63
+ assert @parser.document.start_elements.length > 0
64
+ elm = @parser.document.start_elements.first
65
+ assert_equal 'root', elm.first
66
+ assert elm[1].include?(['xmlns:foo', 'http://foo.example.com/'])
67
+ assert elm[1].include?(['xmlns', 'http://example.com/'])
68
+ end
69
+
70
+ def test_sax_v1_namespace_nodes
71
+ @parser.parse <<-eoxml
72
+ <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
73
+ <a foo:bar='hello' />
74
+ <b xmlns:foo='http://bar.example.com/'>
75
+ <a foo:bar='hello' />
76
+ </b>
77
+ <foo:bar>hello world</foo:bar>
78
+ </root>
79
+ eoxml
80
+ assert_equal 5, @parser.document.start_elements.length
81
+ assert @parser.document.start_elements.map { |se|
82
+ se.first
83
+ }.include?('foo:bar')
84
+ assert @parser.document.end_elements.map { |se|
85
+ se.first
86
+ }.include?('foo:bar')
87
+ end
88
+
89
+ def test_start_is_called_without_namespace
90
+ @parser.parse('<foo:f><bar></foo:f>')
91
+ assert_equal ['foo:f', 'bar'],
92
+ @parser.document.start_elements.map { |x| x.first }
93
+ end
94
+
18
95
  def test_parser_sets_encoding
19
96
  parser = XML::SAX::Parser.new(Doc.new, 'UTF-8')
20
97
  assert_equal 'UTF-8', parser.encoding
@@ -43,12 +43,16 @@ module Nokogiri
43
43
  <stream:stream xmlns='jabber:client' xmlns:stream='http://etherx.jabber.org/streams' version='1.0' size='large'></stream:stream>
44
44
  eoxml
45
45
 
46
- assert_equal [[ 'stream',
47
- {'version' => '1.0', 'size' => 'large'},
48
- 'stream',
49
- 'http://etherx.jabber.org/streams',
50
- {nil => 'jabber:client', 'stream' => 'http://etherx.jabber.org/streams'}]],
51
- @parser.document.start_elements_ns
46
+ assert_equal 1, @parser.document.start_elements_namespace.length
47
+ el = @parser.document.start_elements_namespace.first
48
+
49
+ assert_equal 'stream', el.first
50
+ assert_equal 2, el[1].length
51
+ assert_equal [['version', '1.0'], ['size', 'large']],
52
+ el[1].map { |x| [x.localname, x.value] }
53
+
54
+ assert_equal 'stream', el[2]
55
+ assert_equal 'http://etherx.jabber.org/streams', el[3]
52
56
  @parser.finish
53
57
  end
54
58
 
@@ -58,7 +62,7 @@ module Nokogiri
58
62
  eoxml
59
63
 
60
64
  assert_equal [['stream', 'stream', 'http://etherx.jabber.org/streams']],
61
- @parser.document.end_elements_ns
65
+ @parser.document.end_elements_namespace
62
66
  @parser.finish
63
67
  end
64
68
 
@@ -10,6 +10,31 @@ module Nokogiri
10
10
  @xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
11
11
  end
12
12
 
13
+ def test_validate
14
+ assert_equal 44, @xml.validate.length
15
+ end
16
+
17
+ def test_validate_no_internal_subset
18
+ doc = Nokogiri::XML('<test/>')
19
+ assert_nil doc.validate
20
+ end
21
+
22
+ def test_clone
23
+ assert @xml.clone
24
+ end
25
+
26
+ def test_document_should_not_have_default_ns
27
+ doc = Nokogiri::XML::Document.new
28
+
29
+ assert_raises NoMethodError do
30
+ doc.default_namespace = 'http://innernet.com/'
31
+ end
32
+
33
+ assert_raises NoMethodError do
34
+ doc.add_namespace_definition('foo', 'bar')
35
+ end
36
+ end
37
+
13
38
  def test_parse_takes_block
14
39
  options = nil
15
40
  Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) do |cfg|