nokogiri 1.3.1 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (62):
  1. data/CHANGELOG.ja.rdoc +25 -0
  2. data/CHANGELOG.rdoc +23 -0
  3. data/Manifest.txt +5 -0
  4. data/README.ja.rdoc +5 -5
  5. data/README.rdoc +3 -3
  6. data/Rakefile +27 -23
  7. data/ext/nokogiri/extconf.rb +54 -12
  8. data/ext/nokogiri/xml_document.c +4 -1
  9. data/ext/nokogiri/xml_document.h +2 -0
  10. data/ext/nokogiri/xml_dtd.c +29 -0
  11. data/ext/nokogiri/xml_node.c +9 -1
  12. data/ext/nokogiri/xml_node_set.c +5 -1
  13. data/ext/nokogiri/xml_relax_ng.c +50 -3
  14. data/ext/nokogiri/xml_sax_parser.c +84 -77
  15. data/ext/nokogiri/xml_schema.c +52 -3
  16. data/ext/nokogiri/xml_syntax_error.c +7 -0
  17. data/ext/nokogiri/xml_syntax_error.h +1 -0
  18. data/lib/nokogiri.rb +2 -2
  19. data/lib/nokogiri/css/parser.rb +2 -2
  20. data/lib/nokogiri/ffi/io_callbacks.rb +20 -12
  21. data/lib/nokogiri/ffi/libxml.rb +8 -0
  22. data/lib/nokogiri/ffi/xml/document.rb +1 -1
  23. data/lib/nokogiri/ffi/xml/dtd.rb +22 -6
  24. data/lib/nokogiri/ffi/xml/namespace.rb +9 -7
  25. data/lib/nokogiri/ffi/xml/node.rb +4 -0
  26. data/lib/nokogiri/ffi/xml/node_set.rb +4 -1
  27. data/lib/nokogiri/ffi/xml/relax_ng.rb +35 -3
  28. data/lib/nokogiri/ffi/xml/sax/parser.rb +20 -19
  29. data/lib/nokogiri/ffi/xml/schema.rb +41 -4
  30. data/lib/nokogiri/html.rb +2 -2
  31. data/lib/nokogiri/html/document.rb +3 -3
  32. data/lib/nokogiri/version.rb +2 -2
  33. data/lib/nokogiri/xml.rb +3 -3
  34. data/lib/nokogiri/xml/document.rb +14 -4
  35. data/lib/nokogiri/xml/fragment_handler.rb +8 -0
  36. data/lib/nokogiri/xml/node.rb +1 -104
  37. data/lib/nokogiri/xml/node_set.rb +46 -6
  38. data/lib/nokogiri/xml/parse_options.rb +7 -2
  39. data/lib/nokogiri/xml/relax_ng.rb +2 -2
  40. data/lib/nokogiri/xml/sax.rb +1 -0
  41. data/lib/nokogiri/xml/sax/document.rb +4 -4
  42. data/lib/nokogiri/xml/sax/legacy_handlers.rb +65 -0
  43. data/lib/nokogiri/xml/sax/parser.rb +7 -0
  44. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  45. data/lib/nokogiri/xml/schema.rb +1 -5
  46. data/lib/xsd/xmlparser/nokogiri.rb +14 -7
  47. data/tasks/test.rb +1 -62
  48. data/test/files/bar/bar.xsd +4 -0
  49. data/test/files/foo/foo.xsd +4 -0
  50. data/test/files/snuggles.xml +3 -0
  51. data/test/files/valid_bar.xml +2 -0
  52. data/test/helper.rb +9 -8
  53. data/test/html/test_document_fragment.rb +14 -0
  54. data/test/test_reader.rb +10 -10
  55. data/test/xml/sax/test_parser.rb +77 -0
  56. data/test/xml/sax/test_push_parser.rb +11 -7
  57. data/test/xml/test_document.rb +25 -0
  58. data/test/xml/test_dtd.rb +6 -1
  59. data/test/xml/test_node.rb +7 -0
  60. data/test/xml/test_node_set.rb +19 -0
  61. data/test/xml/test_schema.rb +24 -0
  62. metadata +10 -5
@@ -2,22 +2,30 @@ module Nokogiri
2
2
  module IoCallbacks # :nodoc:
3
3
 
4
4
  class << self
5
+
6
+ def plain_old_reader(io)
7
+ lambda do |ctx, buffer, len|
8
+ string = io.read(len)
9
+ return 0 if string.nil?
10
+ buffer.put_bytes(0, string, 0, string.length)
11
+ string.length
12
+ end
13
+ end
14
+
5
15
  if defined?(FFI::IO.native_read)
6
- def reader(io)
7
- lambda do |ctx, buffer, len|
8
- rcode = FFI::IO.native_read(io, buffer, len)
9
- (rcode < 0) ? 0 : rcode
16
+ def ffi_io_native_reader(io)
17
+ if io.is_a?(StringIO)
18
+ plain_old_reader(io)
19
+ else
20
+ lambda do |ctx, buffer, len|
21
+ rcode = FFI::IO.native_read(io, buffer, len)
22
+ (rcode < 0) ? 0 : rcode
23
+ end
10
24
  end
11
25
  end
26
+ alias :reader :ffi_io_native_reader
12
27
  else
13
- def reader(io) # TODO: this can be removed once JRuby 1.3.0RC2 and ruby-ffi 0.4.0 are both released
14
- lambda do |ctx, buffer, len|
15
- string = io.read(len)
16
- return 0 if string.nil?
17
- buffer.put_bytes(0, string, 0, string.length)
18
- string.length
19
- end
20
- end
28
+ alias :reader :plain_old_reader
21
29
  end
22
30
 
23
31
  def writer(io)
@@ -118,6 +118,7 @@ module Nokogiri
118
118
  attach_function :xmlStringGetNodeList, [:pointer, :pointer], :pointer # second arg should be a :string, but we only ship the results of xmlEncodeEntitiesReentrant, so let's optimize.
119
119
  attach_function :xmlNewNs, [:pointer, :string, :string], :pointer
120
120
  attach_function :xmlNewNsProp, [:pointer, :pointer, :string, :string], :pointer
121
+ attach_function :xmlSearchNs, [:pointer, :pointer, :string], :pointer
121
122
  attach_function :xmlSearchNsByHref, [:pointer, :pointer, :string], :pointer
122
123
  attach_function :xmlGetIntSubset, [:pointer], :pointer
123
124
  attach_function :xmlBufferCreate, [], :pointer
@@ -126,6 +127,11 @@ module Nokogiri
126
127
  attach_function :xmlNewDocProp, [:pointer, :string, :string], :pointer
127
128
  attach_function :xmlFreePropList, [:pointer], :void
128
129
 
130
+ # valid.c
131
+ attach_function :xmlNewValidCtxt, [], :pointer
132
+ attach_function :xmlValidateDtd, [:pointer, :pointer, :pointer], :int
133
+ attach_function :xmlFreeValidCtxt, [:pointer], :void
134
+
129
135
  # xmlsave.c
130
136
  attach_function :xmlDocDumpMemory, [:pointer, :pointer, :pointer], :void
131
137
  attach_function :xmlNodeDump, [:pointer, :pointer, :pointer, :int, :int], :int
@@ -240,6 +246,7 @@ module Nokogiri
240
246
  attach_function :xmlSchemaSetParserStructuredErrors, [:pointer, :syntax_error_handler, :pointer], :void unless Nokogiri.is_2_6_16?
241
247
  attach_function :xmlSchemaParse, [:pointer], :pointer
242
248
  attach_function :xmlSchemaFreeParserCtxt, [:pointer], :void
249
+ attach_function :xmlSchemaNewDocParserCtxt, [:pointer], :pointer
243
250
 
244
251
  # relaxng.c
245
252
  attach_function :xmlRelaxNGNewValidCtxt, [:pointer], :pointer
@@ -250,6 +257,7 @@ module Nokogiri
250
257
  attach_function :xmlRelaxNGSetParserStructuredErrors, [:pointer, :syntax_error_handler, :pointer], :void unless Nokogiri.is_2_6_16?
251
258
  attach_function :xmlRelaxNGParse, [:pointer], :pointer
252
259
  attach_function :xmlRelaxNGFreeParserCtxt, [:pointer], :void
260
+ attach_function :xmlRelaxNGNewDocParserCtxt, [:pointer], :pointer
253
261
 
254
262
  # helpers
255
263
  def self.pointer_offset(n)
@@ -84,7 +84,7 @@ module Nokogiri
84
84
  LibXML.xmlSetStructuredErrorFunc(nil, SyntaxError.error_array_pusher(error_list))
85
85
 
86
86
  ptr = yield
87
-
87
+
88
88
  LibXML.xmlSetStructuredErrorFunc(nil, nil)
89
89
 
90
90
  if ptr.null?
@@ -1,16 +1,30 @@
1
1
  module Nokogiri
2
2
  module XML
3
3
  class DTD < Node
4
-
5
- def elements # :nodoc:
4
+ # :stopdoc:
5
+ def validate document
6
+ error_list = []
7
+ ctxt = LibXML.xmlNewValidCtxt
8
+
9
+ LibXML.xmlSetStructuredErrorFunc(nil, SyntaxError.error_array_pusher(error_list))
10
+ LibXML.xmlValidateDtd ctxt, document.cstruct, cstruct
11
+
12
+ LibXML.xmlSetStructuredErrorFunc nil, nil
13
+
14
+ LibXML.xmlFreeValidCtxt ctxt
15
+
16
+ error_list
17
+ end
18
+
19
+ def elements
6
20
  internal_attributes :elements
7
21
  end
8
22
 
9
- def entities # :nodoc:
23
+ def entities
10
24
  internal_attributes :entities
11
25
  end
12
26
 
13
- def notations # :nodoc:
27
+ def notations
14
28
  attr_ptr = cstruct[:notations]
15
29
  return nil if attr_ptr.null?
16
30
 
@@ -24,8 +38,9 @@ module Nokogiri
24
38
  ahash
25
39
  end
26
40
 
27
- private
28
- def internal_attributes(attr_name) # :nodoc:
41
+ private
42
+
43
+ def internal_attributes attr_name
29
44
  attr_ptr = cstruct[attr_name.to_sym]
30
45
  return nil if attr_ptr.null?
31
46
 
@@ -37,6 +52,7 @@ module Nokogiri
37
52
  ahash
38
53
  end
39
54
 
55
+ # :startdoc:
40
56
  end
41
57
  end
42
58
  end
@@ -1,20 +1,21 @@
1
1
  module Nokogiri
2
2
  module XML
3
- class Namespace # :nodoc:
3
+ class Namespace
4
+ # :stopdoc:
4
5
 
5
- attr_accessor :cstruct # :nodoc:
6
- attr_accessor :document # :nodoc:
6
+ attr_accessor :cstruct
7
+ attr_accessor :document
7
8
 
8
- def prefix # :nodoc:
9
+ def prefix
9
10
  cstruct[:prefix].nil? ? nil : cstruct[:prefix] # TODO: encoding?
10
11
  end
11
12
 
12
- def href # :nodoc:
13
+ def href
13
14
  cstruct[:href].nil? ? nil : cstruct[:href] # TODO: encoding?
14
15
  end
15
16
 
16
17
  class << self
17
- def wrap(document, node_struct) # :nodoc:
18
+ def wrap(document, node_struct)
18
19
  if node_struct.is_a?(FFI::Pointer)
19
20
  # cast native pointers up into a node cstruct
20
21
  return nil if node_struct.null?
@@ -27,7 +28,7 @@ module Nokogiri
27
28
  ns = Nokogiri::XML::Namespace.allocate
28
29
  ns.document = document.ruby_doc
29
30
  ns.cstruct = node_struct
30
-
31
+
31
32
  ns.cstruct.ruby_node = ns
32
33
 
33
34
  cache = ns.document.instance_variable_get(:@node_cache)
@@ -37,6 +38,7 @@ module Nokogiri
37
38
  end
38
39
  end
39
40
 
41
+ # :startdoc:
40
42
  end
41
43
  end
42
44
  end
@@ -215,6 +215,10 @@ module Nokogiri
215
215
 
216
216
  def add_namespace_definition(prefix, href) # :nodoc:
217
217
  ns = LibXML.xmlNewNs(cstruct, href, prefix)
218
+ if ns.null?
219
+ ns = LibXML.xmlSearchNs(cstruct.document, cstruct,
220
+ prefix.nil? ? nil : prefix.to_s)
221
+ end
218
222
  LibXML.xmlSetNs(cstruct, ns) if prefix.nil?
219
223
  Namespace.wrap(cstruct.document, ns)
220
224
  end
@@ -23,7 +23,10 @@ module Nokogiri
23
23
  raise(ArgumentError, "node_set must be a Nokogiri::XML::NodeSet") unless node_set.is_a?(XML::NodeSet)
24
24
  new_set_ptr = LibXML::xmlXPathNodeSetMerge(nil, self.cstruct)
25
25
  new_set_ptr = LibXML::xmlXPathNodeSetMerge(new_set_ptr, node_set.cstruct)
26
- NodeSet.wrap(new_set_ptr)
26
+
27
+ new_set = NodeSet.wrap(new_set_ptr)
28
+ new_set.document = document
29
+ new_set
27
30
  end
28
31
 
29
32
  def -(node_set) # :nodoc:
@@ -1,7 +1,8 @@
1
1
  module Nokogiri
2
2
  module XML
3
3
  class RelaxNG < Schema
4
- def validate_document(document) # :nodoc:
4
+ # :stopdoc:
5
+ def validate_document document
5
6
  errors = []
6
7
 
7
8
  ctx = LibXML.xmlRelaxNGNewValidCtxt(cstruct)
@@ -18,8 +19,9 @@ module Nokogiri
18
19
  end
19
20
  private :validate_document
20
21
 
21
- def self.read_memory(content) # :nodoc:
22
- ctx = LibXML.xmlRelaxNGNewMemParserCtxt(content, content.length)
22
+ def self.read_memory content
23
+ content_copy = FFI::MemoryPointer.from_string(content)
24
+ ctx = LibXML.xmlRelaxNGNewMemParserCtxt(content_copy, content.length)
23
25
 
24
26
  errors = []
25
27
 
@@ -46,6 +48,36 @@ module Nokogiri
46
48
  schema.errors = errors
47
49
  schema
48
50
  end
51
+
52
+ def self.from_document document
53
+ ctx = LibXML.xmlRelaxNGNewDocParserCtxt document.document.cstruct
54
+
55
+ errors = []
56
+
57
+ LibXML.xmlSetStructuredErrorFunc(nil, SyntaxError.error_array_pusher(errors))
58
+ LibXML.xmlRelaxNGSetParserStructuredErrors(
59
+ ctx, SyntaxError.error_array_pusher(errors), nil) unless Nokogiri.is_2_6_16?
60
+
61
+ schema_ptr = LibXML.xmlRelaxNGParse(ctx)
62
+
63
+ LibXML.xmlSetStructuredErrorFunc(nil, nil)
64
+ LibXML.xmlRelaxNGFreeParserCtxt(ctx)
65
+
66
+ if schema_ptr.null?
67
+ error = LibXML.xmlGetLastError
68
+ if error
69
+ raise SyntaxError.wrap(error)
70
+ else
71
+ raise RuntimeError, "Could not parse document"
72
+ end
73
+ end
74
+
75
+ schema = allocate
76
+ schema.cstruct = LibXML::XmlRelaxNG.new schema_ptr
77
+ schema.errors = errors
78
+ schema
79
+ end
80
+ # :startdoc:
49
81
  end
50
82
  end
51
83
  end
@@ -2,7 +2,7 @@ module Nokogiri
2
2
  module XML
3
3
  module SAX
4
4
  class Parser
5
-
5
+
6
6
  attr_accessor :cstruct # :nodoc:
7
7
 
8
8
  def parse_memory(data) # :nodoc:
@@ -35,7 +35,7 @@ module Nokogiri
35
35
 
36
36
  def setup_lambdas # :nodoc:
37
37
  @closures = {} # we need to keep references to the closures to avoid GC
38
-
38
+
39
39
  [ :startDocument, :endDocument, :startElement, :endElement, :characters,
40
40
  :comment, :warning, :error, :cdataBlock, :startElementNs, :endElementNs ].each do |sym|
41
41
  @closures[sym] = lambda { |*args| send("__internal__#{sym}", *args) } # "i'm your private dancer", etc.
@@ -97,17 +97,21 @@ module Nokogiri
97
97
  prefix = prefix .null? ? nil : prefix .read_string
98
98
  uri = uri .null? ? nil : uri .read_string
99
99
 
100
- attr_hash = {}
101
- ns_hash = {}
100
+ attr_list = []
101
+ ns_list = []
102
102
 
103
103
  if ! attributes.null?
104
104
  # Each attribute is an array of [localname, prefix, URI, value, end]
105
105
  (0..(nb_attributes-1)*5).step(5) do |j|
106
106
  key = attributes.get_pointer(LibXML.pointer_offset(j)).read_string
107
+ attr_prefix = attributes.get_pointer(LibXML.pointer_offset(j + 1))
108
+ attr_prefix = attr_prefix.null? ? nil : attr_prefix.read_string
109
+ attr_uri = attributes.get_pointer(LibXML.pointer_offset(j + 2))
110
+ attr_uri = attr_uri.null? ? nil : attr_uri.read_string
107
111
  value_length = attributes.get_pointer(LibXML.pointer_offset(j+4)).address \
108
112
  - attributes.get_pointer(LibXML.pointer_offset(j+3)).address
109
113
  value = attributes.get_pointer(LibXML.pointer_offset(j+3)).get_string(0, value_length)
110
- attr_hash[key] = value
114
+ attr_list << Attribute.new(key, attr_prefix, attr_uri, value)
111
115
  end
112
116
  end
113
117
 
@@ -117,16 +121,18 @@ module Nokogiri
117
121
  key = key.null? ? nil : key.read_string
118
122
  value = namespaces.get_pointer(LibXML.pointer_offset(j+1))
119
123
  value = value.null? ? nil : value.read_string
120
- ns_hash[key] = value
124
+ ns_list << [key, value]
121
125
  end
122
126
  end
123
127
 
124
- @document.start_element_ns(localname, attr_hash, prefix, uri, ns_hash)
125
-
126
- if @document.respond_to?(:start_element)
127
- name = prefix ? "#{prefix}:#{localname}" : localname
128
- @document.start_element(name, attr_hash.to_a.flatten)
129
- end
128
+ @document.start_element_namespace(
129
+ localname,
130
+ attr_list,
131
+ prefix,
132
+ uri,
133
+ ns_list
134
+ )
135
+ start_element_namespace(localname, attr_list, prefix, uri, ns_list)
130
136
  end
131
137
 
132
138
  def __internal__endElementNs(_, localname, prefix, uri) # :nodoc:
@@ -134,14 +140,9 @@ module Nokogiri
134
140
  prefix = prefix .null? ? nil : prefix .read_string
135
141
  uri = uri .null? ? nil : uri .read_string
136
142
 
137
- @document.end_element_ns(localname, prefix, uri)
138
-
139
- if @document.respond_to?(:end_element)
140
- name = prefix ? "#{prefix}:#{localname}" : localname
141
- @document.end_element(name)
142
- end
143
+ @document.end_element_namespace(localname, prefix, uri)
144
+ end_element_namespace(localname, prefix, uri)
143
145
  end
144
-
145
146
  end
146
147
  end
147
148
  end
@@ -1,10 +1,11 @@
1
1
  module Nokogiri
2
2
  module XML
3
3
  class Schema
4
+ # :stopdoc:
4
5
 
5
- attr_accessor :cstruct # :nodoc:
6
+ attr_accessor :cstruct
6
7
 
7
- def validate_document(document) # :nodoc:
8
+ def validate_document document
8
9
  errors = []
9
10
 
10
11
  ctx = LibXML.xmlSchemaNewValidCtxt(cstruct)
@@ -21,8 +22,9 @@ module Nokogiri
21
22
  end
22
23
  private :validate_document
23
24
 
24
- def self.read_memory(content) # :nodoc:
25
- ctx = LibXML.xmlSchemaNewMemParserCtxt(content, content.length)
25
+ def self.read_memory content
26
+ content_copy = FFI::MemoryPointer.from_string(content)
27
+ ctx = LibXML.xmlSchemaNewMemParserCtxt(content_copy, content.length)
26
28
 
27
29
  errors = []
28
30
 
@@ -49,6 +51,41 @@ module Nokogiri
49
51
  schema
50
52
  end
51
53
 
54
+ def self.from_document document
55
+ ctx = LibXML.xmlSchemaNewDocParserCtxt(document.document.cstruct)
56
+
57
+ errors = []
58
+
59
+ LibXML.xmlSetStructuredErrorFunc(nil, SyntaxError.error_array_pusher(errors))
60
+ unless Nokogiri.is_2_6_16?
61
+ LibXML.xmlSchemaSetParserStructuredErrors(
62
+ ctx,
63
+ SyntaxError.error_array_pusher(errors),
64
+ nil
65
+ )
66
+ end
67
+
68
+ schema_ptr = LibXML.xmlSchemaParse(ctx)
69
+
70
+ LibXML.xmlSetStructuredErrorFunc(nil, nil)
71
+ LibXML.xmlSchemaFreeParserCtxt(ctx)
72
+
73
+ if schema_ptr.null?
74
+ error = LibXML.xmlGetLastError
75
+ if error
76
+ raise SyntaxError.wrap(error)
77
+ else
78
+ raise RuntimeError, "Could not parse document"
79
+ end
80
+ end
81
+
82
+ schema = allocate
83
+ schema.cstruct = LibXML::XmlSchema.new schema_ptr
84
+ schema.errors = errors
85
+ schema
86
+ end
87
+
88
+ # :startdoc:
52
89
  end
53
90
  end
54
91
  end
@@ -8,7 +8,7 @@ module Nokogiri
8
8
  class << self
9
9
  ###
10
10
  # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
11
- def HTML thing, url = nil, encoding = nil, options = 2145, &block
11
+ def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
12
12
  Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
13
13
  end
14
14
  end
@@ -17,7 +17,7 @@ module Nokogiri
17
17
  class << self
18
18
  ###
19
19
  # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
20
- def parse thing, url = nil, encoding = nil, options = 2145, &block
20
+ def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
21
21
  Document.parse(thing, url, encoding, options, &block)
22
22
  end
23
23
 
@@ -44,9 +44,9 @@ Please change to #{self.class}#serialize(:encoding => enc, :save_with => opts)
44
44
  # +url+ is resource where this document is located. +encoding+ is the
45
45
  # encoding that should be used when processing the document. +options+
46
46
  # is a number that sets options in the parser, such as
47
- # Nokogiri::XML::PARSE_RECOVER. See the constants in
48
- # Nokogiri::XML.
49
- def parse string_or_io, url = nil, encoding = nil, options = 2145, &block
47
+ # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
48
+ # Nokogiri::XML::ParseOptions.
49
+ def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
50
50
 
51
51
  options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
52
52
  # Give the options to the user