nokogiri 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (75) hide show
  1. data.tar.gz.sig +0 -0
  2. data/.autotest +5 -6
  3. data/CHANGELOG.ja.rdoc +47 -11
  4. data/CHANGELOG.rdoc +31 -0
  5. data/Manifest.txt +8 -1
  6. data/README.ja.rdoc +4 -3
  7. data/README.rdoc +9 -1
  8. data/Rakefile +4 -0
  9. data/deps.rip +5 -0
  10. data/ext/nokogiri/extconf.rb +4 -0
  11. data/ext/nokogiri/html_element_description.c +1 -1
  12. data/ext/nokogiri/nokogiri.c +7 -0
  13. data/ext/nokogiri/nokogiri.h +4 -1
  14. data/ext/nokogiri/xml_document.c +3 -5
  15. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  16. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  17. data/ext/nokogiri/xml_namespace.c +8 -0
  18. data/ext/nokogiri/xml_namespace.h +1 -0
  19. data/ext/nokogiri/xml_node.c +61 -41
  20. data/ext/nokogiri/xml_node_set.c +22 -14
  21. data/ext/nokogiri/xml_sax_parser.c +0 -3
  22. data/ext/nokogiri/xml_sax_parser_context.c +2 -0
  23. data/ext/nokogiri/xml_sax_push_parser.c +26 -3
  24. data/ext/nokogiri/xml_syntax_error.c +18 -227
  25. data/lib/nokogiri/css/generated_parser.rb +173 -160
  26. data/lib/nokogiri/css/generated_tokenizer.rb +4 -1
  27. data/lib/nokogiri/css/parser.y +4 -1
  28. data/lib/nokogiri/css/tokenizer.rex +2 -1
  29. data/lib/nokogiri/css/xpath_visitor.rb +2 -0
  30. data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
  31. data/lib/nokogiri/ffi/html/element_description.rb +5 -9
  32. data/lib/nokogiri/ffi/libxml.rb +21 -5
  33. data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
  34. data/lib/nokogiri/ffi/structs/xml_document.rb +3 -3
  35. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +110 -1
  36. data/lib/nokogiri/ffi/xml/dtd.rb +2 -4
  37. data/lib/nokogiri/ffi/xml/node.rb +38 -17
  38. data/lib/nokogiri/ffi/xml/node_set.rb +21 -8
  39. data/lib/nokogiri/ffi/xml/reader.rb +1 -1
  40. data/lib/nokogiri/ffi/xml/sax/parser.rb +1 -8
  41. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +16 -4
  42. data/lib/nokogiri/ffi/xml/syntax_error.rb +9 -2
  43. data/lib/nokogiri/ffi/xslt/stylesheet.rb +12 -9
  44. data/lib/nokogiri/version.rb +1 -1
  45. data/lib/nokogiri/xml/builder.rb +1 -1
  46. data/lib/nokogiri/xml/document.rb +35 -4
  47. data/lib/nokogiri/xml/document_fragment.rb +5 -1
  48. data/lib/nokogiri/xml/fragment_handler.rb +28 -20
  49. data/lib/nokogiri/xml/node.rb +84 -13
  50. data/lib/nokogiri/xml/node_set.rb +19 -2
  51. data/lib/nokogiri/xml/sax/push_parser.rb +1 -1
  52. data/lib/nokogiri/xml/syntax_error.rb +10 -5
  53. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  54. data/lib/xsd/xmlparser/nokogiri.rb +20 -1
  55. data/test/css/test_parser.rb +5 -0
  56. data/test/css/test_tokenizer.rb +7 -0
  57. data/test/helper.rb +0 -5
  58. data/test/html/test_document_fragment.rb +39 -1
  59. data/test/html/test_node.rb +14 -0
  60. data/test/test_encoding_handler.rb +46 -0
  61. data/test/test_memory_leak.rb +10 -0
  62. data/test/test_nokogiri.rb +5 -1
  63. data/test/test_soap4r_sax.rb +52 -0
  64. data/test/test_xslt_transforms.rb +69 -26
  65. data/test/xml/sax/test_parser_context.rb +7 -0
  66. data/test/xml/sax/test_push_parser.rb +33 -0
  67. data/test/xml/test_document.rb +27 -1
  68. data/test/xml/test_document_fragment.rb +6 -0
  69. data/test/xml/test_node.rb +63 -214
  70. data/test/xml/test_node_reparenting.rb +261 -0
  71. data/test/xml/test_node_set.rb +51 -0
  72. data/test/xml/test_syntax_error.rb +0 -15
  73. metadata +35 -5
  74. metadata.gz.sig +0 -0
  75. data/test/test_gc.rb +0 -15
@@ -78,18 +78,25 @@ module Nokogiri
78
78
  end
79
79
 
80
80
  def to_a # :nodoc:
81
- cstruct.nodeTab.collect { |node| Node.wrap(node) }
81
+ cstruct.nodeTab.collect do |node|
82
+ node_cstruct = LibXML::XmlNode.new(node)
83
+ if node_cstruct[:type] == XML::Node::NAMESPACE_DECL
84
+ Namespace.wrap(document.cstruct, node)
85
+ else
86
+ Node.wrap(node_cstruct)
87
+ end
88
+ end
82
89
  end
83
90
 
84
91
  def unlink # :nodoc:
85
- # TODO: is this simpler implementation viable:
86
- # cstruct.nodeTab.collect {|node| Node.wrap(node)}.each(&:unlink)
87
- # ?
88
92
  nodetab = cstruct.nodeTab
89
93
  cstruct[:nodeNr].times do |j|
90
- node = Node.wrap(nodetab[j])
91
- node.unlink
92
- nodetab[j] = node.cstruct.pointer
94
+ node_cstruct = LibXML::XmlNode.new(nodetab[j])
95
+ if node_cstruct[:type] != XML::Node::NAMESPACE_DECL
96
+ node = Node.wrap(node_cstruct)
97
+ node.unlink
98
+ nodetab[j] = node.cstruct.pointer
99
+ end
93
100
  end
94
101
  cstruct.nodeTab = nodetab
95
102
  self
@@ -114,7 +121,13 @@ module Nokogiri
114
121
  def index_at(number) # :nodoc:
115
122
  return nil if (number >= cstruct[:nodeNr] || number.abs > cstruct[:nodeNr])
116
123
  number = number + cstruct[:nodeNr] if number < 0
117
- Node.wrap(cstruct.nodeAt(number))
124
+ node_ptr = cstruct.nodeAt(number)
125
+ node_cstruct = LibXML::XmlNode.new(node_ptr)
126
+ if node_cstruct[:type] == XML::Node::NAMESPACE_DECL
127
+ Namespace.wrap(document.cstruct, node_ptr)
128
+ else
129
+ Node.wrap(node_cstruct)
130
+ end
118
131
  end
119
132
 
120
133
  def subseq(beg, len) # :nodoc:
@@ -60,7 +60,7 @@ module Nokogiri
60
60
  if attr_ptr.null?
61
61
  # this section is an attempt to workaround older versions of libxml that
62
62
  # don't handle namespaces properly in all attribute-and-friends functions
63
- prefix_ptr = FFI::MemoryPointer.new :pointer
63
+ prefix_ptr = FFI::Buffer.new :pointer
64
64
  localname = LibXML.xmlSplitQName2(name, prefix_ptr)
65
65
  prefix = prefix_ptr.get_pointer(0)
66
66
  if ! localname.null?
@@ -48,14 +48,7 @@ module Nokogiri
48
48
  end
49
49
 
50
50
  def __internal__startElement(_, name, attributes)
51
- attrs = []
52
- unless attributes.null?
53
- j = 0
54
- while ! (value = attributes.get_pointer(LibXML.pointer_offset(j))).null?
55
- attrs << value.read_string
56
- j += 1
57
- end
58
- end
51
+ attrs = attributes.null? ? [] : attributes.get_array_of_string(0)
59
52
  @document.start_element name, attrs
60
53
  end
61
54
 
@@ -5,6 +5,17 @@ module Nokogiri
5
5
 
6
6
  attr_accessor :cstruct # :nodoc:
7
7
 
8
+ def options
9
+ cstruct[:options]
10
+ end
11
+
12
+ def options=(user_options)
13
+ if LibXML.xmlCtxtUseOptions(cstruct, user_options) != 0
14
+ raise RuntimeError, "Cannot set XML parser context options"
15
+ end
16
+ nil
17
+ end
18
+
8
19
  private
9
20
 
10
21
  def native_write(chunk, last_chunk) # :nodoc:
@@ -14,10 +25,11 @@ module Nokogiri
14
25
  size = chunk.length
15
26
  end
16
27
 
17
- rcode = LibXML.xmlParseChunk(cstruct, chunk, size, last_chunk ? 1 : 0)
18
- if rcode != 0
19
- error = LibXML.xmlCtxtGetLastError(cstruct)
20
- raise Nokogiri::XML::SyntaxError.wrap(error)
28
+ if LibXML.xmlParseChunk(cstruct, chunk, size, last_chunk ? 1 : 0) != 0
29
+ if (cstruct[:options] & XML::ParseOptions::RECOVER) == 0
30
+ error = LibXML.xmlCtxtGetLastError(cstruct)
31
+ raise Nokogiri::XML::SyntaxError.wrap(error)
32
+ end
21
33
  end
22
34
 
23
35
  self
@@ -7,7 +7,7 @@ module Nokogiri
7
7
 
8
8
  def initialize(message)
9
9
  self.cstruct = LibXML::XmlSyntaxError.new(LibXML::XmlSyntaxError.allocate())
10
- self.cstruct[:message] = FFI::MemoryPointer.from_string(message)
10
+ self.cstruct[:message] = LibXML.xmlStrdup(message)
11
11
  end
12
12
 
13
13
  def domain
@@ -31,7 +31,14 @@ module Nokogiri
31
31
  unless cstruct[:message].null?
32
32
  LibXML.xmlFree(cstruct[:message])
33
33
  end
34
- cstruct[:message] = string
34
+ cstruct[:message] = LibXML.xmlStrdup(string)
35
+ string
36
+ end
37
+
38
+ def initialize_copy(other)
39
+ raise ArgumentError, "node must be a Nokogiri::XML::SyntaxError" unless other.is_a?(Nokogiri::XML::SyntaxError)
40
+ LibXML.xmlCopyError(other.cstruct, cstruct)
41
+ self
35
42
  end
36
43
 
37
44
  def level
@@ -22,19 +22,22 @@ module Nokogiri
22
22
  end
23
23
 
24
24
  def serialize(document) # :nodoc:
25
- buf_ptr = FFI::MemoryPointer.new :pointer
26
- buf_len = FFI::MemoryPointer.new :int
25
+ buf_ptr = FFI::Buffer.new :pointer
26
+ buf_len = FFI::Buffer.new :int
27
27
  LibXML.xsltSaveResultToString(buf_ptr, buf_len, document.cstruct, cstruct)
28
- buf = Nokogiri::LibXML::XmlAlloc.new(buf_ptr.read_pointer)
29
- buf.pointer.read_string(buf_len.read_int)
28
+ buf = Nokogiri::LibXML::XmlAlloc.new(buf_ptr.get_pointer(0))
29
+ buf.pointer.read_string(buf_len.get_int(0))
30
30
  end
31
31
 
32
32
  def transform(document, params=[]) # :nodoc:
33
- param_arr = FFI::MemoryPointer.new(:pointer, params.length + 1)
34
- params.each_with_index do |param, j|
35
- param_arr[j].put_pointer(0, FFI::MemoryPointer.from_string(param.to_s))
36
- end
37
- param_arr[params.length].put_pointer(0,nil)
33
+ param_arr = FFI::MemoryPointer.new(:pointer, params.length + 1, false)
34
+
35
+ # Keep the MemoryPointer instances alive until after the call
36
+ ptrs = params.map { |param | FFI::MemoryPointer.from_string(param.to_s) }
37
+ param_arr.put_array_of_pointer(0, ptrs)
38
+
39
+ # Terminate the list with a NULL pointer
40
+ param_arr.put_pointer(LibXML.pointer_offset(params.length), nil)
38
41
 
39
42
  ptr = LibXML.xsltApplyStylesheet(cstruct, document.cstruct, param_arr)
40
43
  raise(RuntimeError, "could not perform xslt transform on document") if ptr.null?
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.4.0'
3
+ VERSION = '1.4.1'
4
4
 
5
5
  # More complete version information about libxml
6
6
  VERSION_INFO = {}
@@ -269,7 +269,7 @@ module Nokogiri
269
269
  ###
270
270
  # Create a Text Node with content of +string+
271
271
  def text string
272
- insert @doc.create_text_node string
272
+ insert @doc.create_text_node(string)
273
273
  end
274
274
 
275
275
  ###
@@ -60,6 +60,34 @@ module Nokogiri
60
60
  self
61
61
  end
62
62
 
63
+ ###
64
+ # Recursively get all namespaces from this node and its subtree and
65
+ # return them as a hash.
66
+ #
67
+ # For example, given this document:
68
+ #
69
+ # <root xmlns:foo="bar">
70
+ # <bar xmlns:hello="world" />
71
+ # </root>
72
+ #
73
+ # This method will return:
74
+ #
75
+ # { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
76
+ #
77
+ # WARNING: this method will clobber duplicate names in the keys.
78
+ # For example, given this document:
79
+ #
80
+ # <root xmlns:foo="bar">
81
+ # <bar xmlns:foo="baz" />
82
+ # </root>
83
+ #
84
+ # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
85
+ def collect_namespaces
86
+ ns = {}
87
+ traverse { |j| ns.merge!(j.namespaces) }
88
+ ns
89
+ end
90
+
63
91
  # Get the list of decorators given +key+
64
92
  def decorators key
65
93
  @decorators ||= Hash.new
@@ -112,13 +140,16 @@ module Nokogiri
112
140
 
113
141
  undef_method :swap, :parent, :namespace, :default_namespace=
114
142
  undef_method :add_namespace_definition, :attributes
115
- undef_method :namespace_definitions, :line
143
+ undef_method :namespace_definitions, :line, :add_namespace
116
144
 
117
145
  def add_child child
118
- if [Node::ELEMENT_NODE, Node::DOCUMENT_FRAG_NODE].include? child.type
119
- raise "Document already has a root node" if root
146
+ raise "Document already has a root node" if root
147
+ if child.type == Node::DOCUMENT_FRAG_NODE
148
+ raise "Document cannot have multiple root nodes" if child.children.size > 1
149
+ super(child.children.first)
150
+ else
151
+ super
120
152
  end
121
- super
122
153
  end
123
154
  alias :<< :add_child
124
155
 
@@ -51,7 +51,11 @@ module Nokogiri
51
51
  ###
52
52
  # Search this fragment. See Nokogiri::XML::Node#css
53
53
  def css *args
54
- children.css(*args)
54
+ if children.any?
55
+ children.css(*args)
56
+ else
57
+ NodeSet.new(document)
58
+ end
55
59
  end
56
60
 
57
61
  alias :serialize :to_s
@@ -8,33 +8,27 @@ module Nokogiri
8
8
  @document = node.document
9
9
  @stack = [node]
10
10
  @html_eh = node.kind_of? HTML::DocumentFragment
11
-
12
- # the regexes used in start_element() and characters() anchor at
13
- # start-of-line, but we really only want them to anchor at
14
- # start-of-doc. so let's only save up to the first newline.
15
- #
16
- # this implementation choice was the result of some benchmarks, if
17
- # you're curious: http://gist.github.com/115936
18
- #
19
- @original_html = original_html.lstrip
20
- newline_index = @original_html.index("\n")
21
- @original_html = @original_html[0,newline_index] if newline_index
11
+ @original_html = prepare_for_regex(original_html.strip)
22
12
  end
23
13
 
24
14
  def start_element name, attrs = []
25
15
  regex = @html_eh ? %r{^\s*<#{Regexp.escape(name)}}i :
26
16
  %r{^\s*<#{Regexp.escape(name)}}
27
17
 
28
- @doc_started = true if @original_html =~ regex
18
+ if ! @doc_started && @original_html =~ regex
19
+ @doc_started = true
20
+ end
29
21
  return unless @doc_started
30
22
 
31
- if match = name.match(QNAME_REGEX)
32
- prefix, name = match[1], match[2]
33
- ns = @document.root.namespace_definitions.detect { |x|
34
- x.prefix == prefix
35
- }
36
- else
37
- ns = nil
23
+ ns = nil
24
+ if @document.root
25
+ match = name.match(QNAME_REGEX)
26
+ if match
27
+ prefix, name = match[1], match[2]
28
+ ns = @document.root.namespace_definitions.detect { |x|
29
+ x.prefix == prefix
30
+ }
31
+ end
38
32
  end
39
33
 
40
34
  node = Element.new(name, @document)
@@ -50,7 +44,7 @@ module Nokogiri
50
44
  end
51
45
 
52
46
  def characters string
53
- @doc_started = true if @original_html.strip =~ %r{^\s*#{Regexp.escape(string.strip)}}
47
+ @doc_started = true
54
48
  @stack.last << Text.new(string, @document)
55
49
  end
56
50
 
@@ -66,6 +60,20 @@ module Nokogiri
66
60
  return unless @stack.last.name == name
67
61
  @stack.pop
68
62
  end
63
+
64
+ private
65
+
66
+ #
67
+ # the regexes used in start_element() and characters() anchor at
68
+ # start-of-line, but we really only want them to anchor at
69
+ # start-of-doc. so let's only save up to the first newline.
70
+ #
71
+ # this implementation choice was the result of some benchmarks, if
72
+ # you're curious: http://gist.github.com/115936
73
+ #
74
+ def prepare_for_regex(string)
75
+ (newline_index = string.index("\n")) ? string.slice(0,newline_index) : string
76
+ end
69
77
  end
70
78
  end
71
79
  end
@@ -192,6 +192,13 @@ module Nokogiri
192
192
  xpath(*rules)
193
193
  end
194
194
 
195
+ ###
196
+ # Search this node's immediate children using CSS selector +selector+
197
+ def > selector
198
+ ns = document.root.namespaces
199
+ xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
200
+ end
201
+
195
202
  ###
196
203
  # Search for the first occurrence of +path+.
197
204
  # Returns nil if nothing is found, otherwise a Node.
@@ -225,10 +232,59 @@ module Nokogiri
225
232
  get(name.to_s)
226
233
  end
227
234
 
235
+ ###
236
+ # Add +node+ as a child of this Node.
237
+ # The new node must be a Nokogiri::XML::Node or a non-empty String.
238
+ # Returns the new child node.
239
+ def add_child(node)
240
+ Node.verify_nodeishness(node)
241
+ if node.type == DOCUMENT_FRAG_NODE
242
+ node.children.each do |child|
243
+ add_child_node child
244
+ end
245
+ else
246
+ add_child_node node
247
+ end
248
+ end
249
+
250
+ ###
251
+ # Insert +node+ before this Node (as a sibling).
252
+ def add_previous_sibling(node)
253
+ Node.verify_nodeishness(node)
254
+ if node.type == DOCUMENT_FRAG_NODE
255
+ node.children.each do |child|
256
+ add_previous_sibling_node child
257
+ end
258
+ else
259
+ add_previous_sibling_node node
260
+ end
261
+ end
262
+
263
+ ###
264
+ # Insert +node+ after this Node (as a sibling).
265
+ def add_next_sibling(node)
266
+ Node.verify_nodeishness(node)
267
+ if node.type == DOCUMENT_FRAG_NODE
268
+ node.children.reverse.each do |child|
269
+ add_next_sibling_node child
270
+ end
271
+ else
272
+ add_next_sibling_node node
273
+ end
274
+ end
275
+
228
276
  alias :next :next_sibling
229
277
  alias :previous :previous_sibling
278
+
279
+ # :stopdoc:
280
+ # HACK: This is to work around an RDoc bug
281
+ alias :next= :add_next_sibling
282
+ # :startdoc:
283
+
284
+ alias :previous= :add_previous_sibling
230
285
  alias :remove :unlink
231
286
  alias :get_attribute :[]
287
+ alias :attr :[]
232
288
  alias :set_attribute :[]=
233
289
  alias :text :content
234
290
  alias :inner_text :content
@@ -241,8 +297,9 @@ module Nokogiri
241
297
  alias :clone :dup
242
298
 
243
299
  ####
244
- # Returns a hash containing the node's attributes. The key is the
245
- # attribute name, the value is the string value of the attribute.
300
+ # Returns a hash containing the node's attributes. The key is
301
+ # the attribute name, the value is a Nokogiri::XML::Attr
302
+ # representing the attribute.
246
303
  def attributes
247
304
  Hash[*(attribute_nodes.map { |node|
248
305
  [node.node_name, node]
@@ -279,7 +336,7 @@ module Nokogiri
279
336
  ###
280
337
  # Returns true if this Node matches +selector+
281
338
  def matches? selector
282
- document.search(selector).include?(self)
339
+ ancestors.last.search(selector).include?(self)
283
340
  end
284
341
 
285
342
  ####
@@ -435,8 +492,10 @@ module Nokogiri
435
492
 
436
493
  return NodeSet.new(document, parents) unless selector
437
494
 
495
+ root = parents.last
496
+
438
497
  NodeSet.new(document, parents.find_all { |parent|
439
- parent.matches?(selector)
498
+ root.search(selector).include?(parent)
440
499
  })
441
500
  end
442
501
 
@@ -473,16 +532,19 @@ module Nokogiri
473
532
  end
474
533
 
475
534
  ####
476
- # +replace+ this Node with the +new_node+ in the Document. The new node
477
- # must be a Nokogiri::XML::Node or a non-empty String
478
- def replace new_node
479
- if new_node.is_a?(Document) || !new_node.is_a?(XML::Node)
480
- raise ArgumentError, <<-EOERR
481
- Node.replace requires a Node argument, and cannot accept a Document.
482
- (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
483
- EOERR
535
+ # +replace+ this Node with the +node+ in the Document.
536
+ # The new node must be a Nokogiri::XML::Node or a non-empty String.
537
+ # Returns the new child node.
538
+ def replace node
539
+ Node.verify_nodeishness(node)
540
+ if node.type == DOCUMENT_FRAG_NODE
541
+ node.children.each do |child|
542
+ add_previous_sibling child
543
+ end
544
+ unlink
545
+ else
546
+ replace_node node
484
547
  end
485
- replace_with_node new_node
486
548
  end
487
549
 
488
550
  ###
@@ -657,6 +719,15 @@ Node.replace requires a Node argument, and cannot accept a Document.
657
719
  end
658
720
 
659
721
  private
722
+ def self.verify_nodeishness(node)
723
+ if node.is_a?(Document) || !node.is_a?(XML::Node)
724
+ raise ArgumentError, <<-EOERR
725
+ Node.replace requires a Node argument, and cannot accept a Document.
726
+ (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
727
+ EOERR
728
+ end
729
+ end
730
+
660
731
  def inspect_attributes
661
732
  [:name, :namespace, :attribute_nodes, :children]
662
733
  end