nokogiri 1.2.3 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (200) hide show
  1. data/.autotest +14 -2
  2. data/CHANGELOG.ja.rdoc +38 -0
  3. data/CHANGELOG.rdoc +43 -0
  4. data/Manifest.txt +80 -5
  5. data/README.ja.rdoc +12 -11
  6. data/README.rdoc +4 -2
  7. data/Rakefile +103 -173
  8. data/bin/nokogiri +47 -0
  9. data/ext/nokogiri/extconf.rb +19 -13
  10. data/ext/nokogiri/html_document.c +39 -3
  11. data/ext/nokogiri/html_document.h +1 -1
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.h +1 -1
  15. data/ext/nokogiri/html_sax_parser.h +1 -1
  16. data/ext/nokogiri/{native.c → nokogiri.c} +11 -3
  17. data/ext/nokogiri/{native.h → nokogiri.h} +18 -4
  18. data/ext/nokogiri/xml_attr.c +14 -5
  19. data/ext/nokogiri/xml_attr.h +1 -1
  20. data/ext/nokogiri/xml_cdata.c +15 -6
  21. data/ext/nokogiri/xml_cdata.h +1 -1
  22. data/ext/nokogiri/xml_comment.c +13 -4
  23. data/ext/nokogiri/xml_comment.h +1 -1
  24. data/ext/nokogiri/xml_document.c +50 -41
  25. data/ext/nokogiri/xml_document.h +1 -1
  26. data/ext/nokogiri/xml_document_fragment.c +12 -4
  27. data/ext/nokogiri/xml_document_fragment.h +1 -1
  28. data/ext/nokogiri/xml_dtd.c +1 -1
  29. data/ext/nokogiri/xml_dtd.h +1 -1
  30. data/ext/nokogiri/xml_entity_reference.c +13 -4
  31. data/ext/nokogiri/xml_entity_reference.h +1 -1
  32. data/ext/nokogiri/xml_io.h +1 -1
  33. data/ext/nokogiri/xml_namespace.c +69 -0
  34. data/ext/nokogiri/xml_namespace.h +12 -0
  35. data/ext/nokogiri/xml_node.c +232 -124
  36. data/ext/nokogiri/xml_node.h +3 -4
  37. data/ext/nokogiri/xml_node_set.c +206 -19
  38. data/ext/nokogiri/xml_node_set.h +1 -1
  39. data/ext/nokogiri/xml_processing_instruction.c +14 -4
  40. data/ext/nokogiri/xml_processing_instruction.h +1 -1
  41. data/ext/nokogiri/xml_reader.c +87 -7
  42. data/ext/nokogiri/xml_reader.h +1 -1
  43. data/ext/nokogiri/xml_relax_ng.c +106 -0
  44. data/ext/nokogiri/xml_relax_ng.h +9 -0
  45. data/ext/nokogiri/xml_sax_parser.c +122 -2
  46. data/ext/nokogiri/xml_sax_parser.h +1 -1
  47. data/ext/nokogiri/xml_sax_push_parser.c +1 -0
  48. data/ext/nokogiri/xml_sax_push_parser.h +1 -1
  49. data/ext/nokogiri/xml_schema.c +107 -0
  50. data/ext/nokogiri/xml_schema.h +9 -0
  51. data/ext/nokogiri/xml_syntax_error.h +1 -1
  52. data/ext/nokogiri/xml_text.c +10 -3
  53. data/ext/nokogiri/xml_text.h +1 -1
  54. data/ext/nokogiri/xml_xpath.h +1 -1
  55. data/ext/nokogiri/xml_xpath_context.h +1 -1
  56. data/ext/nokogiri/xslt_stylesheet.c +29 -16
  57. data/ext/nokogiri/xslt_stylesheet.h +1 -1
  58. data/lib/action-nokogiri.rb +7 -1
  59. data/lib/nokogiri.rb +21 -5
  60. data/lib/nokogiri/css/generated_parser.rb +49 -14
  61. data/lib/nokogiri/css/generated_tokenizer.rb +2 -2
  62. data/lib/nokogiri/css/node.rb +13 -3
  63. data/lib/nokogiri/css/parser.rb +8 -0
  64. data/lib/nokogiri/css/parser.y +7 -7
  65. data/lib/nokogiri/css/tokenizer.rb +2 -0
  66. data/lib/nokogiri/css/xpath_visitor.rb +10 -6
  67. data/lib/nokogiri/decorators/hpricot/node.rb +1 -1
  68. data/lib/nokogiri/decorators/hpricot/node_set.rb +2 -2
  69. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +2 -0
  70. data/lib/nokogiri/decorators/slop.rb +3 -1
  71. data/lib/nokogiri/ffi/html/document.rb +37 -0
  72. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  73. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  74. data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
  75. data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
  76. data/lib/nokogiri/ffi/libxml.rb +314 -0
  77. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  78. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  79. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  80. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  81. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  82. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  83. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  84. data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
  85. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  86. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  87. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  88. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  89. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  90. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  91. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
  92. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  93. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  94. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  95. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  96. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  97. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  98. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  99. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  100. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  101. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  102. data/lib/nokogiri/ffi/xml/document.rb +107 -0
  103. data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
  104. data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
  105. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  106. data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
  107. data/lib/nokogiri/ffi/xml/node.rb +380 -0
  108. data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
  109. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  110. data/lib/nokogiri/ffi/xml/reader.rb +217 -0
  111. data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
  112. data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
  113. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
  114. data/lib/nokogiri/ffi/xml/schema.rb +55 -0
  115. data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
  116. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  117. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  118. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  119. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  120. data/lib/nokogiri/hpricot.rb +14 -3
  121. data/lib/nokogiri/html.rb +11 -46
  122. data/lib/nokogiri/html/builder.rb +27 -1
  123. data/lib/nokogiri/html/document.rb +62 -6
  124. data/lib/nokogiri/html/document_fragment.rb +15 -0
  125. data/lib/nokogiri/html/element_description.rb +23 -0
  126. data/lib/nokogiri/html/entity_lookup.rb +2 -0
  127. data/lib/nokogiri/html/sax/parser.rb +27 -1
  128. data/lib/nokogiri/version.rb +26 -1
  129. data/lib/nokogiri/version_warning.rb +11 -0
  130. data/lib/nokogiri/xml.rb +25 -51
  131. data/lib/nokogiri/xml/builder.rb +166 -10
  132. data/lib/nokogiri/xml/cdata.rb +3 -1
  133. data/lib/nokogiri/xml/document.rb +39 -6
  134. data/lib/nokogiri/xml/document_fragment.rb +41 -1
  135. data/lib/nokogiri/xml/dtd.rb +3 -1
  136. data/lib/nokogiri/xml/entity_declaration.rb +3 -1
  137. data/lib/nokogiri/xml/fragment_handler.rb +24 -3
  138. data/lib/nokogiri/xml/namespace.rb +7 -0
  139. data/lib/nokogiri/xml/node.rb +314 -65
  140. data/lib/nokogiri/xml/node/save_options.rb +12 -2
  141. data/lib/nokogiri/xml/node_set.rb +58 -8
  142. data/lib/nokogiri/xml/parse_options.rb +80 -0
  143. data/lib/nokogiri/xml/processing_instruction.rb +2 -0
  144. data/lib/nokogiri/xml/reader.rb +42 -3
  145. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  146. data/lib/nokogiri/xml/sax.rb +0 -7
  147. data/lib/nokogiri/xml/sax/document.rb +84 -0
  148. data/lib/nokogiri/xml/sax/parser.rb +38 -2
  149. data/lib/nokogiri/xml/sax/push_parser.rb +12 -0
  150. data/lib/nokogiri/xml/schema.rb +65 -0
  151. data/lib/nokogiri/xml/syntax_error.rb +11 -0
  152. data/lib/nokogiri/xml/xpath.rb +1 -1
  153. data/lib/nokogiri/xml/xpath_context.rb +2 -0
  154. data/lib/nokogiri/xslt.rb +21 -1
  155. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  156. data/lib/xsd/xmlparser/nokogiri.rb +12 -2
  157. data/tasks/test.rb +42 -19
  158. data/test/css/test_parser.rb +29 -0
  159. data/test/ffi/test_document.rb +35 -0
  160. data/test/files/address_book.rlx +12 -0
  161. data/test/files/address_book.xml +10 -0
  162. data/test/files/po.xml +32 -0
  163. data/test/files/po.xsd +66 -0
  164. data/test/helper.rb +38 -8
  165. data/test/html/sax/test_parser.rb +12 -0
  166. data/test/html/test_builder.rb +25 -2
  167. data/test/html/test_document.rb +91 -20
  168. data/test/html/test_document_fragment.rb +97 -0
  169. data/test/html/test_element_description.rb +95 -0
  170. data/test/html/test_node.rb +66 -3
  171. data/test/test_convert_xpath.rb +1 -1
  172. data/test/test_memory_leak.rb +57 -18
  173. data/test/test_nokogiri.rb +24 -2
  174. data/test/test_reader.rb +77 -0
  175. data/test/test_xslt_transforms.rb +120 -82
  176. data/test/xml/node/test_subclass.rb +44 -0
  177. data/test/xml/sax/test_parser.rb +9 -0
  178. data/test/xml/sax/test_push_parser.rb +24 -0
  179. data/test/xml/test_attr.rb +7 -0
  180. data/test/xml/test_builder.rb +48 -0
  181. data/test/xml/test_cdata.rb +19 -0
  182. data/test/xml/test_comment.rb +6 -0
  183. data/test/xml/test_document.rb +101 -2
  184. data/test/xml/test_document_fragment.rb +55 -3
  185. data/test/xml/test_entity_reference.rb +4 -0
  186. data/test/xml/test_namespace.rb +43 -0
  187. data/test/xml/test_node.rb +255 -8
  188. data/test/xml/test_node_attributes.rb +34 -0
  189. data/test/xml/test_node_encoding.rb +9 -2
  190. data/test/xml/test_node_set.rb +197 -1
  191. data/test/xml/test_parse_options.rb +52 -0
  192. data/test/xml/test_processing_instruction.rb +5 -0
  193. data/test/xml/test_relax_ng.rb +60 -0
  194. data/test/xml/test_schema.rb +65 -0
  195. data/test/xml/test_text.rb +5 -0
  196. data/test/xml/test_unparented_node.rb +3 -3
  197. metadata +128 -12
  198. data/lib/nokogiri/xml/comment.rb +0 -6
  199. data/lib/nokogiri/xml/element.rb +0 -6
  200. data/lib/nokogiri/xml/text.rb +0 -6
@@ -1,6 +1,8 @@
1
1
  module Nokogiri
2
2
  module XML
3
- class CDATA < Text
3
+ class CDATA < Nokogiri::XML::Text
4
+ ###
5
+ # Get the name of this CDATA node
4
6
  def name
5
7
  '#cdata-section'
6
8
  end
@@ -11,6 +11,9 @@ module Nokogiri
11
11
  # A list of Nokogiri::XML::SyntaxError found when parsing a document
12
12
  attr_accessor :errors
13
13
 
14
+ def initialize *args
15
+ end
16
+
14
17
  # The name of this document. Always returns "document"
15
18
  def name
16
19
  'document'
@@ -22,7 +25,7 @@ module Nokogiri
22
25
  end
23
26
 
24
27
  # Get the list of decorators given +key+
25
- def decorators(key)
28
+ def decorators key
26
29
  @decorators ||= Hash.new
27
30
  @decorators[key] ||= []
28
31
  end
@@ -40,7 +43,7 @@ module Nokogiri
40
43
 
41
44
  ###
42
45
  # Apply any decorators to +node+
43
- def decorate(node)
46
+ def decorate node
44
47
  return unless @decorators
45
48
  @decorators.each { |klass,list|
46
49
  next unless node.is_a?(klass)
@@ -48,10 +51,6 @@ module Nokogiri
48
51
  }
49
52
  end
50
53
 
51
- def node_cache # :nodoc:
52
- @node_cache ||= {}
53
- end
54
-
55
54
  alias :to_xml :serialize
56
55
  alias :inner_html :serialize
57
56
 
@@ -60,7 +59,41 @@ module Nokogiri
60
59
  root ? root.collect_namespaces : {}
61
60
  end
62
61
 
62
+ ####
63
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
64
+ def fragment tags
65
+ DocumentFragment.new(self, tags)
66
+ end
67
+
63
68
  undef_method :swap, :parent, :namespace
69
+
70
+ class << self
71
+ ###
72
+ # Parse an XML file. +thing+ may be a String, or any object that
73
+ # responds to _read_ and _close_ such as an IO, or StringIO.
74
+ # +url+ is the resource where this document is located. +encoding+ is the
75
+ # encoding that should be used when processing the document. +options+
76
+ # is a number that sets options in the parser, such as
77
+ # Nokogiri::XML::PARSE_RECOVER. See the constants in
78
+ # Nokogiri::XML.
79
+ def parse string_or_io, url = nil, encoding = nil, options = 2145, &block
80
+
81
+ options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
82
+ # Give the options to the user
83
+ yield options if block_given?
84
+
85
+ if string_or_io.respond_to?(:read)
86
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
87
+ return self.read_io(string_or_io, url, encoding, options.to_i)
88
+ end
89
+
90
+ # read_memory pukes on empty docs
91
+ return self.new if string_or_io.nil? or string_or_io.empty?
92
+
93
+ self.read_memory(string_or_io, url, encoding, options.to_i)
94
+ end
95
+ end
96
+
64
97
  end
65
98
  end
66
99
  end
@@ -1,9 +1,49 @@
1
1
  module Nokogiri
2
2
  module XML
3
- class DocumentFragment < Node
3
+ class DocumentFragment < Nokogiri::XML::Node
4
+ def initialize document, tags=nil
5
+ if tags
6
+ parser = if self.kind_of?(Nokogiri::HTML::DocumentFragment)
7
+ HTML::SAX::Parser.new(FragmentHandler.new(self, tags))
8
+ else
9
+ XML::SAX::Parser.new(FragmentHandler.new(self, tags))
10
+ end
11
+ parser.parse(tags)
12
+ end
13
+ end
14
+
15
+ ###
16
+ # return the name for DocumentFragment
4
17
  def name
5
18
  '#document-fragment'
6
19
  end
20
+
21
+ def to_s
22
+ children.to_s
23
+ end
24
+
25
+ def to_html *args
26
+ children.to_html(*args)
27
+ end
28
+
29
+ def to_xhtml *args
30
+ children.to_xhtml(*args)
31
+ end
32
+
33
+ def to_xml *args
34
+ children.to_xml(*args)
35
+ end
36
+
37
+ alias :serialize :to_s
38
+
39
+ class << self
40
+ ####
41
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
42
+ def parse tags
43
+ XML::DocumentFragment.new(XML::Document.new, tags)
44
+ end
45
+ end
46
+
7
47
  end
8
48
  end
9
49
  end
@@ -1,6 +1,8 @@
1
1
  module Nokogiri
2
2
  module XML
3
- class DTD < Node
3
+ class DTD < Nokogiri::XML::Node
4
+ ###
5
+ # Return attributes for DTD. Always returns +nil+
4
6
  def attributes
5
7
  nil
6
8
  end
@@ -1,6 +1,8 @@
1
1
  module Nokogiri
2
2
  module XML
3
- class EntityDeclaration < Node
3
+ class EntityDeclaration < Nokogiri::XML::Node
4
+ ###
5
+ # return attributes. Always returns +nil+
4
6
  def attributes
5
7
  nil
6
8
  end
@@ -3,16 +3,37 @@ module Nokogiri
3
3
  class FragmentHandler < Nokogiri::XML::SAX::Document # :nodoc:
4
4
  def initialize node, original_html
5
5
  @doc_started = false
6
- @original_html = original_html
7
6
  @document = node.document
8
7
  @stack = [node]
8
+ @klass = if node.kind_of?(Nokogiri::HTML::DocumentFragment)
9
+ Nokogiri::HTML::DocumentFragment
10
+ else
11
+ Nokogiri::XML::DocumentFragment
12
+ end
13
+ #
14
+ # the regexes used in start_element() and characters() anchor at
15
+ # start-of-line, but we really only want them to anchor at
16
+ # start-of-doc. so let's only save up to the first newline.
17
+ #
18
+ # this implementation choice was the result of some benchmarks, if
19
+ # you're curious: http://gist.github.com/115936
20
+ #
21
+ newline_index = original_html.index("\n")
22
+ @original_html = if newline_index
23
+ original_html[0,newline_index]
24
+ else
25
+ original_html
26
+ end
9
27
  end
10
28
 
11
29
  def start_element name, attrs = []
12
- @doc_started = true if @original_html =~ /^<#{name}/
30
+ regex = (@klass == Nokogiri::HTML::DocumentFragment) ? %r{^\s*<#{Regexp.escape(name)}}i \
31
+ : %r{^\s*<#{Regexp.escape(name)}}
32
+ @doc_started = true if @original_html =~ regex
13
33
  return unless @doc_started
14
34
 
15
35
  node = Node.new(name, @document)
36
+ attrs << "" unless (attrs.length % 2) == 0
16
37
  Hash[*attrs].each do |k,v|
17
38
  node[k] = v
18
39
  end
@@ -21,7 +42,7 @@ module Nokogiri
21
42
  end
22
43
 
23
44
  def characters string
24
- @doc_started = true if @original_html =~ /^\s*#{string}/
45
+ @doc_started = true if @original_html.strip =~ %r{^\s*#{Regexp.escape(string.strip)}}
25
46
  @stack.last << Nokogiri::XML::Text.new(string, @document)
26
47
  end
27
48
 
@@ -0,0 +1,7 @@
1
+ module Nokogiri
2
+ module XML
3
+ class Namespace
4
+ attr_reader :document
5
+ end
6
+ end
7
+ end
@@ -34,35 +34,57 @@ module Nokogiri
34
34
  #
35
35
  # You may search this node's subtree using Node#xpath and Node#css
36
36
  class Node
37
+ # Element node type, see Nokogiri::XML::Node#element?
37
38
  ELEMENT_NODE = 1
39
+ # Attribute node type
38
40
  ATTRIBUTE_NODE = 2
41
+ # Text node type, see Nokogiri::XML::Node#text?
39
42
  TEXT_NODE = 3
43
+ # CDATA node type, see Nokogiri::XML::Node#cdata?
40
44
  CDATA_SECTION_NODE = 4
45
+ # Entity reference node type
41
46
  ENTITY_REF_NODE = 5
47
+ # Entity node type
42
48
  ENTITY_NODE = 6
49
+ # PI node type
43
50
  PI_NODE = 7
51
+ # Comment node type, see Nokogiri::XML::Node#comment?
44
52
  COMMENT_NODE = 8
53
+ # Document node type, see Nokogiri::XML::Node#xml?
45
54
  DOCUMENT_NODE = 9
55
+ # Document type node type
46
56
  DOCUMENT_TYPE_NODE = 10
57
+ # Document fragment node type
47
58
  DOCUMENT_FRAG_NODE = 11
59
+ # Notation node type
48
60
  NOTATION_NODE = 12
61
+ # HTML document node type, see Nokogiri::XML::Node#html?
49
62
  HTML_DOCUMENT_NODE = 13
63
+ # DTD node type
50
64
  DTD_NODE = 14
65
+ # Element declaration type
51
66
  ELEMENT_DECL = 15
67
+ # Attribute declaration type
52
68
  ATTRIBUTE_DECL = 16
69
+ # Entity declaration type
53
70
  ENTITY_DECL = 17
71
+ # Namespace declaration type
54
72
  NAMESPACE_DECL = 18
73
+ # XInclude start type
55
74
  XINCLUDE_START = 19
75
+ # XInclude end type
56
76
  XINCLUDE_END = 20
77
+ # DOCB document node type
57
78
  DOCB_DOCUMENT_NODE = 21
58
79
 
59
- # The Document associated with this Node.
60
- attr_accessor :document
80
+ def initialize name, document
81
+ # ... Ya. This is empty on purpose.
82
+ end
61
83
 
62
84
  ###
63
85
  # Decorate this node with the decorators set up in this node's Document
64
86
  def decorate!
65
- document.decorate(self) if document
87
+ document.decorate(self)
66
88
  end
67
89
 
68
90
  ###
@@ -150,7 +172,7 @@ module Nokogiri
150
172
  # def regex node_set, regex
151
173
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
152
174
  # end
153
- # })
175
+ # }.new)
154
176
  #
155
177
  def css *rules
156
178
  # Pop off our custom function handler if it exists
@@ -174,6 +196,7 @@ module Nokogiri
174
196
  def at path, ns = document.root ? document.root.namespaces : {}
175
197
  search(path, ns).first
176
198
  end
199
+ alias :% :at
177
200
 
178
201
  ###
179
202
  # Get the attribute value for the attribute +name+
@@ -195,6 +218,7 @@ module Nokogiri
195
218
  alias :name= :node_name=
196
219
  alias :type :node_type
197
220
  alias :to_str :text
221
+ alias :clone :dup
198
222
 
199
223
  ####
200
224
  # Returns a hash containing the node's attributes. The key is the
@@ -232,6 +256,12 @@ module Nokogiri
232
256
  end
233
257
  alias :delete :remove_attribute
234
258
 
259
+ ###
260
+ # Returns true if this Node matches +selector+
261
+ def matches? selector
262
+ document.search(selector).include?(self)
263
+ end
264
+
235
265
  ####
236
266
  # Create nodes from +data+ and insert them before this node
237
267
  # (as a sibling).
@@ -271,23 +301,13 @@ module Nokogiri
271
301
  self
272
302
  end
273
303
 
274
- ####
275
- # Create a Nokogiri::XML::DocumentFragment from +tags+
276
- def fragment tags
277
- classes = document.class.name.split('::')
278
- classes[-1] = 'SAX::Parser'
279
-
280
-
281
- fragment = DocumentFragment.new(self.document)
282
- parser = eval(classes.join('::')).new(
283
- FragmentHandler.new(fragment, tags)
284
- )
285
- parser.parse(tags)
286
- fragment
304
+ def fragment tags # :nodoc:
305
+ # TODO: deprecate?
306
+ document.fragment(tags)
287
307
  end
288
308
 
289
309
  ####
290
- # Set the content to +string+.
310
+ # Set the Node content to +string+. The content gets XML escaped.
291
311
  def content= string
292
312
  self.native_content = encode_special_chars(string.to_s)
293
313
  end
@@ -299,6 +319,21 @@ module Nokogiri
299
319
  parent_node
300
320
  end
301
321
 
322
+ ###
323
+ # Get a hash containing the Namespace definitions for this Node
324
+ def namespaces
325
+ Hash[*namespace_definitions.map { |nd|
326
+ key = ['xmlns', nd.prefix].compact.join(':')
327
+ if defined?(Encoding) && document.encoding
328
+ begin
329
+ key.force_encoding document.encoding
330
+ rescue ArgumentError
331
+ end
332
+ end
333
+ [key, nd.href]
334
+ }.flatten]
335
+ end
336
+
302
337
  # Returns true if this is a Comment
303
338
  def comment?
304
339
  type == COMMENT_NODE
@@ -324,6 +359,16 @@ module Nokogiri
324
359
  type == TEXT_NODE
325
360
  end
326
361
 
362
+ ###
363
+ # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
364
+ # nil on XML documents and on unknown tags.
365
+ def description
366
+ return nil if document.xml?
367
+ Nokogiri::HTML::ElementDescription[name]
368
+ end
369
+
370
+ ###
371
+ # Is this a read only node?
327
372
  def read_only?
328
373
  # According to gdome2, these are read-only node types
329
374
  [NOTATION_NODE, ENTITY_NODE, ENTITY_DECL].include?(type)
@@ -335,10 +380,14 @@ module Nokogiri
335
380
  end
336
381
  alias :elem? :element?
337
382
 
383
+ ###
384
+ # Turn this node into a string. If the document is HTML, this method
385
+ # returns html. If the document is XML, this method returns XML.
338
386
  def to_s
339
387
  document.xml? ? to_xml : to_html
340
388
  end
341
389
 
390
+ # Get the inner_html for this node's Node#children
342
391
  def inner_html
343
392
  children.map { |x| x.to_html }.join
344
393
  end
@@ -359,27 +408,60 @@ module Nokogiri
359
408
  end
360
409
 
361
410
  ###
362
- # Get a list of ancestor Node for this Node
363
- def ancestors
364
- return [] unless respond_to?(:parent)
411
+ # Get a list of ancestor Node for this Node. If +selector+ is given,
412
+ # the ancestors must match +selector+
413
+ def ancestors selector = nil
414
+ return NodeSet.new(document) unless respond_to?(:parent)
415
+ return NodeSet.new(document) unless parent
365
416
 
366
417
  parents = [parent]
367
418
 
368
419
  while parents.last.respond_to?(:parent)
369
- parents << parents.last.parent
420
+ break unless ctx_parent = parents.last.parent
421
+ parents << ctx_parent
370
422
  end
371
- parents
423
+
424
+ return NodeSet.new(document, parents) unless selector
425
+
426
+ NodeSet.new(document, parents.find_all { |parent|
427
+ parent.matches?(selector)
428
+ })
429
+ end
430
+
431
+ ###
432
+ # Set the default namespace for this node to +url+
433
+ def default_namespace= url
434
+ add_namespace_definition(nil, url)
435
+ end
436
+ alias :add_namespace :add_namespace_definition
437
+
438
+ ###
439
+ # Set the namespace for this node to +ns+
440
+ def namespace= ns
441
+ if ns.document != document
442
+ raise ArgumentError, 'namespace must be declared on the same document'
443
+ end
444
+ unless ns.is_a? Nokogiri::XML::Namespace
445
+ raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
446
+ end
447
+ set_namespace ns
372
448
  end
373
449
 
374
450
  ####
375
451
  # Yields self and all children to +block+ recursively.
376
- def traverse(&block)
452
+ def traverse &block
377
453
  children.each{|j| j.traverse(&block) }
378
454
  block.call(self)
379
455
  end
380
456
 
457
+ ###
458
+ # Accept a visitor. This method calls "visit" on +visitor+ with self.
459
+ def accept visitor
460
+ visitor.visit(self)
461
+ end
462
+
381
463
  ####
382
- # replace node with the new node in the document.
464
+ # replace this Node with the +new_node+ in the Document.
383
465
  def replace(new_node)
384
466
  if new_node.is_a?(Document) || !new_node.is_a?(XML::Node)
385
467
  raise ArgumentError, <<-EOERR
@@ -399,97 +481,264 @@ Node.replace requires a Node argument, and cannot accept a Document.
399
481
  end
400
482
 
401
483
  ###
402
- # Serialize Node using +encoding+ and +save_options+. Save options
484
+ # Serialize Node using +options+. Save options
403
485
  # can also be set using a block. See SaveOptions.
404
486
  #
405
487
  # These two statements are equivalent:
406
488
  #
407
- # node.serialize('UTF-8', FORMAT | AS_XML)
489
+ # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
408
490
  #
409
491
  # or
410
492
  #
411
- # node.serialize('UTF-8') do |config|
493
+ # node.serialize(:encoding => 'UTF-8') do |config|
412
494
  # config.format.as_xml
413
495
  # end
414
496
  #
415
- def serialize encoding = nil, save_options = SaveOptions::FORMAT, &block
497
+ def serialize *args, &block
498
+ if args.first && !args.first.is_a?(Hash)
499
+ $stderr.puts(<<-eowarn)
500
+ #{self.class}#serialize(encoding, save_opts) is deprecated and will be removed in
501
+ Nokogiri version 1.4.0 *or* after June 1 2009.
502
+ You called serialize from here:
503
+
504
+ #{caller.first}
505
+
506
+ Please change to #{self.class}#serialize(:encoding => enc, :save_with => opts)
507
+ eowarn
508
+ end
509
+
510
+ options = args.first.is_a?(Hash) ? args.shift : {
511
+ :encoding => args[0],
512
+ :save_with => args[1] || SaveOptions::FORMAT
513
+ }
514
+
416
515
  io = StringIO.new
417
- write_to io, encoding, save_options, &block
516
+ write_to io, options, &block
418
517
  io.rewind
419
518
  io.read
420
519
  end
421
520
 
422
521
  ###
423
- # Serialize this Node to HTML using +encoding+
424
- def to_html encoding = nil
522
+ # Serialize this Node to HTML
523
+ #
524
+ # doc.to_html
525
+ #
526
+ # See Node#write_to for a list of +options+. For formatted output,
527
+ # use Node#to_xhtml instead.
528
+ def to_html options = {}
529
+ if options.is_a?(String)
530
+ $stderr.puts(<<-eowarn)
531
+ Node#to_html(encoding) is deprecated and will be removed in
532
+ Nokogiri version 1.4.0 *or* after June 1 2009.
533
+ You called to_html from here:
534
+
535
+ #{caller.first}
536
+
537
+ Please change to Node#to_html(:encoding => #{options})
538
+ eowarn
539
+ options = { :encoding => options }
540
+ end
541
+
425
542
  # FIXME: this is a hack around broken libxml versions
426
543
  return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
427
544
 
428
- serialize(encoding, SaveOptions::FORMAT |
429
- SaveOptions::NO_DECLARATION |
430
- SaveOptions::NO_EMPTY_TAGS |
431
- SaveOptions::AS_HTML)
545
+ options[:save_with] ||= SaveOptions::FORMAT |
546
+ SaveOptions::NO_DECLARATION |
547
+ SaveOptions::NO_EMPTY_TAGS |
548
+ SaveOptions::AS_HTML
549
+
550
+ serialize(options)
432
551
  end
433
552
 
434
553
  ###
435
- # Serialize this Node to XML using +encoding+
436
- def to_xml encoding = nil
437
- serialize(encoding, SaveOptions::FORMAT | SaveOptions::AS_XML)
554
+ # Serialize this Node to XML using +options+
555
+ #
556
+ # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
557
+ #
558
+ # See Node#write_to for a list of +options+
559
+ def to_xml options = {}
560
+ encoding = nil
561
+
562
+ # FIXME add a deprecation warning
563
+ if options.is_a? String
564
+ $stderr.puts(<<-eowarn)
565
+ Node#to_xml(encoding) is deprecated and will be removed in
566
+ Nokogiri version 1.4.0 *or* after June 1 2009.
567
+ You called to_xml from here:
568
+
569
+ #{caller.first}
570
+
571
+ Please change to Node#to_xml(:encoding => #{options})
572
+ eowarn
573
+ options = {
574
+ :encoding => options
575
+ }
576
+ end
577
+ options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
578
+
579
+ serialize(options)
438
580
  end
439
581
 
440
582
  ###
441
- # Serialize this Node to XML using +encoding+
442
- def to_xhtml encoding = nil
583
+ # Serialize this Node to XHTML using +options+
584
+ #
585
+ # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
586
+ #
587
+ # See Node#write_to for a list of +options+
588
+ def to_xhtml options = {}
589
+ if options.is_a?(String)
590
+ options = { :encoding => options }
591
+ $stderr.puts(<<-eowarn)
592
+ Node#to_xml(encoding) is deprecated and will be removed in
593
+ Nokogiri version 1.4.0 *or* after June 1 2009.
594
+ You called to_xhtml from here:
595
+
596
+ #{caller.first}
597
+
598
+ Please change to Node#to_xhtml(:encoding => #{options})
599
+ eowarn
600
+ end
601
+
443
602
  # FIXME: this is a hack around broken libxml versions
444
603
  return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
445
604
 
446
- serialize(encoding, SaveOptions::FORMAT |
447
- SaveOptions::NO_DECLARATION |
448
- SaveOptions::NO_EMPTY_TAGS |
449
- SaveOptions::AS_XHTML)
605
+ options[:save_with] ||= SaveOptions::FORMAT |
606
+ SaveOptions::NO_DECLARATION |
607
+ SaveOptions::NO_EMPTY_TAGS |
608
+ SaveOptions::AS_XHTML
609
+
610
+ serialize(options)
450
611
  end
451
612
 
452
613
  ###
453
- # Write Node to +io+ with +encoding+ and +save_options+
454
- def write_to io, encoding = nil, save_options = SaveOptions::FORMAT
614
+ # Write Node to +io+ with +options+. +options+ modify the output of
615
+ # this method. Valid options are:
616
+ #
617
+ # * +:encoding+ for changing the encoding
618
+ # * +:indent_text+ the indentation text, defaults to one space
619
+ # * +:indent+ the number of +:indent_text+ to use, defaults to 2
620
+ # * +:save_with+ a combination of SaveOptions constants.
621
+ #
622
+ # To save with UTF-8 indented twice:
623
+ #
624
+ # node.write_to(io, :encoding => 'UTF-8', :indent => 2)
625
+ #
626
+ # To save indented with two dashes:
627
+ #
628
+ # node.write_to(io, :indent_text => '-', :indent => 2)
629
+ #
630
+ def write_to io, *options
631
+ if options.length > 0 && !options.first.is_a?(Hash)
632
+ $stderr.puts(<<-eowarn)
633
+ Node#write_to(io, encoding, save_options) is deprecated and will be removed in
634
+ Nokogiri version 1.4.0 *or* after June 1 2009.
635
+ You called write_to from here:
636
+
637
+ #{caller.first}
638
+
639
+ Please change to: Node#write_to(io, :encoding => e, :save_options => opts)
640
+ eowarn
641
+ end
642
+
643
+ options = options.first.is_a?(Hash) ? options.shift : {}
644
+ encoding = options[:encoding] || options[0]
645
+ save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
646
+ indent_text = options[:indent_text] || ' '
647
+ indent_times = options[:indent] || 2
648
+
649
+
455
650
  config = SaveOptions.new(save_options)
456
651
  yield config if block_given?
457
652
 
458
- native_write_to(io, encoding, config.options)
653
+ native_write_to(io, encoding, indent_text * indent_times, config.options)
459
654
  end
460
655
 
461
656
  ###
462
- # Write Node as HTML to +io+ with +encoding+
463
- def write_html_to io, encoding = nil
464
- write_to io, encoding, SaveOptions::FORMAT |
657
+ # Write Node as HTML to +io+ with +options+
658
+ #
659
+ # See Node#write_to for a list of +options+
660
+ def write_html_to io, options = {}
661
+ if options.is_a?(String)
662
+ $stderr.puts(<<-eowarn)
663
+ Node#write_html_to(io, encoding) is deprecated and will be removed in
664
+ Nokogiri version 1.4.0 *or* after June 1 2009.
665
+ You called write_html_to from here:
666
+
667
+ #{caller.first}
668
+
669
+ Please change to Node#write_html_to(io, :encoding => #{options})
670
+ eowarn
671
+ options = { :encoding => options }
672
+ end
673
+
674
+ # FIXME: this is a hack around broken libxml versions
675
+ return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
676
+
677
+ options[:save_with] ||= SaveOptions::FORMAT |
465
678
  SaveOptions::NO_DECLARATION |
466
679
  SaveOptions::NO_EMPTY_TAGS |
467
680
  SaveOptions::AS_HTML
681
+ write_to io, options
468
682
  end
469
683
 
470
684
  ###
471
- # Write Node as XHTML to +io+ with +encoding+
472
- def write_xhtml_to io, encoding = nil
473
- write_to io, encoding, SaveOptions::FORMAT |
685
+ # Write Node as XHTML to +io+ with +options+
686
+ #
687
+ # See Node#write_to for a list of +options+
688
+ def write_xhtml_to io, options = {}
689
+ if options.is_a?(String)
690
+ $stderr.puts(<<-eowarn)
691
+ Node#write_xhtml_to(io, encoding) is deprecated and will be removed in
692
+ Nokogiri version 1.4.0 *or* after June 1 2009.
693
+ You called write_xhtml_to from here:
694
+
695
+ #{caller.first}
696
+
697
+ Please change to Node#write_xhtml_to(io, :encoding => #{options})
698
+ eowarn
699
+ options = { :encoding => options }
700
+ end
701
+
702
+ # FIXME: this is a hack around broken libxml versions
703
+ return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
704
+
705
+ options[:save_with] ||= SaveOptions::FORMAT |
474
706
  SaveOptions::NO_DECLARATION |
475
707
  SaveOptions::NO_EMPTY_TAGS |
476
708
  SaveOptions::AS_XHTML
709
+ write_to io, options
477
710
  end
478
711
 
479
712
  ###
480
- # Write Node as XML to +io+ with +encoding+
481
- def write_xml_to io, encoding = nil
482
- write_to io, encoding, SaveOptions::FORMAT | SaveOptions::AS_XML
713
+ # Write Node as XML to +io+ with +options+
714
+ #
715
+ # doc.write_xml_to io, :encoding => 'UTF-8'
716
+ #
717
+ # See Node#write_to for a list of options
718
+ def write_xml_to io, options = {}
719
+ if options.is_a?(String)
720
+ $stderr.puts(<<-eowarn)
721
+ Node#write_xml_to(io, encoding) is deprecated and will be removed in
722
+ Nokogiri version 1.4.0 *or* after June 1 2009.
723
+ You called write_xml_to from here:
724
+
725
+ #{caller.first}
726
+
727
+ Please change to Node#write_xml_to(io, :encoding => #{options})
728
+ eowarn
729
+ options = { :encoding => options }
730
+ end
731
+ options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
732
+ write_to io, options
483
733
  end
484
734
 
485
- # Create a new node from +string+
486
- #
487
- # THIS METHOD IS DEPRECATED
488
- # This method is deprecated and will be removed in 1.3.0 or by
489
- # March 1, 2009. Instead, use Nokogiri::XML::Node#fragment()
490
- def self.new_from_str string
491
- $stderr.puts("This method is deprecated and will be removed in 1.3.0 or by March 1, 2009. Instead, use Nokogiri::XML::Node#fragment")
492
- Nokogiri::HTML.fragment(string).first
735
+ ###
736
+ # Compare two Node objects with respect to their Document. Nodes from
737
+ # different documents cannot be compared.
738
+ def <=> other
739
+ return nil unless other.is_a?(Nokogiri::XML::Node)
740
+ return nil unless document == other.document
741
+ compare other
493
742
  end
494
743
  end
495
744
  end