rubyjedi-nokogiri_java 1.4.0.20100513161003-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. data/.autotest +26 -0
  2. data/CHANGELOG.ja.rdoc +330 -0
  3. data/CHANGELOG.rdoc +341 -0
  4. data/Manifest.txt +277 -0
  5. data/README.ja.rdoc +105 -0
  6. data/README.rdoc +125 -0
  7. data/Rakefile +307 -0
  8. data/bin/nokogiri +49 -0
  9. data/deps.rip +5 -0
  10. data/ext/nokogiri/extconf.rb +149 -0
  11. data/ext/nokogiri/html_document.c +145 -0
  12. data/ext/nokogiri/html_document.h +10 -0
  13. data/ext/nokogiri/html_element_description.c +272 -0
  14. data/ext/nokogiri/html_element_description.h +10 -0
  15. data/ext/nokogiri/html_entity_lookup.c +32 -0
  16. data/ext/nokogiri/html_entity_lookup.h +8 -0
  17. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  18. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  19. data/ext/nokogiri/nokogiri.c +96 -0
  20. data/ext/nokogiri/nokogiri.h +148 -0
  21. data/ext/nokogiri/xml_attr.c +92 -0
  22. data/ext/nokogiri/xml_attr.h +9 -0
  23. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  24. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  25. data/ext/nokogiri/xml_cdata.c +54 -0
  26. data/ext/nokogiri/xml_cdata.h +9 -0
  27. data/ext/nokogiri/xml_comment.c +52 -0
  28. data/ext/nokogiri/xml_comment.h +9 -0
  29. data/ext/nokogiri/xml_document.c +386 -0
  30. data/ext/nokogiri/xml_document.h +24 -0
  31. data/ext/nokogiri/xml_document_fragment.c +46 -0
  32. data/ext/nokogiri/xml_document_fragment.h +10 -0
  33. data/ext/nokogiri/xml_dtd.c +192 -0
  34. data/ext/nokogiri/xml_dtd.h +10 -0
  35. data/ext/nokogiri/xml_element_content.c +123 -0
  36. data/ext/nokogiri/xml_element_content.h +10 -0
  37. data/ext/nokogiri/xml_element_decl.c +69 -0
  38. data/ext/nokogiri/xml_element_decl.h +9 -0
  39. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  40. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  41. data/ext/nokogiri/xml_entity_decl.c +97 -0
  42. data/ext/nokogiri/xml_entity_decl.h +10 -0
  43. data/ext/nokogiri/xml_entity_reference.c +50 -0
  44. data/ext/nokogiri/xml_entity_reference.h +9 -0
  45. data/ext/nokogiri/xml_io.c +31 -0
  46. data/ext/nokogiri/xml_io.h +11 -0
  47. data/ext/nokogiri/xml_namespace.c +82 -0
  48. data/ext/nokogiri/xml_namespace.h +13 -0
  49. data/ext/nokogiri/xml_node.c +1080 -0
  50. data/ext/nokogiri/xml_node.h +13 -0
  51. data/ext/nokogiri/xml_node_set.c +405 -0
  52. data/ext/nokogiri/xml_node_set.h +9 -0
  53. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  54. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  55. data/ext/nokogiri/xml_reader.c +593 -0
  56. data/ext/nokogiri/xml_reader.h +10 -0
  57. data/ext/nokogiri/xml_relax_ng.c +159 -0
  58. data/ext/nokogiri/xml_relax_ng.h +9 -0
  59. data/ext/nokogiri/xml_sax_parser.c +283 -0
  60. data/ext/nokogiri/xml_sax_parser.h +43 -0
  61. data/ext/nokogiri/xml_sax_parser_context.c +157 -0
  62. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  63. data/ext/nokogiri/xml_sax_push_parser.c +114 -0
  64. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  65. data/ext/nokogiri/xml_schema.c +156 -0
  66. data/ext/nokogiri/xml_schema.h +9 -0
  67. data/ext/nokogiri/xml_syntax_error.c +52 -0
  68. data/ext/nokogiri/xml_syntax_error.h +13 -0
  69. data/ext/nokogiri/xml_text.c +48 -0
  70. data/ext/nokogiri/xml_text.h +9 -0
  71. data/ext/nokogiri/xml_xpath.c +53 -0
  72. data/ext/nokogiri/xml_xpath.h +11 -0
  73. data/ext/nokogiri/xml_xpath_context.c +239 -0
  74. data/ext/nokogiri/xml_xpath_context.h +9 -0
  75. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  76. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  77. data/lib/isorelax.jar +0 -0
  78. data/lib/jing.jar +0 -0
  79. data/lib/nekodtd.jar +0 -0
  80. data/lib/nekohtml.jar +0 -0
  81. data/lib/nokogiri.rb +123 -0
  82. data/lib/nokogiri/css.rb +25 -0
  83. data/lib/nokogiri/css/generated_parser.rb +659 -0
  84. data/lib/nokogiri/css/generated_tokenizer.rb +145 -0
  85. data/lib/nokogiri/css/node.rb +99 -0
  86. data/lib/nokogiri/css/parser.rb +82 -0
  87. data/lib/nokogiri/css/parser.y +230 -0
  88. data/lib/nokogiri/css/syntax_error.rb +7 -0
  89. data/lib/nokogiri/css/tokenizer.rb +7 -0
  90. data/lib/nokogiri/css/tokenizer.rex +55 -0
  91. data/lib/nokogiri/css/xpath_visitor.rb +164 -0
  92. data/lib/nokogiri/decorators/slop.rb +33 -0
  93. data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
  94. data/lib/nokogiri/ffi/html/document.rb +28 -0
  95. data/lib/nokogiri/ffi/html/element_description.rb +81 -0
  96. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  97. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  98. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  99. data/lib/nokogiri/ffi/libxml.rb +372 -0
  100. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  101. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  102. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  103. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  104. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  105. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  106. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  107. data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
  108. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  109. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  110. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  111. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  112. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  113. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  114. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  115. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  116. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  117. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  118. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  119. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  120. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  121. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
  122. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  123. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  124. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  125. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  126. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  127. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  128. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  129. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  130. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  131. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  132. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  133. data/lib/nokogiri/ffi/xml/document.rb +135 -0
  134. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  135. data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
  136. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  137. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  138. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  139. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  140. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  141. data/lib/nokogiri/ffi/xml/node.rb +465 -0
  142. data/lib/nokogiri/ffi/xml/node_set.rb +146 -0
  143. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  144. data/lib/nokogiri/ffi/xml/reader.rb +227 -0
  145. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  146. data/lib/nokogiri/ffi/xml/sax/parser.rb +135 -0
  147. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  148. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +55 -0
  149. data/lib/nokogiri/ffi/xml/schema.rb +92 -0
  150. data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
  151. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  152. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  153. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  154. data/lib/nokogiri/ffi/xslt/stylesheet.rb +50 -0
  155. data/lib/nokogiri/html.rb +36 -0
  156. data/lib/nokogiri/html/builder.rb +35 -0
  157. data/lib/nokogiri/html/document.rb +88 -0
  158. data/lib/nokogiri/html/document_fragment.rb +15 -0
  159. data/lib/nokogiri/html/element_description.rb +23 -0
  160. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  161. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  162. data/lib/nokogiri/html/sax/parser.rb +48 -0
  163. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  164. data/lib/nokogiri/nokogiri.jar +0 -0
  165. data/lib/nokogiri/syntax_error.rb +4 -0
  166. data/lib/nokogiri/version.rb +33 -0
  167. data/lib/nokogiri/version_warning.rb +11 -0
  168. data/lib/nokogiri/xml.rb +67 -0
  169. data/lib/nokogiri/xml/attr.rb +14 -0
  170. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  171. data/lib/nokogiri/xml/builder.rb +405 -0
  172. data/lib/nokogiri/xml/cdata.rb +11 -0
  173. data/lib/nokogiri/xml/character_data.rb +7 -0
  174. data/lib/nokogiri/xml/document.rb +163 -0
  175. data/lib/nokogiri/xml/document_fragment.rb +73 -0
  176. data/lib/nokogiri/xml/dtd.rb +11 -0
  177. data/lib/nokogiri/xml/element_content.rb +36 -0
  178. data/lib/nokogiri/xml/element_decl.rb +13 -0
  179. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  180. data/lib/nokogiri/xml/fragment_handler.rb +73 -0
  181. data/lib/nokogiri/xml/namespace.rb +13 -0
  182. data/lib/nokogiri/xml/node.rb +730 -0
  183. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  184. data/lib/nokogiri/xml/node_set.rb +318 -0
  185. data/lib/nokogiri/xml/notation.rb +6 -0
  186. data/lib/nokogiri/xml/parse_options.rb +85 -0
  187. data/lib/nokogiri/xml/pp.rb +2 -0
  188. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  189. data/lib/nokogiri/xml/pp/node.rb +56 -0
  190. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  191. data/lib/nokogiri/xml/reader.rb +74 -0
  192. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  193. data/lib/nokogiri/xml/sax.rb +4 -0
  194. data/lib/nokogiri/xml/sax/document.rb +160 -0
  195. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  196. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  197. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  198. data/lib/nokogiri/xml/schema.rb +61 -0
  199. data/lib/nokogiri/xml/syntax_error.rb +43 -0
  200. data/lib/nokogiri/xml/xpath.rb +10 -0
  201. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  202. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  203. data/lib/nokogiri/xslt.rb +48 -0
  204. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  205. data/lib/xercesImpl.jar +0 -0
  206. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  207. data/tasks/test.rb +100 -0
  208. data/test/css/test_nthiness.rb +159 -0
  209. data/test/css/test_parser.rb +282 -0
  210. data/test/css/test_tokenizer.rb +190 -0
  211. data/test/css/test_xpath_visitor.rb +76 -0
  212. data/test/ffi/test_document.rb +35 -0
  213. data/test/files/2ch.html +108 -0
  214. data/test/files/address_book.rlx +12 -0
  215. data/test/files/address_book.xml +10 -0
  216. data/test/files/bar/bar.xsd +4 -0
  217. data/test/files/dont_hurt_em_why.xml +422 -0
  218. data/test/files/exslt.xml +8 -0
  219. data/test/files/exslt.xslt +35 -0
  220. data/test/files/foo/foo.xsd +4 -0
  221. data/test/files/po.xml +32 -0
  222. data/test/files/po.xsd +66 -0
  223. data/test/files/shift_jis.html +10 -0
  224. data/test/files/shift_jis.xml +5 -0
  225. data/test/files/snuggles.xml +3 -0
  226. data/test/files/staff.dtd +10 -0
  227. data/test/files/staff.xml +59 -0
  228. data/test/files/staff.xslt +32 -0
  229. data/test/files/tlm.html +850 -0
  230. data/test/files/valid_bar.xml +2 -0
  231. data/test/helper.rb +137 -0
  232. data/test/html/sax/test_parser.rb +83 -0
  233. data/test/html/sax/test_parser_context.rb +48 -0
  234. data/test/html/test_builder.rb +164 -0
  235. data/test/html/test_document.rb +385 -0
  236. data/test/html/test_document_encoding.rb +77 -0
  237. data/test/html/test_document_fragment.rb +157 -0
  238. data/test/html/test_element_description.rb +98 -0
  239. data/test/html/test_named_characters.rb +14 -0
  240. data/test/html/test_node.rb +242 -0
  241. data/test/html/test_node_encoding.rb +27 -0
  242. data/test/test_convert_xpath.rb +135 -0
  243. data/test/test_css_cache.rb +45 -0
  244. data/test/test_encoding_handler.rb +46 -0
  245. data/test/test_jruby.rb +40 -0
  246. data/test/test_memory_leak.rb +87 -0
  247. data/test/test_nokogiri.rb +140 -0
  248. data/test/test_reader.rb +358 -0
  249. data/test/test_soap4r_sax.rb +52 -0
  250. data/test/test_xslt_transforms.rb +150 -0
  251. data/test/xml/node/test_save_options.rb +20 -0
  252. data/test/xml/node/test_subclass.rb +44 -0
  253. data/test/xml/sax/test_parser.rb +314 -0
  254. data/test/xml/sax/test_parser_context.rb +63 -0
  255. data/test/xml/sax/test_push_parser.rb +135 -0
  256. data/test/xml/test_attr.rb +38 -0
  257. data/test/xml/test_attribute_decl.rb +90 -0
  258. data/test/xml/test_builder.rb +167 -0
  259. data/test/xml/test_cdata.rb +38 -0
  260. data/test/xml/test_comment.rb +29 -0
  261. data/test/xml/test_document.rb +638 -0
  262. data/test/xml/test_document_encoding.rb +26 -0
  263. data/test/xml/test_document_fragment.rb +149 -0
  264. data/test/xml/test_dtd.rb +92 -0
  265. data/test/xml/test_dtd_encoding.rb +33 -0
  266. data/test/xml/test_element_content.rb +56 -0
  267. data/test/xml/test_element_decl.rb +73 -0
  268. data/test/xml/test_entity_decl.rb +83 -0
  269. data/test/xml/test_entity_reference.rb +21 -0
  270. data/test/xml/test_namespace.rb +70 -0
  271. data/test/xml/test_node.rb +740 -0
  272. data/test/xml/test_node_attributes.rb +34 -0
  273. data/test/xml/test_node_encoding.rb +107 -0
  274. data/test/xml/test_node_reparenting.rb +279 -0
  275. data/test/xml/test_node_set.rb +577 -0
  276. data/test/xml/test_parse_options.rb +52 -0
  277. data/test/xml/test_processing_instruction.rb +30 -0
  278. data/test/xml/test_reader_encoding.rb +126 -0
  279. data/test/xml/test_relax_ng.rb +60 -0
  280. data/test/xml/test_schema.rb +89 -0
  281. data/test/xml/test_syntax_error.rb +12 -0
  282. data/test/xml/test_text.rb +30 -0
  283. data/test/xml/test_unparented_node.rb +381 -0
  284. data/test/xml/test_xpath.rb +169 -0
  285. metadata +477 -0
@@ -0,0 +1,11 @@
1
+ module Nokogiri
2
+ module XML
3
+ class CDATA < Nokogiri::XML::Text
4
+ ###
5
+ # Get the name of this CDATA node
6
+ def name
7
+ '#cdata-section'
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,7 @@
1
+ module Nokogiri
2
+ module XML
3
+ class CharacterData < Nokogiri::XML::Node
4
+ include Nokogiri::XML::PP::CharacterData
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,163 @@
1
+ module Nokogiri
2
+ module XML
3
+ ####
4
+ # Nokogiri::XML::Document is the main entry point for dealing with
5
+ # XML documents. The Document is created by parsing an XML document.
6
+ # See Nokogiri.XML()
7
+ #
8
+ # For searching a Document, see Nokogiri::XML::Node#css and
9
+ # Nokogiri::XML::Node#xpath
10
+ class Document < Node
11
+ ###
12
+ # Parse an XML file. +thing+ may be a String, or any object that
13
+ # responds to _read_ and _close_ such as an IO, or StringIO.
14
+ # +url+ is the resource where this document is located. +encoding+ is the
15
+ # encoding that should be used when processing the document. +options+
16
+ # is a number that sets options in the parser, such as
17
+ # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
18
+ # Nokogiri::XML::ParseOptions.
19
+ def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
20
+
21
+ options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
22
+ # Give the options to the user
23
+ yield options if block_given?
24
+
25
+ if string_or_io.respond_to?(:read)
26
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
27
+ return read_io(string_or_io, url, encoding, options.to_i)
28
+ end
29
+
30
+ # read_memory pukes on empty docs
31
+ return new if string_or_io.nil? or string_or_io.empty?
32
+
33
+ read_memory(string_or_io, url, encoding, options.to_i)
34
+ end
35
+
36
+ # A list of Nokogiri::XML::SyntaxError found when parsing a document
37
+ attr_accessor :errors
38
+
39
+ def initialize *args
40
+ @decorators = nil
41
+ end
42
+
43
+ # Create an element with +name+
44
+ def create_element name, &block
45
+ Nokogiri::XML::Element.new(name, self, &block)
46
+ end
47
+
48
+ # Create a text node with +text+
49
+ def create_text_node text, &block
50
+ Nokogiri::XML::Text.new(text.to_s, self, &block)
51
+ end
52
+
53
+ # The name of this document. Always returns "document"
54
+ def name
55
+ 'document'
56
+ end
57
+
58
+ # A reference to +self+
59
+ def document
60
+ self
61
+ end
62
+
63
+ ###
64
+ # Recursively get all namespaces from this node and its subtree and
65
+ # return them as a hash.
66
+ #
67
+ # For example, given this document:
68
+ #
69
+ # <root xmlns:foo="bar">
70
+ # <bar xmlns:hello="world" />
71
+ # </root>
72
+ #
73
+ # This method will return:
74
+ #
75
+ # { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
76
+ #
77
+ # WARNING: this method will clobber duplicate names in the keys.
78
+ # For example, given this document:
79
+ #
80
+ # <root xmlns:foo="bar">
81
+ # <bar xmlns:foo="baz" />
82
+ # </root>
83
+ #
84
+ # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
85
+ def collect_namespaces
86
+ ns = {}
87
+ traverse { |j| ns.merge!(j.namespaces) }
88
+ ns
89
+ end
90
+
91
+ # Get the list of decorators given +key+
92
+ def decorators key
93
+ @decorators ||= Hash.new
94
+ @decorators[key] ||= []
95
+ end
96
+
97
+ ###
98
+ # Validate this Document against its DTD. Returns a list of errors on
99
+ # the document or +nil+ when there is no DTD.
100
+ def validate
101
+ return nil unless internal_subset
102
+ internal_subset.validate self
103
+ end
104
+
105
+ ###
106
+ # Explore a document with shortcut methods.
107
+ def slop!
108
+ unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
109
+ decorators(XML::Node) << Nokogiri::Decorators::Slop
110
+ decorate!
111
+ end
112
+
113
+ self
114
+ end
115
+
116
+ ###
117
+ # Apply any decorators to +node+
118
+ def decorate node
119
+ return unless @decorators
120
+ @decorators.each { |klass,list|
121
+ next unless node.is_a?(klass)
122
+ list.each { |moodule| node.extend(moodule) }
123
+ }
124
+ end
125
+
126
+ alias :to_xml :serialize
127
+ alias :clone :dup
128
+
129
+ # Get the hash of namespaces on the root Nokogiri::XML::Node
130
+ def namespaces
131
+ root ? root.namespaces : {}
132
+ end
133
+
134
+ ####
135
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
136
+ # Returns an empty fragment if +tags+ is nil.
137
+ def fragment tags = nil
138
+ DocumentFragment.new(self, tags)
139
+ end
140
+
141
+ undef_method :swap, :parent, :namespace, :default_namespace=
142
+ undef_method :add_namespace_definition, :attributes
143
+ undef_method :namespace_definitions, :add_namespace
144
+ undef_method :line if method_defined?(:line)
145
+
146
+ def add_child child
147
+ raise "Document already has a root node" if root
148
+ if child.type == Node::DOCUMENT_FRAG_NODE
149
+ raise "Document cannot have multiple root nodes" if child.children.size > 1
150
+ super(child.children.first)
151
+ else
152
+ super
153
+ end
154
+ end
155
+ alias :<< :add_child
156
+
157
+ private
158
+ def inspect_attributes
159
+ [:name, :children]
160
+ end
161
+ end
162
+ end
163
+ end
@@ -0,0 +1,73 @@
1
+ module Nokogiri
2
+ module XML
3
+ class DocumentFragment < Nokogiri::XML::Node
4
+ def initialize document, tags=nil
5
+ if tags
6
+ if self.kind_of?(Nokogiri::HTML::DocumentFragment)
7
+ HTML::SAX::Parser.new(FragmentHandler.new(self, tags)).parse(tags)
8
+ else
9
+ wrapped = "<div>#{tags.strip}</div>"
10
+ XML::SAX::Parser.new(FragmentHandler.new(self, wrapped)).parse(wrapped)
11
+ div = self.child
12
+ div.children.each { |child| child.parent = self }
13
+ div.unlink
14
+ end
15
+ end
16
+ end
17
+
18
+ ###
19
+ # return the name for DocumentFragment
20
+ def name
21
+ '#document-fragment'
22
+ end
23
+
24
+ ###
25
+ # Convert this DocumentFragment to a string
26
+ def to_s
27
+ children.to_s
28
+ end
29
+
30
+ ###
31
+ # Convert this DocumentFragment to html
32
+ # See Nokogiri::XML::NodeSet#to_html
33
+ def to_html *args
34
+ children.to_html(*args)
35
+ end
36
+
37
+ ###
38
+ # Convert this DocumentFragment to xhtml
39
+ # See Nokogiri::XML::NodeSet#to_xhtml
40
+ def to_xhtml *args
41
+ children.to_xhtml(*args)
42
+ end
43
+
44
+ ###
45
+ # Convert this DocumentFragment to xml
46
+ # See Nokogiri::XML::NodeSet#to_xml
47
+ def to_xml *args
48
+ children.to_xml(*args)
49
+ end
50
+
51
+ ###
52
+ # Search this fragment. See Nokogiri::XML::Node#css
53
+ def css *args
54
+ if children.any?
55
+ children.css(*args)
56
+ else
57
+ NodeSet.new(document)
58
+ end
59
+ end
60
+
61
+ alias :serialize :to_s
62
+
63
+ class << self
64
+ ####
65
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
66
+ def parse tags
67
+ self.new(XML::Document.new, tags)
68
+ end
69
+ end
70
+
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,11 @@
1
+ module Nokogiri
2
+ module XML
3
+ class DTD < Nokogiri::XML::Node
4
+ undef_method :attribute_nodes
5
+ undef_method :content
6
+ undef_method :namespace
7
+ undef_method :namespace_definitions
8
+ undef_method :line if method_defined?(:line)
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,36 @@
1
+ module Nokogiri
2
+ module XML
3
+ ###
4
+ # Represents the allowed content in an Element Declaration inside a DTD:
5
+ #
6
+ # <?xml version="1.0"?><?TEST-STYLE PIDATA?>
7
+ # <!DOCTYPE staff SYSTEM "staff.dtd" [
8
+ # <!ELEMENT div1 (head, (p | list | note)*, div2*)>
9
+ # ]>
10
+ # </root>
11
+ #
12
+ # ElementContent represents the tree inside the <!ELEMENT> tag shown above
13
+ # that lists the possible content for the div1 tag.
14
+ class ElementContent
15
+ # Possible definitions of type
16
+ PCDATA = 1
17
+ ELEMENT = 2
18
+ SEQ = 3
19
+ OR = 4
20
+
21
+ # Possible content occurrences
22
+ ONCE = 1
23
+ OPT = 2
24
+ MULT = 3
25
+ PLUS = 4
26
+
27
+ attr_reader :document
28
+
29
+ ###
30
+ # Get the children of this ElementContent node
31
+ def children
32
+ [c1, c2].compact
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,13 @@
1
+ module Nokogiri
2
+ module XML
3
+ class ElementDecl < Nokogiri::XML::Node
4
+ undef_method :namespace
5
+ undef_method :namespace_definitions
6
+ undef_method :line if method_defined?(:line)
7
+
8
+ def inspect
9
+ "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,15 @@
1
+ module Nokogiri
2
+ module XML
3
+ class EntityDecl < Nokogiri::XML::Node
4
+ undef_method :attribute_nodes
5
+ undef_method :attributes
6
+ undef_method :namespace
7
+ undef_method :namespace_definitions
8
+ undef_method :line if method_defined?(:line)
9
+
10
+ def inspect
11
+ "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,73 @@
1
+ module Nokogiri
2
+ module XML
3
+ class FragmentHandler < Nokogiri::XML::SAX::Document # :nodoc:
4
+ QNAME_REGEX = /(.*):(.*)/
5
+
6
+ def initialize node, original_html
7
+ @doc_started = false
8
+ @document = node.document
9
+ @stack = [node]
10
+ @html_eh = node.kind_of? HTML::DocumentFragment
11
+
12
+ # the regexes used in start_element() and characters() anchor at
13
+ # start-of-line, but we really only want them to anchor at
14
+ # start-of-doc. so let's only save up to the first newline.
15
+ #
16
+ # this implementation choice was the result of some benchmarks, if
17
+ # you're curious: http://gist.github.com/115936
18
+ #
19
+ @original_html = original_html.lstrip
20
+ newline_index = @original_html.index("\n")
21
+ @original_html = @original_html[0,newline_index] if newline_index
22
+ end
23
+
24
+ def start_element name, attrs = []
25
+ regex = @html_eh ? %r{^\s*<#{Regexp.escape(name)}}i :
26
+ %r{^\s*<#{Regexp.escape(name)}}
27
+
28
+ @doc_started = true if @original_html =~ regex
29
+ return unless @doc_started
30
+
31
+ ns = nil
32
+ if @document.root
33
+ match = name.match(QNAME_REGEX)
34
+ if match
35
+ prefix, name = match[1], match[2]
36
+ ns = @document.root.namespace_definitions.detect { |x|
37
+ x.prefix == prefix
38
+ }
39
+ end
40
+ end
41
+
42
+ node = Element.new(name, @document)
43
+ attrs << "" unless (attrs.length % 2) == 0
44
+ Hash[*attrs].each do |k,v|
45
+ node[k] = v
46
+ end
47
+
48
+ node.namespace = ns if ns
49
+
50
+ @stack.last << node
51
+ @stack << node
52
+ end
53
+
54
+ def characters string
55
+ @doc_started = true if @original_html.strip =~ %r{^\s*#{Regexp.escape(string.strip)}}
56
+ @stack.last << Text.new(string, @document)
57
+ end
58
+
59
+ def comment string
60
+ @stack.last << Comment.new(@document, string)
61
+ end
62
+
63
+ def cdata_block string
64
+ @stack.last << CDATA.new(@document, string)
65
+ end
66
+
67
+ def end_element name
68
+ return unless @stack.last.name == name
69
+ @stack.pop
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,13 @@
1
+ module Nokogiri
2
+ module XML
3
+ class Namespace
4
+ include Nokogiri::XML::PP::Node
5
+ attr_reader :document
6
+
7
+ private
8
+ def inspect_attributes
9
+ [:prefix, :href]
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,730 @@
1
+ require 'stringio'
2
+ require 'nokogiri/xml/node/save_options'
3
+
4
+ module Nokogiri
5
+ module XML
6
+ ####
7
+ # Nokogiri::XML::Node is your window to the fun filled world of dealing
8
+ # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
9
+ # to a hash with regard to attributes. For example (from irb):
10
+ #
11
+ # irb(main):004:0> node
12
+ # => <a href="#foo" id="link">link</a>
13
+ # irb(main):005:0> node['href']
14
+ # => "#foo"
15
+ # irb(main):006:0> node.keys
16
+ # => ["href", "id"]
17
+ # irb(main):007:0> node.values
18
+ # => ["#foo", "link"]
19
+ # irb(main):008:0> node['class'] = 'green'
20
+ # => "green"
21
+ # irb(main):009:0> node
22
+ # => <a href="#foo" id="link" class="green">link</a>
23
+ # irb(main):010:0>
24
+ #
25
+ # See Nokogiri::XML::Node#[] and Nokogiri::XML::Node#[]= for more information.
26
+ #
27
+ # Nokogiri::XML::Node also has methods that let you move around your
28
+ # tree. For navigating your tree, see:
29
+ #
30
+ # * Nokogiri::XML::Node#parent
31
+ # * Nokogiri::XML::Node#children
32
+ # * Nokogiri::XML::Node#next
33
+ # * Nokogiri::XML::Node#previous
34
+ #
35
+ # You may search this node's subtree using Node#xpath and Node#css
36
+ class Node
37
+ include Nokogiri::XML::PP::Node
38
+
39
+ # Element node type, see Nokogiri::XML::Node#element?
40
+ ELEMENT_NODE = 1
41
+ # Attribute node type
42
+ ATTRIBUTE_NODE = 2
43
+ # Text node type, see Nokogiri::XML::Node#text?
44
+ TEXT_NODE = 3
45
+ # CDATA node type, see Nokogiri::XML::Node#cdata?
46
+ CDATA_SECTION_NODE = 4
47
+ # Entity reference node type
48
+ ENTITY_REF_NODE = 5
49
+ # Entity node type
50
+ ENTITY_NODE = 6
51
+ # PI node type
52
+ PI_NODE = 7
53
+ # Comment node type, see Nokogiri::XML::Node#comment?
54
+ COMMENT_NODE = 8
55
+ # Document node type, see Nokogiri::XML::Node#xml?
56
+ DOCUMENT_NODE = 9
57
+ # Document type node type
58
+ DOCUMENT_TYPE_NODE = 10
59
+ # Document fragment node type
60
+ DOCUMENT_FRAG_NODE = 11
61
+ # Notation node type
62
+ NOTATION_NODE = 12
63
+ # HTML document node type, see Nokogiri::XML::Node#html?
64
+ HTML_DOCUMENT_NODE = 13
65
+ # DTD node type
66
+ DTD_NODE = 14
67
+ # Element declaration type
68
+ ELEMENT_DECL = 15
69
+ # Attribute declaration type
70
+ ATTRIBUTE_DECL = 16
71
+ # Entity declaration type
72
+ ENTITY_DECL = 17
73
+ # Namespace declaration type
74
+ NAMESPACE_DECL = 18
75
+ # XInclude start type
76
+ XINCLUDE_START = 19
77
+ # XInclude end type
78
+ XINCLUDE_END = 20
79
+ # DOCB document node type
80
+ DOCB_DOCUMENT_NODE = 21
81
+
82
+ def initialize name, document
83
+ # ... Ya. This is empty on purpose.
84
+ end
85
+
86
+ ###
87
+ # Decorate this node with the decorators set up in this node's Document
88
+ def decorate!
89
+ document.decorate(self)
90
+ end
91
+
92
+ ###
93
+ # Search this node for +paths+. +paths+ can be XPath or CSS, and an
94
+ # optional hash of namespaces may be appended.
95
+ # See Node#xpath and Node#css.
96
+ def search *paths
97
+ ns = paths.last.is_a?(Hash) ? paths.pop :
98
+ (document.root ? document.root.namespaces : {})
99
+ xpath(*(paths.map { |path|
100
+ path = path.to_s
101
+ path =~ /^(\.\/|\/)/ ? path : CSS.xpath_for(
102
+ path,
103
+ :prefix => ".//",
104
+ :ns => ns
105
+ )
106
+ }.flatten.uniq) + [ns])
107
+ end
108
+ alias :/ :search
109
+
110
+ ###
111
+ # Search this node for XPath +paths+. +paths+ must be one or more XPath
112
+ # queries. A hash of namespaces may be appended. For example:
113
+ #
114
+ # node.xpath('.//title')
115
+ # node.xpath('.//foo:name', { 'foo' => 'http://example.org/' })
116
+ # node.xpath('.//xmlns:name', node.root.namespaces)
117
+ #
118
+ # Custom XPath functions may also be defined. To define custom functions
119
+ # create a class and implement the function you want to define.
120
+ # For example:
121
+ #
122
+ # node.xpath('.//title[regex(., "\w+")]', Class.new {
123
+ # def regex node_set, regex
124
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
125
+ # end
126
+ # }.new)
127
+ #
128
+ def xpath *paths
129
+ # Pop off our custom function handler if it exists
130
+ handler = ![
131
+ Hash, String, Symbol
132
+ ].include?(paths.last.class) ? paths.pop : nil
133
+
134
+ ns = paths.last.is_a?(Hash) ? paths.pop :
135
+ (document.root ? document.root.namespaces : {})
136
+
137
+ return NodeSet.new(document) unless document
138
+
139
+ sets = paths.map { |path|
140
+ ctx = XPathContext.new(self)
141
+ ctx.register_namespaces(ns)
142
+ path = path.gsub(/\/xmlns:/,'/:') unless Nokogiri.uses_libxml?
143
+ set = ctx.evaluate(path, handler).node_set
144
+ set.document = document
145
+ document.decorate(set)
146
+ set
147
+ }
148
+ return sets.first if sets.length == 1
149
+
150
+ NodeSet.new(document) do |combined|
151
+ document.decorate(combined)
152
+ sets.each do |set|
153
+ set.each do |node|
154
+ combined << node
155
+ end
156
+ end
157
+ end
158
+ end
159
+
160
+ ###
161
+ # Search this node for CSS +rules+. +rules+ must be one or more CSS
162
+ # selectors. For example:
163
+ #
164
+ # node.css('title')
165
+ # node.css('body h1.bold')
166
+ # node.css('div + p.green', 'div#one')
167
+ #
168
+ # Custom CSS pseudo classes may also be defined. To define custom pseudo
169
+ # classes, create a class and implement the custom pseudo class you
170
+ # want defined. The first argument to the method will be the current
171
+ # matching NodeSet. Any other arguments are ones that you pass in.
172
+ # For example:
173
+ #
174
+ # node.css('title:regex("\w+")', Class.new {
175
+ # def regex node_set, regex
176
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
177
+ # end
178
+ # }.new)
179
+ #
180
+ def css *rules
181
+ # Pop off our custom function handler if it exists
182
+ handler = ![
183
+ Hash, String, Symbol
184
+ ].include?(rules.last.class) ? rules.pop : nil
185
+
186
+ ns = rules.last.is_a?(Hash) ? rules.pop :
187
+ (document.root ? document.root.namespaces : {})
188
+
189
+ rules = rules.map { |rule|
190
+ xpath_rule = CSS.xpath_for(rule, :prefix => ".//", :ns => ns)
191
+ }.flatten.uniq + [ns, handler].compact
192
+
193
+ xpath(*rules)
194
+ end
195
+
196
+ ###
197
+ # Search this node's immediate children using CSS selector +selector+
198
+ def > selector
199
+ ns = document.root.namespaces
200
+ xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
201
+ end
202
+
203
+ ###
204
+ # Search for the first occurrence of +path+.
205
+ # Returns nil if nothing is found, otherwise a Node.
206
def at path, ns = document.root ? document.root.namespaces : {}
  # Run the full search, then keep only the first match (nil when empty).
  matches = search(path, ns)
  matches.first
end
alias :% :at
210
+
211
+ ##
212
+ # Search this node for the first occurrence of XPath +paths+.
213
+ # Equivalent to <tt>xpath(paths).first</tt>
214
+ # See Node#xpath for more information.
215
+ #
216
def at_xpath *paths
  # First entry of the XPath result, or nil when nothing matched.
  found = xpath(*paths)
  found.first
end
219
+
220
+ ##
221
+ # Search this node for the first occurrence of CSS +rules+.
222
+ # Equivalent to <tt>css(rules).first</tt>
223
+ # See Node#css for more information.
224
+ #
225
def at_css *rules
  # First entry of the CSS result, or nil when nothing matched.
  found = css(*rules)
  found.first
end
228
+
229
+ ###
230
+ # Get the attribute value for the attribute +name+
231
def [] name
  # Attribute names are looked up as Strings, whatever was passed in.
  attr_name = name.to_s
  key?(attr_name) ? get(attr_name) : nil
end
235
+
236
+ ###
237
+ # Add +node+ as a child of this Node.
238
+ # The new node must be a Nokogiri::XML::Node or a non-empty String.
239
+ # Returns the new child node.
240
def add_child(node)
  Node.verify_nodeishness(node)
  # Anything other than a document fragment is attached directly.
  return add_child_node(node) unless node.type == DOCUMENT_FRAG_NODE

  # A fragment is attached child by child, in order.
  node.children.each { |child| add_child_node child }
end
250
+
251
+ ###
252
+ # Insert +node+ before this Node (as a sibling).
253
def add_previous_sibling(node)
  Node.verify_nodeishness(node)
  # Anything other than a document fragment is inserted directly.
  return add_previous_sibling_node(node) unless node.type == DOCUMENT_FRAG_NODE

  # Each fragment child is inserted immediately before self, in order, so
  # the children end up in their original sequence.
  node.children.each { |child| add_previous_sibling_node child }
end
263
+
264
+ ###
265
+ # Insert +node+ after this Node (as a sibling).
266
def add_next_sibling(node)
  Node.verify_nodeishness(node)
  # Anything other than a document fragment is inserted directly.
  return add_next_sibling_node(node) unless node.type == DOCUMENT_FRAG_NODE

  # Each child is inserted immediately after self, so the children are
  # walked in reverse to end up in their original order.
  node.children.reverse.each { |child| add_next_sibling_node child }
end
276
+
277
# Sibling navigation and removal
alias next next_sibling
alias previous previous_sibling
alias remove unlink

# Attribute access
alias get_attribute []
alias attr []
alias set_attribute []=
alias has_attribute? key?

# Text content
alias text content
alias inner_text content
alias to_str text

# Tree building, naming, typing and copying
alias << add_child
alias name node_name
alias name= node_name=
alias type node_type
alias clone dup
292
+
293
+ ####
294
+ # Returns a hash containing the node's attributes. The key is
295
+ # the attribute name, the value is a Nokogiri::XML::Attr
296
+ # representing the attribute.
297
def attributes
  # Build [name, attr] pairs, then hand them to Hash[] as a flat list.
  pairs = attribute_nodes.map do |attr|
    [attr.node_name, attr]
  end
  Hash[*pairs.flatten]
end
302
+
303
+ ###
304
+ # Get the attribute values for this Node.
305
def values
  # One value per attribute node, in attribute order.
  attribute_nodes.map do |attr|
    attr.value
  end
end
308
+
309
+ ###
310
+ # Get the attribute names for this Node.
311
def keys
  # One name per attribute node, in attribute order.
  attribute_nodes.map do |attr|
    attr.node_name
  end
end
314
+
315
+ ###
316
+ # Iterate over each attribute name and value pair for this Node.
317
def each &block
  # The block receives the attribute name and value, not the Attr node.
  attribute_nodes.each do |attr|
    block.call(attr.node_name, attr.value)
  end
end
322
+
323
+ ###
324
+ # Remove the attribute named +name+
325
def remove_attribute name
  # Normalise the name the same way #[] does, so that Symbol names work:
  # the #attributes hash is keyed by String attribute names.
  attr_name = name.to_s
  # Evaluates to nil when the attribute is absent; otherwise to whatever
  # the attribute's #remove returns.
  attributes[attr_name].remove if key? attr_name
end
alias :delete :remove_attribute
329
+
330
+ ###
331
+ # Returns true if this Node matches +selector+
332
def matches? selector
  # Search from the topmost ancestor and check whether self is among the
  # results.
  topmost = ancestors.last
  topmost.search(selector).include?(self)
end
335
+
336
+ ####
337
+ # Create nodes from +data+ and insert them before this node
338
+ # (as a sibling).
339
def before data
  # Each parsed node is inserted directly before self, in parse order.
  new_nodes = fragment(data).children
  new_nodes.each { |node| add_previous_sibling node }
  self
end
345
+
346
+ ####
347
+ # Create nodes from +data+ and insert them after this node
348
+ # (as a sibling).
349
def after data
  # Every node is inserted directly after self, so they are added
  # last-first to end up in parse order.
  new_nodes = fragment(data).children.to_a.reverse
  new_nodes.each { |node| add_next_sibling node }
  self
end
355
+
356
+ ####
357
+ # Swap this Node for new nodes made from +data+
358
def swap data
  # Put the replacement nodes in front of self first, then detach self.
  before(data)
  remove

  self
end
363
+
364
+ ####
365
+ # Set the inner_html for this Node to +tags+
366
def inner_html= tags
  # Detach every existing child before appending the parsed replacements.
  children.each { |old_child| old_child.remove }

  replacements = fragment(tags).children.to_a
  replacements.each { |node| add_child node }
  self
end
374
+
375
def fragment tags # :nodoc:
  # TODO: deprecate?
  # Fragment creation is delegated to the owning document.
  owner = document
  owner.fragment(tags)
end
379
+
380
+ ####
381
+ # Set the Node content to +string+. The content gets XML escaped.
382
def content= string
  # Coerce to String and escape before handing over to the native setter.
  escaped = encode_special_chars(string.to_s)
  self.native_content = escaped
end
385
+
386
+ ###
387
+ # Set the parent Node for this Node
388
def parent= parent_node
  # Reparenting is expressed as "append self to the new parent".
  parent_node.add_child(self)

  return parent_node
end
392
+
393
+ ###
394
+ # Get a hash containing the Namespace definitions for this Node
395
def namespaces
  pairs = namespace_definitions.map do |definition|
    # "xmlns" for a definition without a prefix, "xmlns:prefix" otherwise.
    key = ['xmlns', definition.prefix].compact.join(':')
    if RUBY_VERSION >= '1.9' && document.encoding
      begin
        key.force_encoding document.encoding
      rescue ArgumentError
        # force_encoding rejected the encoding; keep the key as it is.
      end
    end
    [key, definition.href]
  end

  Hash[*pairs.flatten]
end
407
+
408
+ # Returns true if this is a Comment
409
def comment?
  # True exactly when the node type equals COMMENT_NODE.
  COMMENT_NODE == type
end
412
+
413
+ # Returns true if this is a CDATA
414
def cdata?
  # True exactly when the node type equals CDATA_SECTION_NODE.
  CDATA_SECTION_NODE == type
end
417
+
418
+ # Returns true if this is an XML::Document node
419
def xml?
  # True exactly when the node type equals DOCUMENT_NODE.
  DOCUMENT_NODE == type
end
422
+
423
+ # Returns true if this is an HTML::Document node
424
def html?
  # True exactly when the node type equals HTML_DOCUMENT_NODE.
  HTML_DOCUMENT_NODE == type
end
427
+
428
+ # Returns true if this is a Text node
429
def text?
  # True exactly when the node type equals TEXT_NODE.
  TEXT_NODE == type
end
432
+
433
+ ###
434
+ # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
435
+ # nil on XML documents and on unknown tags.
436
def description
  # Element descriptions are only looked up for non-XML documents.
  if document.xml?
    nil
  else
    Nokogiri::HTML::ElementDescription[name]
  end
end
440
+
441
+ ###
442
+ # Is this a read only node?
443
def read_only?
  # According to gdome2, these are read-only node types
  read_only_types = [NOTATION_NODE, ENTITY_NODE, ENTITY_DECL]
  read_only_types.include?(type)
end
447
+
448
+ # Returns true if this is an Element node
449
def element?
  # True exactly when the node type equals ELEMENT_NODE.
  ELEMENT_NODE == type
end
alias :elem? :element?
453
+
454
+ ###
455
+ # Turn this node in to a string. If the document is HTML, this method
456
+ # returns html. If the document is XML, this method returns XML.
457
+ def to_s
458
+ document.xml? ? to_xml : to_html
459
+ end
460
+
461
+ # Get the inner_html for this node's Node#children
462
def inner_html *args
  # Serialise each child with the given arguments and concatenate.
  pieces = children.map do |child|
    child.to_html(*args)
  end
  pieces.join
end
465
+
466
+ # Get the path to this node as a CSS expression
467
def css_path
  # Drop the empty segments produced by splitting on "/" (e.g. the one in
  # front of a leading slash), then rewrite XPath positions such as "[2]"
  # as ":nth-of-type(2)".
  segments = path.split(/\//).reject { |part| part.length == 0 }
  selectors = segments.map do |part|
    part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
  end
  selectors.join(' > ')
end
472
+
473
+ ###
474
+ # Get a list of ancestor Node for this Node. If +selector+ is given,
475
+ # the ancestors must match +selector+
476
def ancestors selector = nil
  # Nodes that cannot report a parent, or have none, have no ancestors.
  return NodeSet.new(document) unless respond_to?(:parent)
  return NodeSet.new(document) unless parent

  # Walk upwards, collecting ancestors from the nearest to the topmost.
  parents = [parent]

  while parents.last.respond_to?(:parent)
    break unless ctx_parent = parents.last.parent
    parents << ctx_parent
  end

  return NodeSet.new(document, parents) unless selector

  # Run the selector once, from the topmost ancestor, and reuse the result
  # for every membership test instead of repeating the search per ancestor.
  matches = parents.last.search(selector)

  NodeSet.new(document, parents.find_all { |ancestor|
    matches.include?(ancestor)
  })
end
495
+
496
+ ###
497
+ # Set the default namespace for this node to +url+
498
def default_namespace= url
  # The definition is added with a nil prefix.
  prefix = nil
  add_namespace_definition(prefix, url)
end
alias :add_namespace :add_namespace_definition
502
+
503
+ ###
504
+ # Set the namespace for this node to +ns+
505
def namespace= ns
  # Check the type first: calling #document on an arbitrary object would
  # raise NoMethodError and mask the intended TypeError below.
  unless ns.is_a? Nokogiri::XML::Namespace
    raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
  end
  # A namespace declared on another document is rejected.
  if ns.document != document
    raise ArgumentError, 'namespace must be declared on the same document'
  end
  set_namespace ns
end
514
+
515
+ ####
516
+ # Yields self and all children to +block+ recursively.
517
def traverse &block
  # Post-order walk: every descendant is passed to the block before self.
  children.each do |child|
    child.traverse(&block)
  end
  block.call(self)
end
521
+
522
+ ###
523
+ # Accept a visitor. This method calls "visit" on +visitor+ with self.
524
def accept visitor
  # Hand self to the visitor and return whatever #visit returns.
  outcome = visitor.visit(self)
  outcome
end
527
+
528
+ ####
529
+ # +replace+ this Node with the +node+ in the Document.
530
+ # The new node must be a Nokogiri::XML::Node or a non-empty String.
531
+ # Returns the new child node.
532
def replace node
  Node.verify_nodeishness(node)
  # Anything other than a document fragment is swapped in directly.
  return replace_node(node) unless node.type == DOCUMENT_FRAG_NODE

  # For a fragment, its children are spliced in before self and self is
  # then detached.
  node.children.each { |child| add_previous_sibling child }
  unlink
end
543
+
544
+ ###
545
+ # Test to see if this Node is equal to +other+
546
def == other
  # nil/false and objects that cannot report a pointer_id never compare
  # equal to a node.
  return false unless other && other.respond_to?(:pointer_id)

  pointer_id == other.pointer_id
end
551
+
552
+ ###
553
+ # Serialize Node using +options+. Save options can also be set using a
554
+ # block. See SaveOptions.
555
+ #
556
+ # These two statements are equivalent:
557
+ #
558
+ # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
559
+ #
560
+ # or
561
+ #
562
+ # node.serialize(:encoding => 'UTF-8') do |config|
563
+ # config.format.as_xml
564
+ # end
565
+ #
566
def serialize *args, &block
  # Accept either an options Hash or the positional (encoding, save_with)
  # form.
  if args.first.is_a?(Hash)
    options = args.shift
  else
    options = {
      :encoding => args[0],
      :save_with => args[1] || SaveOptions::FORMAT
    }
  end

  encoding = options[:encoding] || document.encoding

  # Tag the output buffer with the target encoding where Strings support it.
  buffer = ""
  if encoding && buffer.respond_to?(:force_encoding)
    buffer.force_encoding(Encoding.find(encoding))
  end

  io = StringIO.new(buffer)
  write_to io, options, &block
  io.string
end
582
+
583
+ ###
584
+ # Serialize this Node to HTML
585
+ #
586
+ # doc.to_html
587
+ #
588
+ # See Node#write_to for a list of +options+. For formatted output,
589
+ # use Node#to_xhtml instead.
590
def to_html options = {}
  # FIXME: this is a hack around broken libxml versions
  if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
    return dump_html
  end

  # Apply the HTML defaults only when the caller did not pick save options.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_HTML
  end

  serialize(options)
end
601
+
602
+ ###
603
+ # Serialize this Node to XML using +options+
604
+ #
605
+ # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
606
+ #
607
+ # See Node#write_to for a list of +options+
608
def to_xml options = {}
  # Default to formatted XML output unless the caller chose save options.
  # Note that the default is stored in the +options+ Hash that was passed in.
  options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML

  serialize(options)
end
615
+
616
+ ###
617
+ # Serialize this Node to XHTML using +options+
618
+ #
619
+ # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
620
+ #
621
+ # See Node#write_to for a list of +options+
622
def to_xhtml options = {}
  # FIXME: this is a hack around broken libxml versions
  if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
    return dump_html
  end

  # Apply the XHTML defaults only when the caller did not pick save options.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_XHTML
  end

  serialize(options)
end
633
+
634
+ ###
635
+ # Write Node to +io+ with +options+. +options+ modify the output of
636
+ # this method. Valid options are:
637
+ #
638
+ # * +:encoding+ for changing the encoding
639
+ # * +:indent_text+ the indentation text, defaults to one space
640
+ # * +:indent+ the number of +:indent_text+ to use, defaults to 2
641
+ # * +:save_with+ a combination of SaveOptions constants.
642
+ #
643
+ # To save with UTF-8 indented twice:
644
+ #
645
+ # node.write_to(io, :encoding => 'UTF-8', :indent => 2)
646
+ #
647
+ # To save indented with two dashes:
648
+ #
649
+ # node.write_to(io, :indent_text => '-', :indent => 2)
650
+ #
651
def write_to io, *options
  # Keep a handle on the positional arguments: once +options+ is rebound to
  # a Hash, options[0] / options[1] would be Hash lookups and the positional
  # encoding / save options would be silently lost.
  positional = options
  options = positional.first.is_a?(Hash) ? positional.shift : {}

  # Hash entries win; positional values are the fallback.
  encoding = options[:encoding] || positional[0]
  save_options = options[:save_with] || positional[1] || SaveOptions::FORMAT
  indent_text = options[:indent_text] || ' '
  indent_times = options[:indent] || 2

  # An optional block may adjust the save options before writing.
  config = SaveOptions.new(save_options)
  yield config if block_given?

  native_write_to(io, encoding, indent_text * indent_times, config.options)
end
664
+
665
+ ###
666
+ # Write Node as HTML to +io+ with +options+
667
+ #
668
+ # See Node#write_to for a list of +options+
669
def write_html_to io, options = {}
  # FIXME: this is a hack around broken libxml versions
  if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
    return (io << dump_html)
  end

  # Apply the HTML defaults only when the caller did not pick save options.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_HTML
  end
  write_to io, options
end
679
+
680
+ ###
681
+ # Write Node as XHTML to +io+ with +options+
682
+ #
683
+ # See Node#write_to for a list of +options+
684
def write_xhtml_to io, options = {}
  # FIXME: this is a hack around broken libxml versions
  if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
    return (io << dump_html)
  end

  # Apply the XHTML defaults only when the caller did not pick save options.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_XHTML
  end
  write_to io, options
end
694
+
695
+ ###
696
+ # Write Node as XML to +io+ with +options+
697
+ #
698
+ # doc.write_xml_to io, :encoding => 'UTF-8'
699
+ #
700
+ # See Node#write_to for a list of options
701
def write_xml_to io, options = {}
  # Apply the XML defaults only when the caller did not pick save options.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT | SaveOptions::AS_XML
  end
  write_to io, options
end
705
+
706
+ ###
707
+ # Compare two Node objects with respect to their Document. Nodes from
708
+ # different documents cannot be compared.
709
def <=> other
  # Only nodes belonging to the same document can be ordered; anything
  # else yields nil.
  comparable = other.is_a?(Nokogiri::XML::Node) && document == other.document
  comparable ? compare(other) : nil
end
714
+
715
+ private
716
def self.verify_nodeishness(node)
  # Accept any XML::Node that is not a Document; reject everything else.
  return if !node.is_a?(Document) && node.is_a?(XML::Node)

  raise ArgumentError, <<-EOERR
Node.replace requires a Node argument, and cannot accept a Document.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
  EOERR
end
724
+
725
def inspect_attributes
  # NOTE(review): presumably the attribute names shown when a node is
  # inspected — confirm against the caller.
  [
    :name,
    :namespace,
    :attribute_nodes,
    :children
  ]
end
728
+ end
729
+ end
730
+ end