nokogiri-backupify 1.5.0.beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. data/.autotest +26 -0
  2. data/CHANGELOG.ja.rdoc +509 -0
  3. data/CHANGELOG.rdoc +490 -0
  4. data/Manifest.txt +274 -0
  5. data/README.ja.rdoc +106 -0
  6. data/README.rdoc +150 -0
  7. data/Rakefile +217 -0
  8. data/bin/nokogiri +54 -0
  9. data/deps.rip +5 -0
  10. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  11. data/ext/java/nokogiri/HtmlDocument.java +146 -0
  12. data/ext/java/nokogiri/HtmlElementDescription.java +145 -0
  13. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  14. data/ext/java/nokogiri/HtmlSaxParserContext.java +256 -0
  15. data/ext/java/nokogiri/NokogiriService.java +466 -0
  16. data/ext/java/nokogiri/XmlAttr.java +183 -0
  17. data/ext/java/nokogiri/XmlAttributeDecl.java +130 -0
  18. data/ext/java/nokogiri/XmlCdata.java +89 -0
  19. data/ext/java/nokogiri/XmlComment.java +84 -0
  20. data/ext/java/nokogiri/XmlDocument.java +514 -0
  21. data/ext/java/nokogiri/XmlDocumentFragment.java +216 -0
  22. data/ext/java/nokogiri/XmlDtd.java +464 -0
  23. data/ext/java/nokogiri/XmlElement.java +221 -0
  24. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  25. data/ext/java/nokogiri/XmlElementDecl.java +147 -0
  26. data/ext/java/nokogiri/XmlEntityDecl.java +161 -0
  27. data/ext/java/nokogiri/XmlEntityReference.java +75 -0
  28. data/ext/java/nokogiri/XmlNamespace.java +127 -0
  29. data/ext/java/nokogiri/XmlNode.java +1392 -0
  30. data/ext/java/nokogiri/XmlNodeSet.java +284 -0
  31. data/ext/java/nokogiri/XmlProcessingInstruction.java +103 -0
  32. data/ext/java/nokogiri/XmlReader.java +409 -0
  33. data/ext/java/nokogiri/XmlRelaxng.java +199 -0
  34. data/ext/java/nokogiri/XmlSaxParserContext.java +353 -0
  35. data/ext/java/nokogiri/XmlSaxPushParser.java +182 -0
  36. data/ext/java/nokogiri/XmlSchema.java +175 -0
  37. data/ext/java/nokogiri/XmlSyntaxError.java +114 -0
  38. data/ext/java/nokogiri/XmlText.java +135 -0
  39. data/ext/java/nokogiri/XmlXpathContext.java +175 -0
  40. data/ext/java/nokogiri/XsltStylesheet.java +181 -0
  41. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +205 -0
  42. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +73 -0
  43. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +80 -0
  44. data/ext/java/nokogiri/internals/NokogiriHandler.java +326 -0
  45. data/ext/java/nokogiri/internals/NokogiriHelpers.java +583 -0
  46. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +170 -0
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +118 -0
  48. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +73 -0
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  50. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
  51. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +120 -0
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +56 -0
  53. data/ext/java/nokogiri/internals/ParserContext.java +278 -0
  54. data/ext/java/nokogiri/internals/PushInputStream.java +411 -0
  55. data/ext/java/nokogiri/internals/ReaderNode.java +473 -0
  56. data/ext/java/nokogiri/internals/SaveContext.java +282 -0
  57. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +68 -0
  58. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  59. data/ext/java/nokogiri/internals/XmlDomParser.java +77 -0
  60. data/ext/java/nokogiri/internals/XmlDomParserContext.java +233 -0
  61. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  62. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
  63. data/ext/nokogiri/depend +358 -0
  64. data/ext/nokogiri/extconf.rb +124 -0
  65. data/ext/nokogiri/html_document.c +154 -0
  66. data/ext/nokogiri/html_document.h +10 -0
  67. data/ext/nokogiri/html_element_description.c +276 -0
  68. data/ext/nokogiri/html_element_description.h +10 -0
  69. data/ext/nokogiri/html_entity_lookup.c +32 -0
  70. data/ext/nokogiri/html_entity_lookup.h +8 -0
  71. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  72. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  73. data/ext/nokogiri/nokogiri.c +92 -0
  74. data/ext/nokogiri/nokogiri.h +160 -0
  75. data/ext/nokogiri/xml_attr.c +94 -0
  76. data/ext/nokogiri/xml_attr.h +9 -0
  77. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  78. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  79. data/ext/nokogiri/xml_cdata.c +56 -0
  80. data/ext/nokogiri/xml_cdata.h +9 -0
  81. data/ext/nokogiri/xml_comment.c +54 -0
  82. data/ext/nokogiri/xml_comment.h +9 -0
  83. data/ext/nokogiri/xml_document.c +478 -0
  84. data/ext/nokogiri/xml_document.h +23 -0
  85. data/ext/nokogiri/xml_document_fragment.c +48 -0
  86. data/ext/nokogiri/xml_document_fragment.h +10 -0
  87. data/ext/nokogiri/xml_dtd.c +202 -0
  88. data/ext/nokogiri/xml_dtd.h +10 -0
  89. data/ext/nokogiri/xml_element_content.c +123 -0
  90. data/ext/nokogiri/xml_element_content.h +10 -0
  91. data/ext/nokogiri/xml_element_decl.c +69 -0
  92. data/ext/nokogiri/xml_element_decl.h +9 -0
  93. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  94. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  95. data/ext/nokogiri/xml_entity_decl.c +110 -0
  96. data/ext/nokogiri/xml_entity_decl.h +10 -0
  97. data/ext/nokogiri/xml_entity_reference.c +52 -0
  98. data/ext/nokogiri/xml_entity_reference.h +9 -0
  99. data/ext/nokogiri/xml_io.c +31 -0
  100. data/ext/nokogiri/xml_io.h +11 -0
  101. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  102. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  103. data/ext/nokogiri/xml_namespace.c +84 -0
  104. data/ext/nokogiri/xml_namespace.h +13 -0
  105. data/ext/nokogiri/xml_node.c +1384 -0
  106. data/ext/nokogiri/xml_node.h +13 -0
  107. data/ext/nokogiri/xml_node_set.c +418 -0
  108. data/ext/nokogiri/xml_node_set.h +9 -0
  109. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  110. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  111. data/ext/nokogiri/xml_reader.c +684 -0
  112. data/ext/nokogiri/xml_reader.h +10 -0
  113. data/ext/nokogiri/xml_relax_ng.c +161 -0
  114. data/ext/nokogiri/xml_relax_ng.h +9 -0
  115. data/ext/nokogiri/xml_sax_parser.c +288 -0
  116. data/ext/nokogiri/xml_sax_parser.h +39 -0
  117. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  118. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  119. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  120. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  121. data/ext/nokogiri/xml_schema.c +205 -0
  122. data/ext/nokogiri/xml_schema.h +9 -0
  123. data/ext/nokogiri/xml_syntax_error.c +58 -0
  124. data/ext/nokogiri/xml_syntax_error.h +13 -0
  125. data/ext/nokogiri/xml_text.c +50 -0
  126. data/ext/nokogiri/xml_text.h +9 -0
  127. data/ext/nokogiri/xml_xpath_context.c +309 -0
  128. data/ext/nokogiri/xml_xpath_context.h +9 -0
  129. data/ext/nokogiri/xslt_stylesheet.c +258 -0
  130. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  131. data/lib/isorelax.jar +0 -0
  132. data/lib/jing.jar +0 -0
  133. data/lib/nekodtd.jar +0 -0
  134. data/lib/nekohtml.jar +0 -0
  135. data/lib/nokogiri.rb +143 -0
  136. data/lib/nokogiri/css.rb +23 -0
  137. data/lib/nokogiri/css/node.rb +99 -0
  138. data/lib/nokogiri/css/parser.rb +677 -0
  139. data/lib/nokogiri/css/parser.y +237 -0
  140. data/lib/nokogiri/css/parser_extras.rb +91 -0
  141. data/lib/nokogiri/css/syntax_error.rb +7 -0
  142. data/lib/nokogiri/css/tokenizer.rb +152 -0
  143. data/lib/nokogiri/css/tokenizer.rex +55 -0
  144. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  145. data/lib/nokogiri/decorators/slop.rb +35 -0
  146. data/lib/nokogiri/html.rb +36 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +221 -0
  149. data/lib/nokogiri/html/document_fragment.rb +41 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  152. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  153. data/lib/nokogiri/html/sax/parser.rb +52 -0
  154. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  155. data/lib/nokogiri/syntax_error.rb +4 -0
  156. data/lib/nokogiri/version.rb +35 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +418 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +218 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +84 -0
  165. data/lib/nokogiri/xml/dtd.rb +22 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  169. data/lib/nokogiri/xml/namespace.rb +13 -0
  170. data/lib/nokogiri/xml/node.rb +907 -0
  171. data/lib/nokogiri/xml/node/save_options.rb +45 -0
  172. data/lib/nokogiri/xml/node_set.rb +350 -0
  173. data/lib/nokogiri/xml/notation.rb +6 -0
  174. data/lib/nokogiri/xml/parse_options.rb +85 -0
  175. data/lib/nokogiri/xml/pp.rb +2 -0
  176. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  177. data/lib/nokogiri/xml/pp/node.rb +56 -0
  178. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  179. data/lib/nokogiri/xml/reader.rb +112 -0
  180. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  181. data/lib/nokogiri/xml/sax.rb +4 -0
  182. data/lib/nokogiri/xml/sax/document.rb +164 -0
  183. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  184. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  185. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  186. data/lib/nokogiri/xml/schema.rb +57 -0
  187. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  188. data/lib/nokogiri/xml/text.rb +9 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +52 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xercesImpl.jar +0 -0
  195. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  196. data/tasks/cross_compile.rb +177 -0
  197. data/tasks/test.rb +94 -0
  198. data/test/css/test_nthiness.rb +159 -0
  199. data/test/css/test_parser.rb +303 -0
  200. data/test/css/test_tokenizer.rb +198 -0
  201. data/test/css/test_xpath_visitor.rb +85 -0
  202. data/test/decorators/test_slop.rb +16 -0
  203. data/test/files/2ch.html +108 -0
  204. data/test/files/address_book.rlx +12 -0
  205. data/test/files/address_book.xml +10 -0
  206. data/test/files/bar/bar.xsd +4 -0
  207. data/test/files/dont_hurt_em_why.xml +422 -0
  208. data/test/files/exslt.xml +8 -0
  209. data/test/files/exslt.xslt +35 -0
  210. data/test/files/foo/foo.xsd +4 -0
  211. data/test/files/po.xml +32 -0
  212. data/test/files/po.xsd +66 -0
  213. data/test/files/shift_jis.html +10 -0
  214. data/test/files/shift_jis.xml +5 -0
  215. data/test/files/snuggles.xml +3 -0
  216. data/test/files/staff.dtd +10 -0
  217. data/test/files/staff.xml +59 -0
  218. data/test/files/staff.xslt +32 -0
  219. data/test/files/tlm.html +850 -0
  220. data/test/files/valid_bar.xml +2 -0
  221. data/test/helper.rb +171 -0
  222. data/test/html/sax/test_parser.rb +136 -0
  223. data/test/html/sax/test_parser_context.rb +48 -0
  224. data/test/html/test_builder.rb +164 -0
  225. data/test/html/test_document.rb +457 -0
  226. data/test/html/test_document_encoding.rb +123 -0
  227. data/test/html/test_document_fragment.rb +255 -0
  228. data/test/html/test_element_description.rb +100 -0
  229. data/test/html/test_named_characters.rb +14 -0
  230. data/test/html/test_node.rb +190 -0
  231. data/test/html/test_node_encoding.rb +27 -0
  232. data/test/test_convert_xpath.rb +135 -0
  233. data/test/test_css_cache.rb +45 -0
  234. data/test/test_encoding_handler.rb +46 -0
  235. data/test/test_memory_leak.rb +52 -0
  236. data/test/test_nokogiri.rb +132 -0
  237. data/test/test_reader.rb +403 -0
  238. data/test/test_soap4r_sax.rb +52 -0
  239. data/test/test_xslt_transforms.rb +189 -0
  240. data/test/xml/node/test_save_options.rb +20 -0
  241. data/test/xml/node/test_subclass.rb +44 -0
  242. data/test/xml/sax/test_parser.rb +338 -0
  243. data/test/xml/sax/test_parser_context.rb +113 -0
  244. data/test/xml/sax/test_push_parser.rb +156 -0
  245. data/test/xml/test_attr.rb +65 -0
  246. data/test/xml/test_attribute_decl.rb +86 -0
  247. data/test/xml/test_builder.rb +210 -0
  248. data/test/xml/test_cdata.rb +50 -0
  249. data/test/xml/test_comment.rb +29 -0
  250. data/test/xml/test_document.rb +675 -0
  251. data/test/xml/test_document_encoding.rb +26 -0
  252. data/test/xml/test_document_fragment.rb +192 -0
  253. data/test/xml/test_dtd.rb +107 -0
  254. data/test/xml/test_dtd_encoding.rb +33 -0
  255. data/test/xml/test_element_content.rb +56 -0
  256. data/test/xml/test_element_decl.rb +73 -0
  257. data/test/xml/test_entity_decl.rb +122 -0
  258. data/test/xml/test_entity_reference.rb +21 -0
  259. data/test/xml/test_namespace.rb +70 -0
  260. data/test/xml/test_node.rb +899 -0
  261. data/test/xml/test_node_attributes.rb +34 -0
  262. data/test/xml/test_node_encoding.rb +107 -0
  263. data/test/xml/test_node_reparenting.rb +321 -0
  264. data/test/xml/test_node_set.rb +708 -0
  265. data/test/xml/test_parse_options.rb +52 -0
  266. data/test/xml/test_processing_instruction.rb +30 -0
  267. data/test/xml/test_reader_encoding.rb +126 -0
  268. data/test/xml/test_relax_ng.rb +60 -0
  269. data/test/xml/test_schema.rb +89 -0
  270. data/test/xml/test_syntax_error.rb +12 -0
  271. data/test/xml/test_text.rb +47 -0
  272. data/test/xml/test_unparented_node.rb +381 -0
  273. data/test/xml/test_xpath.rb +237 -0
  274. data/test/xslt/test_custom_functions.rb +94 -0
  275. metadata +525 -0
@@ -0,0 +1,11 @@
module Nokogiri
  module XML
    # Text subclass used for CDATA section nodes.
    class CDATA < Nokogiri::XML::Text
      ###
      # The node name of a CDATA section; always '#cdata-section'.
      def name()
        '#cdata-section'
      end
    end
  end
end
@@ -0,0 +1,7 @@
module Nokogiri
  module XML
    # Node subclass that mixes in Nokogiri::XML::PP::CharacterData.
    class CharacterData < Nokogiri::XML::Node
      include Nokogiri::XML::PP::CharacterData
    end
  end
end
@@ -0,0 +1,218 @@
module Nokogiri
  module XML
    ##
    # Nokogiri::XML::Document is the main entry point for dealing with
    # XML documents.  The Document is created by parsing an XML document.
    # See Nokogiri.XML()
    #
    # For searching a Document, see Nokogiri::XML::Node#css and
    # Nokogiri::XML::Node#xpath
    class Document < Nokogiri::XML::Node
      ##
      # Parse an XML file.  +string_or_io+ may be a String, or any object that
      # responds to _read_ and _close_ such as an IO, or StringIO.
      # +url+ is resource where this document is located.  +encoding+ is the
      # encoding that should be used when processing the document. +options+
      # is a number that sets options in the parser, such as
      # Nokogiri::XML::ParseOptions::RECOVER.  See the constants in
      # Nokogiri::XML::ParseOptions.
      #
      # When a block is given, the options object is yielded to it before
      # parsing so the caller can adjust it.  A +nil+ or empty
      # +string_or_io+ returns a new, empty Document.
      def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block

        # Integer is used instead of Fixnum: Fixnum was folded into Integer
        # and later removed, while Integer matches on every Ruby version.
        options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
        # Give the options to the user
        yield options if block_given?

        if string_or_io.respond_to?(:read)
          # Fall back to the IO's path (when it has one) as the document URL
          url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
          return read_io(string_or_io, url, encoding, options.to_i)
        end

        # read_memory pukes on empty docs
        return new if string_or_io.nil? || string_or_io.empty?

        read_memory(string_or_io, url, encoding, options.to_i)
      end

      # A list of Nokogiri::XML::SyntaxError found when parsing a document
      attr_accessor :errors

      def initialize *args # :nodoc:
        @errors     = []
        @decorators = nil
      end

      ##
      # Create an element with +name+, and optionally setting the content and attributes.
      #
      #   doc.create_element "div" # <div></div>
      #   doc.create_element "div", :class => "container" # <div class='container'></div>
      #   doc.create_element "div", "contents" # <div>contents</div>
      #   doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
      #   doc.create_element("div") { |node| node['class'] = "container" } # <div class='container'></div>
      #
      # Hash keys of the form "xmlns" or "xmlns:prefix" are registered as
      # namespace definitions on the new element rather than being set as
      # attributes.  Any non-Hash argument becomes the element's content.
      def create_element name, *args, &block
        elm = Nokogiri::XML::Element.new(name, self, &block)
        args.each do |arg|
          case arg
          when Hash
            arg.each { |k,v|
              key = k.to_s
              if key =~ /^xmlns(:\w+)?$/
                # +nil+ for a bare "xmlns" key, the prefix otherwise
                ns_name = key.split(":", 2)[1]
                elm.add_namespace_definition ns_name, v
                next
              end
              elm[key] = v.to_s
            }
          else
            elm.content = arg
          end
        end
        elm
      end

      # Create a text node with +text+
      def create_text_node text, &block
        Nokogiri::XML::Text.new(text.to_s, self, &block)
      end

      # Create a CDATA element containing +text+
      def create_cdata text
        Nokogiri::XML::CDATA.new(self, text.to_s)
      end

      # The name of this document.  Always returns "document"
      def name
        'document'
      end

      # A reference to +self+
      def document
        self
      end

      ##
      # Recursively get all namespaces from this node and its subtree and
      # return them as a hash.
      #
      # For example, given this document:
      #
      #   <root xmlns:foo="bar">
      #     <bar xmlns:hello="world" />
      #   </root>
      #
      # This method will return:
      #
      #   { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
      #
      # WARNING: this method will clobber duplicate names in the keys.
      # For example, given this document:
      #
      #   <root xmlns:foo="bar">
      #     <bar xmlns:foo="baz" />
      #   </root>
      #
      # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
      #
      # Non-prefixed default namespaces (as in "xmlns=") are not included
      # in the hash.
      #
      # Note this is a very expensive operation in current implementation, as it
      # traverses the entire graph, and also has to bring each node across the
      # libxml bridge into a ruby object.
      def collect_namespaces
        ns = {}
        traverse { |j| ns.merge!(j.namespaces) }
        ns
      end

      # Get the list of decorators given +key+
      def decorators key
        @decorators ||= Hash.new
        @decorators[key] ||= []
      end

      ##
      # Validate this Document against its DTD.  Returns a list of errors on
      # the document or +nil+ when there is no DTD.
      def validate
        return nil unless internal_subset
        internal_subset.validate self
      end

      ##
      # Explore a document with shortcut methods. See Nokogiri::Slop for details.
      #
      # Note that any nodes that have been instantiated before #slop!
      # is called will not be decorated with sloppy behavior. So, if you're in
      # irb, the preferred idiom is:
      #
      #   irb> doc = Nokogiri::Slop my_markup
      #
      # and not
      #
      #   irb> doc = Nokogiri::HTML my_markup
      #   ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
      #   irb> doc.slop!
      #   ... which does absolutely nothing.
      #
      def slop!
        unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
          decorators(XML::Node) << Nokogiri::Decorators::Slop
          decorate!
        end

        self
      end

      ##
      # Apply any decorators to +node+
      def decorate node
        return unless @decorators
        @decorators.each { |klass,list|
          next unless node.is_a?(klass)
          list.each { |moodule| node.extend(moodule) }
        }
      end

      alias :to_xml :serialize
      alias :clone :dup

      # Get the hash of namespaces on the root Nokogiri::XML::Node
      def namespaces
        root ? root.namespaces : {}
      end

      ##
      # Create a Nokogiri::XML::DocumentFragment from +tags+
      # Returns an empty fragment if +tags+ is nil.
      def fragment tags = nil
        DocumentFragment.new(self, tags, self.root)
      end

      undef_method :swap, :parent, :namespace, :default_namespace=
      undef_method :add_namespace_definition, :attributes
      undef_method :namespace_definitions, :line, :add_namespace

      ##
      # Add +child+ as the root node of this Document.
      #
      # Raises when the document already has a root.  When +child+ is a
      # document fragment, its single child node is added instead; a
      # fragment holding more than one child raises.
      def add_child child
        raise "Document already has a root node" if root
        if child.type == Node::DOCUMENT_FRAG_NODE
          raise "Document cannot have multiple root nodes" if child.children.size > 1
          super(child.children.first)
        else
          super
        end
      end
      alias :<< :add_child

      private
      # XPath prefix used when a search is started from the document itself
      def implied_xpath_context
        "/"
      end

      # Attributes listed for this class by the PP inspection support
      def inspect_attributes
        [:name, :children]
      end
    end
  end
end
@@ -0,0 +1,84 @@
module Nokogiri
  module XML
    # Node subclass named '#document-fragment' whose children are the
    # nodes parsed from a piece of markup.
    class DocumentFragment < Nokogiri::XML::Node
      ####
      # Create a Nokogiri::XML::DocumentFragment from +tags+, owned by a
      # brand new XML::Document.
      def self.parse(tags)
        new(XML::Document.new, tags)
      end

      ##
      # Build a DocumentFragment out of the markup in +tags+.
      #
      # Nothing is parsed when +tags+ is missing.  When +ctx+ is given it
      # acts as the context node for the created subtree (e.g. namespaces
      # are resolved relative to +ctx+); otherwise +tags+ is wrapped in a
      # temporary <root> element and parsed as a standalone document.
      # Every resulting node is reparented onto this fragment.
      def initialize(document, tags = nil, ctx = nil)
        return self unless tags

        parsed =
          if ctx
            ctx.parse(tags)
          else
            wrapper = XML::Document.parse("<root>#{tags}</root>")
            wrapper.xpath("/root/node()")
          end

        parsed.each do |node|
          node.parent = self
        end
      end

      ###
      # The node name of a DocumentFragment
      def name
        '#document-fragment'
      end

      ###
      # String form of this fragment: its children converted with +to_s+
      def to_s
        children.to_s
      end

      ###
      # HTML form of this fragment's children.
      # See Nokogiri::XML::NodeSet#to_html
      def to_html(*args)
        children.to_html(*args)
      end

      ###
      # XHTML form of this fragment's children.
      # See Nokogiri::XML::NodeSet#to_xhtml
      def to_xhtml(*args)
        children.to_xhtml(*args)
      end

      ###
      # XML form of this fragment's children.
      # See Nokogiri::XML::NodeSet#to_xml
      def to_xml(*args)
        children.to_xml(*args)
      end

      ###
      # Search the children of this fragment with CSS; a fragment without
      # children yields an empty NodeSet.  See Nokogiri::XML::Node#css
      def css(*args)
        return NodeSet.new(document) unless children.any?

        children.css(*args)
      end

      alias :serialize :to_s

      private

      # Strings are parsed into a fragment of the owning document and its
      # children are returned; anything else is left to the superclass.
      def coerce(data)
        return document.fragment(data).children if String === data

        super
      end
    end
  end
end
@@ -0,0 +1,22 @@
module Nokogiri
  module XML
    # Node subclass for a DTD.  A handful of Node methods are removed from
    # this class, and the entries of +attributes+ are exposed through
    # +keys+ and +each+.
    class DTD < Nokogiri::XML::Node
      undef_method :attribute_nodes, :values, :content,
                   :namespace, :namespace_definitions
      undef_method :line if method_defined?(:line)

      # The keys of the hash returned by +attributes+
      def keys
        attributes.keys
      end

      # Call +block+ once per entry of +attributes+, passing a single
      # two-element [key, value] array.
      def each(&block)
        attributes.each do |name, entry|
          block.call([name, entry])
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
module Nokogiri
  module XML
    ###
    # Represents the allowed content in an Element Declaration inside a DTD:
    #
    #   <!DOCTYPE staff SYSTEM "staff.dtd" [
    #     <!ELEMENT div1 (head, (p | list | note)*, div2*)>
    #   ]>
    #
    # ElementContent represents the tree inside the <!ELEMENT> tag shown
    # above that lists the possible content for the div1 tag.
    class ElementContent
      # Possible definitions of type
      PCDATA, ELEMENT, SEQ, OR = 1, 2, 3, 4

      # Possible content occurrences
      ONCE, OPT, MULT, PLUS = 1, 2, 3, 4

      attr_reader :document

      ###
      # The child ElementContent nodes (+c1+ and +c2+), leaving out
      # whichever of them is nil.
      def children
        pair = [c1, c2]
        pair.compact
      end
    end
  end
end
@@ -0,0 +1,13 @@
module Nokogiri
  module XML
    # Node subclass for an element declaration.  The namespace related
    # readers (and +line+, when defined) are removed from this class.
    class ElementDecl < Nokogiri::XML::Node
      undef_method :namespace, :namespace_definitions
      undef_method :line if method_defined?(:line)

      # Class name, hexadecimal object id and the inspected string form.
      def inspect
        hex_id = sprintf("0x%x", object_id)
        "#<#{self.class.name}:#{hex_id} #{to_s.inspect}>"
      end
    end
  end
end
@@ -0,0 +1,19 @@
module Nokogiri
  module XML
    # Node subclass for an entity declaration.  Attribute and namespace
    # related readers (and +line+, when defined) are removed from this class.
    class EntityDecl < Nokogiri::XML::Node
      undef_method :attribute_nodes, :attributes,
                   :namespace, :namespace_definitions
      undef_method :line if method_defined?(:line)

      class << self
        # Construction is delegated to the document: returns
        # <tt>doc.create_entity(name, *args)</tt>.
        def new(name, doc, *args)
          doc.create_entity(name, *args)
        end
      end

      # Class name, hexadecimal object id and the inspected string form.
      def inspect
        hex_id = sprintf("0x%x", object_id)
        "#<#{self.class.name}:#{hex_id} #{to_s.inspect}>"
      end
    end
  end
end
@@ -0,0 +1,13 @@
module Nokogiri
  module XML
    # A namespace; exposes the +document+ it belongs to and mixes in the
    # Nokogiri::XML::PP::Node inspection support.
    class Namespace
      include Nokogiri::XML::PP::Node

      attr_reader :document

      private

      # Attributes listed for this class by the PP inspection support
      def inspect_attributes()
        [:prefix, :href]
      end
    end
  end
end
@@ -0,0 +1,907 @@
1
+ require 'stringio'
2
+ require 'nokogiri/xml/node/save_options'
3
+
4
+ module Nokogiri
5
+ module XML
6
+ ####
7
+ # Nokogiri::XML::Node is your window to the fun filled world of dealing
8
+ # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
9
+ # to a hash with regard to attributes. For example (from irb):
10
+ #
11
+ # irb(main):004:0> node
12
+ # => <a href="#foo" id="link">link</a>
13
+ # irb(main):005:0> node['href']
14
+ # => "#foo"
15
+ # irb(main):006:0> node.keys
16
+ # => ["href", "id"]
17
+ # irb(main):007:0> node.values
18
+ # => ["#foo", "link"]
19
+ # irb(main):008:0> node['class'] = 'green'
20
+ # => "green"
21
+ # irb(main):009:0> node
22
+ # => <a href="#foo" id="link" class="green">link</a>
23
+ # irb(main):010:0>
24
+ #
25
+ # See Nokogiri::XML::Node#[] and Nokogiri::XML::Node#[]= for more information.
26
+ #
27
+ # Nokogiri::XML::Node also has methods that let you move around your
28
+ # tree. For navigating your tree, see:
29
+ #
30
+ # * Nokogiri::XML::Node#parent
31
+ # * Nokogiri::XML::Node#children
32
+ # * Nokogiri::XML::Node#next
33
+ # * Nokogiri::XML::Node#previous
34
+ #
35
+ # You may search this node's subtree using Node#xpath and Node#css
36
+ class Node
37
+ include Nokogiri::XML::PP::Node
38
+ include Enumerable
39
+
40
+ # Element node type, see Nokogiri::XML::Node#element?
41
+ ELEMENT_NODE = 1
42
+ # Attribute node type
43
+ ATTRIBUTE_NODE = 2
44
+ # Text node type, see Nokogiri::XML::Node#text?
45
+ TEXT_NODE = 3
46
+ # CDATA node type, see Nokogiri::XML::Node#cdata?
47
+ CDATA_SECTION_NODE = 4
48
+ # Entity reference node type
49
+ ENTITY_REF_NODE = 5
50
+ # Entity node type
51
+ ENTITY_NODE = 6
52
+ # PI node type
53
+ PI_NODE = 7
54
+ # Comment node type, see Nokogiri::XML::Node#comment?
55
+ COMMENT_NODE = 8
56
+ # Document node type, see Nokogiri::XML::Node#xml?
57
+ DOCUMENT_NODE = 9
58
+ # Document type node type
59
+ DOCUMENT_TYPE_NODE = 10
60
+ # Document fragment node type
61
+ DOCUMENT_FRAG_NODE = 11
62
+ # Notation node type
63
+ NOTATION_NODE = 12
64
+ # HTML document node type, see Nokogiri::XML::Node#html?
65
+ HTML_DOCUMENT_NODE = 13
66
+ # DTD node type
67
+ DTD_NODE = 14
68
+ # Element declaration type
69
+ ELEMENT_DECL = 15
70
+ # Attribute declaration type
71
+ ATTRIBUTE_DECL = 16
72
+ # Entity declaration type
73
+ ENTITY_DECL = 17
74
+ # Namespace declaration type
75
+ NAMESPACE_DECL = 18
76
+ # XInclude start type
77
+ XINCLUDE_START = 19
78
+ # XInclude end type
79
+ XINCLUDE_END = 20
80
+ # DOCB document node type
81
+ DOCB_DOCUMENT_NODE = 21
82
+
def initialize(name, document) # :nodoc:
  # Intentionally left empty: both arguments are accepted and ignored here.
end
86
+
###
# Hand this node to its Document so the Document's decorators get applied
def decorate!
  owner = document
  owner.decorate(self)
end
92
+
###
# Search this node for +paths+, each of which may be XPath or CSS.  A
# trailing Hash is taken as the namespace bindings; without one, the
# namespaces of the document root (if there is a root) are used.
# Paths starting with "./" or "/" are passed through as XPath, all
# others are translated from CSS.
# See Node#xpath and Node#css.
def search(*paths)
  # TODO use  paths, handler, ns, binds = extract_params(paths)
  ns =
    if paths.last.is_a?(Hash)
      paths.pop
    elsif document.root
      document.root.namespaces
    else
      {}
    end

  prefix = "#{implied_xpath_context}/"

  queries = paths.map do |path|
    expression = path.to_s
    if expression =~ /^(\.\/|\/)/
      expression
    else
      CSS.xpath_for(expression, :prefix => prefix, :ns => ns)
    end
  end

  xpath(*(queries.flatten.uniq + [ns]))
end
alias :/ :search
114
+
###
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
#
# Search this node for XPath +paths+, which must be one or more XPath
# queries.
#
#   node.xpath('.//title')
#
# A hash of namespace bindings may be appended.  For example:
#
#   node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
#   node.xpath('.//xmlns:name', node.root.namespaces)
#
# A hash of variable bindings may also be appended to the namespace
# bindings.  For example:
#
#   node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
#
# Custom XPath functions may also be defined.  To define custom
# functions create a class and implement the function you want
# to define.  The first argument to the method will be the
# current matching NodeSet.  Any other arguments are ones that
# you pass in.  Note that this class may appear anywhere in the
# argument list.  For example:
#
#   node.xpath('.//title[regex(., "\w+")]', Class.new {
#     def regex node_set, regex
#       node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
#     end
#   }.new)
#
# A single query returns its result directly; the results of several
# queries are collected, in order, into one new NodeSet.
def xpath(*paths)
  return NodeSet.new(document) unless document

  queries, handler, ns, binds = extract_params(paths)

  results = queries.map do |query|
    context = XPathContext.new(self)
    context.register_namespaces(ns)
    query = query.gsub(/\/xmlns:/,'/:') unless Nokogiri.uses_libxml?

    if binds
      binds.each { |key, value| context.register_variable(key.to_s, value) }
    end

    context.evaluate(query, handler)
  end
  return results.first if results.length == 1

  NodeSet.new(document) do |combined|
    results.each do |set|
      set.each { |node| combined << node }
    end
  end
end
171
+
172
+ ###
173
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
174
+ #
175
+ # Search this node for CSS +rules+. +rules+ must be one or more CSS
176
+ # selectors. For example:
177
+ #
178
+ # node.css('title')
179
+ # node.css('body h1.bold')
180
+ # node.css('div + p.green', 'div#one')
181
+ #
182
+ # A hash of namespace bindings may be appended. For example:
183
+ #
184
+ # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
185
+ #
186
+ # Custom CSS pseudo classes may also be defined. To define
187
+ # custom pseudo classes, create a class and implement the custom
188
+ # pseudo class you want defined. The first argument to the
189
+ # method will be the current matching NodeSet. Any other
190
+ # arguments are ones that you pass in. For example:
191
+ #
192
+ # node.css('title:regex("\w+")', Class.new {
193
+ # def regex node_set, regex
194
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
195
+ # end
196
+ # }.new)
197
+ #
198
+ # Note that the CSS query string is case-sensitive with regards
199
+ # to your document type. That is, if you're looking for "H1" in
200
+ # an HTML document, you'll never find anything, since HTML tags
201
+ # will match only lowercase CSS queries. However, "H1" might be
202
+ # found in an XML document, where tags names are case-sensitive
203
+ # (e.g., "H1" is distinct from "h1").
204
+ #
205
def css *rules
  selectors, handler, ns, binds = extract_params(rules)

  xpath_prefix = "#{implied_xpath_context}/"

  # Translate every CSS selector to XPath; one selector may yield several.
  queries = selectors.map { |selector|
    CSS.xpath_for(selector, :prefix => xpath_prefix, :ns => ns)
  }.flatten.uniq

  # Forward namespaces, handler and bindings (whichever are present) to #xpath.
  extras = [ns, handler, binds].compact

  xpath(*(queries + extras))
end
216
+
217
+ ###
218
+ # Search this node's immediate children using CSS selector +selector+
219
def > selector
  # A document without a root element has no namespaces to offer; fall back
  # to an empty hash (same guard as #at) instead of calling #namespaces on nil.
  root = document.root
  ns = root ? root.namespaces : {}
  # Only the first XPath produced for +selector+ is evaluated, relative to self.
  xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
end
223
+
224
+ ###
225
+ # Search for the first occurrence of +path+.
226
+ #
227
+ # Returns nil if nothing is found, otherwise a Node.
228
def at path, ns = document.root ? document.root.namespaces : {}
  # Delegate to #search and keep only the first hit (nil when there is none).
  matches = search(path, ns)
  matches.first
end
alias :% :at
232
+
233
+ ##
234
+ # Search this node for the first occurrence of XPath +paths+.
235
+ # Equivalent to <tt>xpath(paths).first</tt>
236
+ # See Node#xpath for more information.
237
+ #
238
def at_xpath *paths
  # Run the full XPath search, then keep only the first result.
  found = xpath(*paths)
  found.first
end
241
+
242
+ ##
243
+ # Search this node for the first occurrence of CSS +rules+.
244
+ # Equivalent to <tt>css(rules).first</tt>
245
+ # See Node#css for more information.
246
+ #
247
def at_css *rules
  # Run the full CSS search, then keep only the first result.
  found = css(*rules)
  found.first
end
250
+
251
+ ###
252
+ # Get the attribute value for the attribute +name+
253
def [] name
  # Attribute lookups are done by String name; nil signals "no such attribute".
  attribute_name = name.to_s
  key?(attribute_name) ? get(attribute_name) : nil
end
257
+
258
+ ###
259
+ # Add +node_or_tags+ as a child of this Node.
260
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
261
+ #
262
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
263
def add_child node_or_tags
  incoming = coerce(node_or_tags)
  case incoming
  when XML::NodeSet
    # Append every member of the set, in order.
    incoming.each { |child| add_child_node(child) }
  else
    add_child_node(incoming)
  end
  # Return the coerced value (Node or NodeSet), not self.
  incoming
end
272
+
273
+ ###
274
+ # Insert +node_or_tags+ before this Node (as a sibling).
275
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
276
+ #
277
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
278
+ #
279
+ # Also see related method +before+.
280
def add_previous_sibling node_or_tags
  incoming = coerce(node_or_tags)

  # Single node: insert it directly.
  unless incoming.is_a?(XML::NodeSet)
    add_previous_sibling_node incoming
    return incoming
  end

  # NodeSet: insert each member before a pivot. For a text node a temporary
  # 'dummy' element is inserted first and used as the pivot instead of self.
  pivot = self
  if text?
    pivot = Nokogiri::XML::Node.new 'dummy', document
    add_previous_sibling_node pivot
  end
  incoming.each { |sibling| pivot.send :add_previous_sibling_node, sibling }
  # Drop the temporary pivot again.
  pivot.unlink if text?
  incoming
end
296
+
297
+ ###
298
+ # Insert +node_or_tags+ after this Node (as a sibling).
299
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
300
+ #
301
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
302
+ #
303
+ # Also see related method +after+.
304
def add_next_sibling node_or_tags
  incoming = coerce(node_or_tags)

  # Single node: insert it directly.
  unless incoming.is_a?(XML::NodeSet)
    add_next_sibling_node incoming
    return incoming
  end

  # NodeSet: insert each member after a pivot. For a text node a temporary
  # 'dummy' element is inserted first and used as the pivot instead of self.
  pivot = self
  if text?
    pivot = Nokogiri::XML::Node.new 'dummy', document
    add_next_sibling_node pivot
  end
  # Inserting in reverse keeps the set's original order after the pivot.
  incoming.reverse.each { |sibling| pivot.send :add_next_sibling_node, sibling }
  # Drop the temporary pivot again.
  pivot.unlink if text?
  incoming
end
320
+
321
+ ####
322
+ # Insert +node_or_tags+ before this node (as a sibling).
323
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
324
+ #
325
+ # Returns self, to support chaining of calls.
326
+ #
327
+ # Also see related method +add_previous_sibling+.
328
def before node_or_tags
  # Same insertion as #add_previous_sibling; only the return value differs.
  add_previous_sibling(node_or_tags)
  self
end
332
+
333
+ ####
334
+ # Insert +node_or_tags+ after this node (as a sibling).
335
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
336
+ #
337
+ # Returns self, to support chaining of calls.
338
+ #
339
+ # Also see related method +add_next_sibling+.
340
def after node_or_tags
  # Same insertion as #add_next_sibling; only the return value differs.
  add_next_sibling(node_or_tags)
  self
end
344
+
345
+ ####
346
+ # Set the inner html for this Node to +node_or_tags+
347
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
348
+ #
349
+ # Returns self.
350
+ #
351
+ # Also see related method +children=+
352
def inner_html= node_or_tags
  # All the work is done by #children=.
  self.children = node_or_tags
  self
end
356
+
357
+ ####
358
+ # Set the inner html for this Node to +node_or_tags+
359
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
360
+ #
361
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
362
+ #
363
+ # Also see related method +inner_html=+
364
def children= node_or_tags
  incoming = coerce(node_or_tags)
  # Detach the current children before attaching the replacements.
  children.unlink
  case incoming
  when XML::NodeSet
    incoming.each { |child| add_child_node(child) }
  else
    add_child_node(incoming)
  end
  incoming
end
374
+
375
+ ####
376
+ # Replace this Node with +node_or_tags+.
377
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
378
+ #
379
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
380
+ #
381
+ # Also see related method +swap+.
382
def replace node_or_tags
  incoming = coerce(node_or_tags)

  # Single node: a direct node-for-node replacement.
  unless incoming.is_a?(XML::NodeSet)
    replace_node incoming
    return incoming
  end

  # NodeSet: insert every member before the node being replaced, then remove
  # that node. A text node is first swapped for a temporary 'dummy' element.
  replacee = self
  if text?
    replacee = Nokogiri::XML::Node.new 'dummy', document
    add_previous_sibling_node replacee
    unlink
  end
  incoming.each { |replacement| replacee.add_previous_sibling replacement }
  replacee.unlink
  incoming
end
399
+
400
+ ####
401
+ # Swap this Node for +node_or_tags+
402
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
403
+ #
404
+ # Returns self, to support chaining of calls.
405
+ #
406
+ # Also see related method +replace+.
407
def swap node_or_tags
  # Same replacement as #replace; only the return value differs.
  replace(node_or_tags)
  self
end
411
+
412
# Sibling navigation shortcuts.
alias :next :next_sibling
alias :previous :previous_sibling

# :stopdoc:
# HACK: This is to work around an RDoc bug
alias :next= :add_next_sibling
# :startdoc:

alias :previous= :add_previous_sibling
alias :remove :unlink
# Attribute access shortcuts.
alias :get_attribute :[]
alias :attr :[]
alias :set_attribute :[]=
# Text content shortcuts.
alias :text :content
alias :inner_text :content
alias :has_attribute? :key?
alias :<< :add_child
alias :name :node_name
alias :name= :node_name=
alias :type :node_type
# to_str is an alias of text, which itself is an alias of content.
alias :to_str :text
alias :clone :dup
alias :elements :element_children
435
+
436
+ ####
437
+ # Returns a hash containing the node's attributes. The key is
438
+ # the attribute name without any namespace, the value is a Nokogiri::XML::Attr
439
+ # representing the attribute.
440
+ # If you need to distinguish attributes with the same name, with different namespaces
441
+ # use #attribute_nodes instead.
442
def attributes
  # Index the attribute nodes by name; on a name clash the later node wins.
  attribute_nodes.inject({}) do |by_name, attr|
    by_name[attr.node_name] = attr
    by_name
  end
end
447
+
448
+ ###
449
+ # Get the attribute values for this Node.
450
def values
  # One value per attribute node, in attribute_nodes order.
  attribute_nodes.collect { |attr| attr.value }
end
453
+
454
+ ###
455
+ # Get the attribute names for this Node.
456
def keys
  # One name per attribute node, in attribute_nodes order.
  attribute_nodes.collect { |attr| attr.node_name }
end
459
+
460
+ ###
461
+ # Iterate over each attribute name and value pair for this Node.
462
def each &block
  attribute_nodes.each do |attr|
    # The block receives a single [name, value] pair.
    pair = [attr.node_name, attr.value]
    block.call(pair)
  end
end
467
+
468
+ ###
469
+ # Remove the attribute named +name+
470
def remove_attribute name
  # Nothing to remove (and nil returned) when the attribute is absent.
  return unless key?(name)
  attributes[name].remove
end
alias :delete :remove_attribute
474
+
475
+ ###
476
+ # Returns true if this Node matches +selector+
477
def matches? selector
  # The search runs from the topmost ancestor. A node with no ancestors has
  # nothing to search from, so it cannot match: return false rather than
  # calling #search on nil.
  topmost = ancestors.last
  return false unless topmost
  topmost.search(selector).include?(self)
end
480
+
481
+ ###
482
+ # Create a DocumentFragment containing +tags+ that is relative to _this_
483
+ # context node.
484
def fragment tags
  # Pick the HTML or XML flavour of DocumentFragment to match the document.
  flavor = if document.html? then Nokogiri::HTML else Nokogiri::XML end
  flavor::DocumentFragment.new(document, tags, self)
end
488
+
489
+ ###
490
+ # Parse +string_or_io+ as a document fragment within the context of
491
+ # *this* node. Returns a XML::NodeSet containing the nodes parsed from
492
+ # +string_or_io+.
493
def parse string_or_io, options = nil
  # Default parse options depend on whether the owning document is HTML.
  options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
  # Wrap a raw integer bitmask. Integer is used instead of Fixnum because the
  # Fixnum constant no longer exists on current Rubies.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  # Give the options to the user
  yield options if block_given?

  # Accept anything that responds to #read, or the markup itself.
  contents = string_or_io.respond_to?(:read) ? string_or_io.read : string_or_io

  return Nokogiri::XML::NodeSet.new(document) if contents.empty?

  ##
  # This is a horrible hack, but I don't care. See #313 for background.
  # If the in-context parse yields nothing but added errors to the document,
  # and recovery is enabled, re-parse the contents as an HTML fragment.
  error_count = document.errors.length
  node_set = in_context(contents, options.to_i)
  if node_set.empty? && document.errors.length > error_count && options.recover?
    fallback = Nokogiri::HTML::DocumentFragment.parse contents
    node_set = fallback.children
  end
  node_set
end
517
+
518
+ ####
519
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
520
def content= string
  # Escape first, then store the escaped text as the native content.
  escaped = encode_special_chars(string.to_s)
  self.native_content = escaped
end
523
+
524
+ ###
525
+ # Set the parent Node for this Node
526
def parent= parent_node
  # Reparenting is just "add self as a child of the new parent".
  parent_node.add_child(self)
  parent_node
end
530
+
531
+ ###
532
+ # Returns a Hash of {prefix => value} for all namespaces on this
533
+ # node and its ancestors.
534
+ #
535
+ # This method returns the same namespaces as #namespace_scopes.
536
+ #
537
+ # Returns namespaces in scope for self -- those defined on self
538
+ # element directly or any ancestor node -- as a Hash of
539
+ # attribute-name/value pairs. Note that the keys in this hash are the
540
+ # XML attributes that would be used to define this namespace,
541
+ # such as "xmlns:prefix", not just the prefix. Default namespace
542
+ # set on self will be included with key "xmlns". However,
543
+ # default namespaces set on ancestor will NOT be, even if self
544
+ # has no explicit default namespace.
545
def namespaces
  namespace_scopes.inject({}) do |scopes, nd|
    # "xmlns" for the default namespace (nil prefix), "xmlns:prefix" otherwise.
    key = ['xmlns', nd.prefix].compact.join(':')
    # Tag the key with the document encoding where strings carry encodings.
    # A capability check is used rather than comparing RUBY_VERSION strings.
    if key.respond_to?(:force_encoding) && document.encoding
      begin
        key.force_encoding document.encoding
      rescue ArgumentError
        # Encoding name not known to Ruby: keep the key as built.
      end
    end
    scopes[key] = nd.href
    scopes
  end
end
557
+
558
+ # Returns true if this is a Comment
559
def comment?
  # Decided purely by the node type constant.
  COMMENT_NODE == type
end
562
+
563
+ # Returns true if this is a CDATA
564
def cdata?
  # Decided purely by the node type constant.
  CDATA_SECTION_NODE == type
end
567
+
568
+ # Returns true if this is an XML::Document node
569
def xml?
  # Decided purely by the node type constant.
  DOCUMENT_NODE == type
end
572
+
573
+ # Returns true if this is an HTML::Document node
574
def html?
  # Decided purely by the node type constant.
  HTML_DOCUMENT_NODE == type
end
577
+
578
+ # Returns true if this is a Text node
579
def text?
  # Decided purely by the node type constant.
  TEXT_NODE == type
end
582
+
583
+ # Returns true if this is a DocumentFragment
584
def fragment?
  # Decided purely by the node type constant.
  DOCUMENT_FRAG_NODE == type
end
587
+
588
+ ###
589
+ # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
590
+ # nil on XML documents and on unknown tags.
591
def description
  # Element descriptions are only looked up when the document is not XML.
  document.xml? ? nil : Nokogiri::HTML::ElementDescription[name]
end
595
+
596
+ ###
597
+ # Is this a read only node?
598
def read_only?
  # According to gdome2, these are read-only node types
  case type
  when NOTATION_NODE, ENTITY_NODE, ENTITY_DECL then true
  else false
  end
end
602
+
603
+ # Returns true if this is an Element node
604
def element?
  # Decided purely by the node type constant.
  ELEMENT_NODE == type
end
alias :elem? :element?
608
+
609
+ ###
610
+ # Turn this node in to a string. If the document is HTML, this method
611
+ # returns html. If the document is XML, this method returns XML.
612
def to_s
  # The serialisation format follows the owning document.
  if document.xml?
    to_xml
  else
    to_html
  end
end
615
+
616
+ # Get the inner_html for this node's Node#children
617
def inner_html *args
  # Serialise each child with the same arguments and concatenate the pieces.
  rendered = children.map { |child| child.to_html(*args) }
  rendered.join
end
620
+
621
+ # Get the path to this node as a CSS expression
622
def css_path
  # Split the node path into steps, dropping the empty ones.
  steps = path.split(/\//).reject { |step| step.length == 0 }
  # "[n]" position predicates become ":nth-of-type(n)".
  steps.map { |step| step.gsub(/\[(\d+)\]/, ':nth-of-type(\1)') }.join(' > ')
end
627
+
628
+ ###
629
+ # Get a list of ancestor Node for this Node. If +selector+ is given,
630
+ # the ancestors must match +selector+
631
def ancestors selector = nil
  # Nodes that cannot have, or do not have, a parent get an empty NodeSet.
  return NodeSet.new(document) unless respond_to?(:parent)
  return NodeSet.new(document) unless parent

  # Walk upwards, nearest ancestor first, until a node has no parent.
  parents = [parent]

  while parents.last.respond_to?(:parent)
    break unless ctx_parent = parents.last.parent
    parents << ctx_parent
  end

  return NodeSet.new(document, parents) unless selector

  # Run the selector once from the topmost ancestor and reuse the result,
  # instead of repeating the same search for every ancestor.
  root = parents.last
  matching = root.search(selector)

  NodeSet.new(document, parents.find_all { |ancestor| matching.include?(ancestor) })
end
650
+
651
+ ###
652
+ # Adds a default namespace supplied as a string +url+ href, to self.
653
+ # The consequence is as if an xmlns attribute with the supplied argument were
654
+ # present in parsed XML. A default namespace set with this method will
655
+ # NOT show up in #attributes, but when this node is serialized to XML an
656
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
657
def default_namespace= url
  # A nil prefix marks the definition as the default namespace.
  default_prefix = nil
  add_namespace_definition(default_prefix, url)
end
660
+ alias :add_namespace :add_namespace_definition
661
+
662
+ ###
663
+ # Set the default namespace on this node (as would be defined with an
664
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
665
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
666
+ # for this node. You probably want #default_namespace= instead, or perhaps
667
+ # #add_namespace_definition with a nil prefix argument.
668
def namespace= ns
  # nil/false is passed straight through; anything else is validated first.
  if ns
    raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace" unless Nokogiri::XML::Namespace === ns
    raise ArgumentError, 'namespace must be declared on the same document' if ns.document != document
  end

  set_namespace ns
end
680
+
681
+ ####
682
+ # Yields self and all children to +block+ recursively.
683
def traverse &block
  # Children first, then this node.
  children.each do |child|
    child.traverse(&block)
  end
  block.call(self)
end
687
+
688
+ ###
689
+ # Accept a visitor. This method calls "visit" on +visitor+ with self.
690
def accept visitor
  # Hand this node to the visitor and return whatever #visit returns.
  visitor.visit self
end
693
+
694
+ ###
695
+ # Test to see if this Node is equal to +other+
696
def == other
  # Only objects that expose a pointer_id can be equal to a Node.
  return false unless other && other.respond_to?(:pointer_id)
  pointer_id == other.pointer_id
end
701
+
702
+ ###
703
+ # Serialize Node using +options+. Save options can also be set using a
704
+ # block. See SaveOptions.
705
+ #
706
+ # These two statements are equivalent:
707
+ #
708
+ # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
709
+ #
710
+ # or
711
+ #
712
+ # node.serialize(:encoding => 'UTF-8') do |config|
713
+ # config.format.as_xml
714
+ # end
715
+ #
716
def serialize *args, &block
  # Accept either an options Hash or positional (encoding, save_with).
  options = if args.first.is_a?(Hash)
    args.shift
  else
    { :encoding => args[0], :save_with => args[1] || SaveOptions::FORMAT }
  end

  # Fall back to the document encoding and record the final choice.
  encoding = options[:encoding] || document.encoding
  options[:encoding] = encoding

  # Where strings carry encodings, tag the output buffer up front.
  buffer = ""
  buffer.force_encoding(Encoding.find(encoding)) if encoding && buffer.respond_to?(:force_encoding)

  io = StringIO.new(buffer)
  write_to io, options, &block
  io.string
end
733
+
734
+ ###
735
+ # Serialize this Node to HTML
736
+ #
737
+ # doc.to_html
738
+ #
739
+ # See Node#write_to for a list of +options+. For formatted output,
740
+ # use Node#to_xhtml instead.
741
def to_html options = {}
  # FIXME: this is a hack around broken libxml versions
  legacy_libxml = Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
  return dump_html if legacy_libxml

  # Default save flags apply only when the caller did not supply :save_with.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_HTML
  end

  serialize(options)
end
752
+
753
+ ###
754
+ # Serialize this Node to XML using +options+
755
+ #
756
+ # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
757
+ #
758
+ # See Node#write_to for a list of +options+
759
def to_xml options = {}
  # Default save flags apply only when the caller did not supply :save_with.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT | SaveOptions::AS_XML
  end

  serialize(options)
end
764
+
765
+ ###
766
+ # Serialize this Node to XHTML using +options+
767
+ #
768
+ # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
769
+ #
770
+ # See Node#write_to for a list of +options+
771
def to_xhtml options = {}
  # FIXME: this is a hack around broken libxml versions
  legacy_libxml = Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
  return dump_html if legacy_libxml

  # Default save flags apply only when the caller did not supply :save_with.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_XHTML
  end

  serialize(options)
end
782
+
783
+ ###
784
+ # Write Node to +io+ with +options+. +options+ modify the output of
785
+ # this method. Valid options are:
786
+ #
787
+ # * +:encoding+ for changing the encoding
788
+ # * +:indent_text+ the indentation text, defaults to one space
789
+ # * +:indent+ the number of +:indent_text+ to use, defaults to 2
790
+ # * +:save_with+ a combination of SaveOptions constants.
791
+ #
792
+ # To save with UTF-8 indented twice:
793
+ #
794
+ # node.write_to(io, :encoding => 'UTF-8', :indent => 2)
795
+ #
796
+ # To save indented with two dashes:
797
+ #
798
+ # node.write_to(io, :indent_text => '-', :indent => 2)
799
+ #
800
def write_to io, *options
  # Keep the keyword-style Hash separate from the positional arguments.
  # Rebinding +options+ to the Hash would turn options[0] / options[1] into
  # Hash lookups, silently dropping a positional encoding and save options.
  named = options.first.is_a?(Hash) ? options.shift : {}

  encoding = named[:encoding] || options[0]
  save_options = named[:save_with] || options[1] || SaveOptions::FORMAT
  indent_text = named[:indent_text] || ' '
  indent_times = named[:indent] || 2

  # The block, if given, may adjust the save configuration before writing.
  config = SaveOptions.new(save_options.to_i)
  yield config if block_given?

  native_write_to(io, encoding, indent_text * indent_times, config.options)
end
812
+
813
+ ###
814
+ # Write Node as HTML to +io+ with +options+
815
+ #
816
+ # See Node#write_to for a list of +options+
817
def write_html_to io, options = {}
  # FIXME: this is a hack around broken libxml versions
  legacy_libxml = Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
  return (io << dump_html) if legacy_libxml

  # Default save flags apply only when the caller did not supply :save_with.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_HTML
  end
  write_to io, options
end
827
+
828
+ ###
829
+ # Write Node as XHTML to +io+ with +options+
830
+ #
831
+ # See Node#write_to for a list of +options+
832
def write_xhtml_to io, options = {}
  # FIXME: this is a hack around broken libxml versions
  legacy_libxml = Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
  return (io << dump_html) if legacy_libxml

  # Default save flags apply only when the caller did not supply :save_with.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_XHTML
  end
  write_to io, options
end
842
+
843
+ ###
844
+ # Write Node as XML to +io+ with +options+
845
+ #
846
+ # doc.write_xml_to io, :encoding => 'UTF-8'
847
+ #
848
+ # See Node#write_to for a list of options
849
def write_xml_to io, options = {}
  # Default save flags apply only when the caller did not supply :save_with.
  unless options[:save_with]
    options[:save_with] = SaveOptions::FORMAT | SaveOptions::AS_XML
  end
  write_to io, options
end
853
+
854
+ ###
855
+ # Compare two Node objects with respect to their Document. Nodes from
856
+ # different documents cannot be compared.
857
def <=> other
  # Only Nodes belonging to the same document can be ordered; otherwise nil.
  comparable = other.is_a?(Nokogiri::XML::Node) && document == other.document
  comparable ? compare(other) : nil
end
862
+
863
+ private
864
+
865
def extract_params params # :nodoc:
  # Pop off our custom function handler if it exists: the first argument
  # whose class is not exactly Hash, String or Symbol.
  handler = params.find { |param|
    ![Hash, String, Symbol].include?(param.class)
  }

  params -= [handler] if handler

  # Collect trailing Hash / nil arguments (namespaces, then bindings).
  # The emptiness check is required: [].last is nil, so without it the loop
  # would never terminate once every argument has been consumed.
  hashes = []
  hashes << params.pop while !params.empty? && (Hash === params.last || params.last.nil?)

  ns, binds = hashes.reverse

  # Default to the root element's namespaces when none were supplied.
  ns ||= document.root ? document.root.namespaces : {}

  [params, handler, ns, binds]
end
882
+
883
def coerce data # :nodoc:
  # Collection-like and string inputs are normalised to a NodeSet.
  case data
  when XML::NodeSet then return data
  when XML::DocumentFragment then return data.children
  when String then return fragment(data).children
  end

  # What remains must be a Node, and must not be a whole Document.
  acceptable = !data.is_a?(Document) && data.is_a?(XML::Node)
  unless acceptable
    raise ArgumentError, <<-EOERR
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
    EOERR
  end

  data
end
897
+
898
def implied_xpath_context
  # Prefix used by #css when translating selectors to XPath.
  './'
end
901
+
902
def inspect_attributes
  # Fixed list of attribute names, as Symbols.
  %w[name namespace attribute_nodes children].map { |attribute| attribute.to_sym }
end
905
+ end
906
+ end
907
+ end