nokogiri-backupify 1.5.0.beta.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (275) hide show
  1. data/.autotest +26 -0
  2. data/CHANGELOG.ja.rdoc +509 -0
  3. data/CHANGELOG.rdoc +490 -0
  4. data/Manifest.txt +274 -0
  5. data/README.ja.rdoc +106 -0
  6. data/README.rdoc +150 -0
  7. data/Rakefile +217 -0
  8. data/bin/nokogiri +54 -0
  9. data/deps.rip +5 -0
  10. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  11. data/ext/java/nokogiri/HtmlDocument.java +146 -0
  12. data/ext/java/nokogiri/HtmlElementDescription.java +145 -0
  13. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  14. data/ext/java/nokogiri/HtmlSaxParserContext.java +256 -0
  15. data/ext/java/nokogiri/NokogiriService.java +466 -0
  16. data/ext/java/nokogiri/XmlAttr.java +183 -0
  17. data/ext/java/nokogiri/XmlAttributeDecl.java +130 -0
  18. data/ext/java/nokogiri/XmlCdata.java +89 -0
  19. data/ext/java/nokogiri/XmlComment.java +84 -0
  20. data/ext/java/nokogiri/XmlDocument.java +514 -0
  21. data/ext/java/nokogiri/XmlDocumentFragment.java +216 -0
  22. data/ext/java/nokogiri/XmlDtd.java +464 -0
  23. data/ext/java/nokogiri/XmlElement.java +221 -0
  24. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  25. data/ext/java/nokogiri/XmlElementDecl.java +147 -0
  26. data/ext/java/nokogiri/XmlEntityDecl.java +161 -0
  27. data/ext/java/nokogiri/XmlEntityReference.java +75 -0
  28. data/ext/java/nokogiri/XmlNamespace.java +127 -0
  29. data/ext/java/nokogiri/XmlNode.java +1392 -0
  30. data/ext/java/nokogiri/XmlNodeSet.java +284 -0
  31. data/ext/java/nokogiri/XmlProcessingInstruction.java +103 -0
  32. data/ext/java/nokogiri/XmlReader.java +409 -0
  33. data/ext/java/nokogiri/XmlRelaxng.java +199 -0
  34. data/ext/java/nokogiri/XmlSaxParserContext.java +353 -0
  35. data/ext/java/nokogiri/XmlSaxPushParser.java +182 -0
  36. data/ext/java/nokogiri/XmlSchema.java +175 -0
  37. data/ext/java/nokogiri/XmlSyntaxError.java +114 -0
  38. data/ext/java/nokogiri/XmlText.java +135 -0
  39. data/ext/java/nokogiri/XmlXpathContext.java +175 -0
  40. data/ext/java/nokogiri/XsltStylesheet.java +181 -0
  41. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +205 -0
  42. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +73 -0
  43. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +80 -0
  44. data/ext/java/nokogiri/internals/NokogiriHandler.java +326 -0
  45. data/ext/java/nokogiri/internals/NokogiriHelpers.java +583 -0
  46. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +170 -0
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +118 -0
  48. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +73 -0
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  50. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
  51. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +120 -0
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +56 -0
  53. data/ext/java/nokogiri/internals/ParserContext.java +278 -0
  54. data/ext/java/nokogiri/internals/PushInputStream.java +411 -0
  55. data/ext/java/nokogiri/internals/ReaderNode.java +473 -0
  56. data/ext/java/nokogiri/internals/SaveContext.java +282 -0
  57. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +68 -0
  58. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  59. data/ext/java/nokogiri/internals/XmlDomParser.java +77 -0
  60. data/ext/java/nokogiri/internals/XmlDomParserContext.java +233 -0
  61. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  62. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
  63. data/ext/nokogiri/depend +358 -0
  64. data/ext/nokogiri/extconf.rb +124 -0
  65. data/ext/nokogiri/html_document.c +154 -0
  66. data/ext/nokogiri/html_document.h +10 -0
  67. data/ext/nokogiri/html_element_description.c +276 -0
  68. data/ext/nokogiri/html_element_description.h +10 -0
  69. data/ext/nokogiri/html_entity_lookup.c +32 -0
  70. data/ext/nokogiri/html_entity_lookup.h +8 -0
  71. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  72. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  73. data/ext/nokogiri/nokogiri.c +92 -0
  74. data/ext/nokogiri/nokogiri.h +160 -0
  75. data/ext/nokogiri/xml_attr.c +94 -0
  76. data/ext/nokogiri/xml_attr.h +9 -0
  77. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  78. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  79. data/ext/nokogiri/xml_cdata.c +56 -0
  80. data/ext/nokogiri/xml_cdata.h +9 -0
  81. data/ext/nokogiri/xml_comment.c +54 -0
  82. data/ext/nokogiri/xml_comment.h +9 -0
  83. data/ext/nokogiri/xml_document.c +478 -0
  84. data/ext/nokogiri/xml_document.h +23 -0
  85. data/ext/nokogiri/xml_document_fragment.c +48 -0
  86. data/ext/nokogiri/xml_document_fragment.h +10 -0
  87. data/ext/nokogiri/xml_dtd.c +202 -0
  88. data/ext/nokogiri/xml_dtd.h +10 -0
  89. data/ext/nokogiri/xml_element_content.c +123 -0
  90. data/ext/nokogiri/xml_element_content.h +10 -0
  91. data/ext/nokogiri/xml_element_decl.c +69 -0
  92. data/ext/nokogiri/xml_element_decl.h +9 -0
  93. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  94. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  95. data/ext/nokogiri/xml_entity_decl.c +110 -0
  96. data/ext/nokogiri/xml_entity_decl.h +10 -0
  97. data/ext/nokogiri/xml_entity_reference.c +52 -0
  98. data/ext/nokogiri/xml_entity_reference.h +9 -0
  99. data/ext/nokogiri/xml_io.c +31 -0
  100. data/ext/nokogiri/xml_io.h +11 -0
  101. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  102. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  103. data/ext/nokogiri/xml_namespace.c +84 -0
  104. data/ext/nokogiri/xml_namespace.h +13 -0
  105. data/ext/nokogiri/xml_node.c +1384 -0
  106. data/ext/nokogiri/xml_node.h +13 -0
  107. data/ext/nokogiri/xml_node_set.c +418 -0
  108. data/ext/nokogiri/xml_node_set.h +9 -0
  109. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  110. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  111. data/ext/nokogiri/xml_reader.c +684 -0
  112. data/ext/nokogiri/xml_reader.h +10 -0
  113. data/ext/nokogiri/xml_relax_ng.c +161 -0
  114. data/ext/nokogiri/xml_relax_ng.h +9 -0
  115. data/ext/nokogiri/xml_sax_parser.c +288 -0
  116. data/ext/nokogiri/xml_sax_parser.h +39 -0
  117. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  118. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  119. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  120. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  121. data/ext/nokogiri/xml_schema.c +205 -0
  122. data/ext/nokogiri/xml_schema.h +9 -0
  123. data/ext/nokogiri/xml_syntax_error.c +58 -0
  124. data/ext/nokogiri/xml_syntax_error.h +13 -0
  125. data/ext/nokogiri/xml_text.c +50 -0
  126. data/ext/nokogiri/xml_text.h +9 -0
  127. data/ext/nokogiri/xml_xpath_context.c +309 -0
  128. data/ext/nokogiri/xml_xpath_context.h +9 -0
  129. data/ext/nokogiri/xslt_stylesheet.c +258 -0
  130. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  131. data/lib/isorelax.jar +0 -0
  132. data/lib/jing.jar +0 -0
  133. data/lib/nekodtd.jar +0 -0
  134. data/lib/nekohtml.jar +0 -0
  135. data/lib/nokogiri.rb +143 -0
  136. data/lib/nokogiri/css.rb +23 -0
  137. data/lib/nokogiri/css/node.rb +99 -0
  138. data/lib/nokogiri/css/parser.rb +677 -0
  139. data/lib/nokogiri/css/parser.y +237 -0
  140. data/lib/nokogiri/css/parser_extras.rb +91 -0
  141. data/lib/nokogiri/css/syntax_error.rb +7 -0
  142. data/lib/nokogiri/css/tokenizer.rb +152 -0
  143. data/lib/nokogiri/css/tokenizer.rex +55 -0
  144. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  145. data/lib/nokogiri/decorators/slop.rb +35 -0
  146. data/lib/nokogiri/html.rb +36 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +221 -0
  149. data/lib/nokogiri/html/document_fragment.rb +41 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  152. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  153. data/lib/nokogiri/html/sax/parser.rb +52 -0
  154. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  155. data/lib/nokogiri/syntax_error.rb +4 -0
  156. data/lib/nokogiri/version.rb +35 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +418 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +218 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +84 -0
  165. data/lib/nokogiri/xml/dtd.rb +22 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  169. data/lib/nokogiri/xml/namespace.rb +13 -0
  170. data/lib/nokogiri/xml/node.rb +907 -0
  171. data/lib/nokogiri/xml/node/save_options.rb +45 -0
  172. data/lib/nokogiri/xml/node_set.rb +350 -0
  173. data/lib/nokogiri/xml/notation.rb +6 -0
  174. data/lib/nokogiri/xml/parse_options.rb +85 -0
  175. data/lib/nokogiri/xml/pp.rb +2 -0
  176. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  177. data/lib/nokogiri/xml/pp/node.rb +56 -0
  178. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  179. data/lib/nokogiri/xml/reader.rb +112 -0
  180. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  181. data/lib/nokogiri/xml/sax.rb +4 -0
  182. data/lib/nokogiri/xml/sax/document.rb +164 -0
  183. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  184. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  185. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  186. data/lib/nokogiri/xml/schema.rb +57 -0
  187. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  188. data/lib/nokogiri/xml/text.rb +9 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +52 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xercesImpl.jar +0 -0
  195. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  196. data/tasks/cross_compile.rb +177 -0
  197. data/tasks/test.rb +94 -0
  198. data/test/css/test_nthiness.rb +159 -0
  199. data/test/css/test_parser.rb +303 -0
  200. data/test/css/test_tokenizer.rb +198 -0
  201. data/test/css/test_xpath_visitor.rb +85 -0
  202. data/test/decorators/test_slop.rb +16 -0
  203. data/test/files/2ch.html +108 -0
  204. data/test/files/address_book.rlx +12 -0
  205. data/test/files/address_book.xml +10 -0
  206. data/test/files/bar/bar.xsd +4 -0
  207. data/test/files/dont_hurt_em_why.xml +422 -0
  208. data/test/files/exslt.xml +8 -0
  209. data/test/files/exslt.xslt +35 -0
  210. data/test/files/foo/foo.xsd +4 -0
  211. data/test/files/po.xml +32 -0
  212. data/test/files/po.xsd +66 -0
  213. data/test/files/shift_jis.html +10 -0
  214. data/test/files/shift_jis.xml +5 -0
  215. data/test/files/snuggles.xml +3 -0
  216. data/test/files/staff.dtd +10 -0
  217. data/test/files/staff.xml +59 -0
  218. data/test/files/staff.xslt +32 -0
  219. data/test/files/tlm.html +850 -0
  220. data/test/files/valid_bar.xml +2 -0
  221. data/test/helper.rb +171 -0
  222. data/test/html/sax/test_parser.rb +136 -0
  223. data/test/html/sax/test_parser_context.rb +48 -0
  224. data/test/html/test_builder.rb +164 -0
  225. data/test/html/test_document.rb +457 -0
  226. data/test/html/test_document_encoding.rb +123 -0
  227. data/test/html/test_document_fragment.rb +255 -0
  228. data/test/html/test_element_description.rb +100 -0
  229. data/test/html/test_named_characters.rb +14 -0
  230. data/test/html/test_node.rb +190 -0
  231. data/test/html/test_node_encoding.rb +27 -0
  232. data/test/test_convert_xpath.rb +135 -0
  233. data/test/test_css_cache.rb +45 -0
  234. data/test/test_encoding_handler.rb +46 -0
  235. data/test/test_memory_leak.rb +52 -0
  236. data/test/test_nokogiri.rb +132 -0
  237. data/test/test_reader.rb +403 -0
  238. data/test/test_soap4r_sax.rb +52 -0
  239. data/test/test_xslt_transforms.rb +189 -0
  240. data/test/xml/node/test_save_options.rb +20 -0
  241. data/test/xml/node/test_subclass.rb +44 -0
  242. data/test/xml/sax/test_parser.rb +338 -0
  243. data/test/xml/sax/test_parser_context.rb +113 -0
  244. data/test/xml/sax/test_push_parser.rb +156 -0
  245. data/test/xml/test_attr.rb +65 -0
  246. data/test/xml/test_attribute_decl.rb +86 -0
  247. data/test/xml/test_builder.rb +210 -0
  248. data/test/xml/test_cdata.rb +50 -0
  249. data/test/xml/test_comment.rb +29 -0
  250. data/test/xml/test_document.rb +675 -0
  251. data/test/xml/test_document_encoding.rb +26 -0
  252. data/test/xml/test_document_fragment.rb +192 -0
  253. data/test/xml/test_dtd.rb +107 -0
  254. data/test/xml/test_dtd_encoding.rb +33 -0
  255. data/test/xml/test_element_content.rb +56 -0
  256. data/test/xml/test_element_decl.rb +73 -0
  257. data/test/xml/test_entity_decl.rb +122 -0
  258. data/test/xml/test_entity_reference.rb +21 -0
  259. data/test/xml/test_namespace.rb +70 -0
  260. data/test/xml/test_node.rb +899 -0
  261. data/test/xml/test_node_attributes.rb +34 -0
  262. data/test/xml/test_node_encoding.rb +107 -0
  263. data/test/xml/test_node_reparenting.rb +321 -0
  264. data/test/xml/test_node_set.rb +708 -0
  265. data/test/xml/test_parse_options.rb +52 -0
  266. data/test/xml/test_processing_instruction.rb +30 -0
  267. data/test/xml/test_reader_encoding.rb +126 -0
  268. data/test/xml/test_relax_ng.rb +60 -0
  269. data/test/xml/test_schema.rb +89 -0
  270. data/test/xml/test_syntax_error.rb +12 -0
  271. data/test/xml/test_text.rb +47 -0
  272. data/test/xml/test_unparented_node.rb +381 -0
  273. data/test/xml/test_xpath.rb +237 -0
  274. data/test/xslt/test_custom_functions.rb +94 -0
  275. metadata +525 -0
@@ -0,0 +1,11 @@
1
module Nokogiri
  module XML
    # A CDATA section node; a Text subclass that only overrides the
    # reported node name.
    class CDATA < Text
      ###
      # The name of this CDATA node.
      #
      # Returns the constant String '#cdata-section'.
      def name; '#cdata-section'; end
    end
  end
end
@@ -0,0 +1,7 @@
1
module Nokogiri
  module XML
    # A Node subclass that mixes in the PP::CharacterData module and adds
    # no methods of its own.
    class CharacterData < Node
      include XML::PP::CharacterData
    end
  end
end
@@ -0,0 +1,218 @@
1
module Nokogiri
  module XML
    ##
    # Nokogiri::XML::Document is the main entry point for dealing with
    # XML documents.  The Document is created by parsing an XML document.
    # See Nokogiri.XML()
    #
    # For searching a Document, see Nokogiri::XML::Node#css and
    # Nokogiri::XML::Node#xpath
    class Document < Nokogiri::XML::Node
      ##
      # Parse an XML file.
      #
      # +string_or_io+ may be a String, or any object that responds to
      # _read_ and _close_ such as an IO, or StringIO.
      # +url+ is the resource where this document is located.  When it is
      # not given and +string_or_io+ is readable and responds to _path_,
      # that path is used.
      # +encoding+ is the encoding that should be used when processing the
      # document.
      # +options+ is a number that sets options in the parser, such as
      # Nokogiri::XML::ParseOptions::RECOVER.  See the constants in
      # Nokogiri::XML::ParseOptions.
      #
      # If a block is given it is yielded the options object before any
      # parsing happens, so the caller can adjust it.
      #
      # Returns a new, empty Document when +string_or_io+ is +nil+ or empty.
      def self.parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
        # Integer rather than Fixnum: the Fixnum constant no longer exists on
        # current Rubies, and Integer matches on older ones as well.
        options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

        # Give the options to the user
        yield options if block_given?

        if string_or_io.respond_to?(:read)
          url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
          return read_io(string_or_io, url, encoding, options.to_i)
        end

        # read_memory pukes on empty docs
        return new if string_or_io.nil? || string_or_io.empty?

        read_memory(string_or_io, url, encoding, options.to_i)
      end

      # A list of Nokogiri::XML::SyntaxError found when parsing a document
      attr_accessor :errors

      def initialize(*args) # :nodoc:
        @errors     = []
        @decorators = nil
      end

      ##
      # Create an element with +name+, and optionally setting the content and attributes.
      #
      #   doc.create_element "div" # <div></div>
      #   doc.create_element "div", :class => "container" # <div class='container'></div>
      #   doc.create_element "div", "contents" # <div>contents</div>
      #   doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
      #   doc.create_element("div") { |node| node['class'] = "container" } # <div class='container'></div>
      #
      # Hash keys of the form "xmlns" or "xmlns:prefix" are registered as
      # namespace definitions on the element instead of being set as
      # attributes.
      def create_element(name, *args, &block)
        elm = Nokogiri::XML::Element.new(name, self, &block)
        args.each do |arg|
          case arg
          when Hash
            arg.each do |k, v|
              key = k.to_s
              if key =~ /^xmlns(:\w+)?$/
                # "xmlns" splits to a nil prefix, "xmlns:foo" to "foo"
                ns_name = key.split(":", 2)[1]
                elm.add_namespace_definition ns_name, v
                next
              end
              elm[key] = v.to_s
            end
          else
            elm.content = arg
          end
        end
        elm
      end

      # Create a text node with +text+
      def create_text_node(text, &block)
        Nokogiri::XML::Text.new(text.to_s, self, &block)
      end

      # Create a CDATA element containing +text+
      def create_cdata(text)
        Nokogiri::XML::CDATA.new(self, text.to_s)
      end

      # The name of this document.  Always returns "document"
      def name
        'document'
      end

      # A reference to +self+
      def document
        self
      end

      ##
      # Recursively get all namespaces from this node and its subtree and
      # return them as a hash.
      #
      # For example, given this document:
      #
      #   <root xmlns:foo="bar">
      #     <bar xmlns:hello="world" />
      #   </root>
      #
      # This method will return:
      #
      #   { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
      #
      # WARNING: this method will clobber duplicate names in the keys.
      # For example, given this document:
      #
      #   <root xmlns:foo="bar">
      #     <bar xmlns:foo="baz" />
      #   </root>
      #
      # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
      #
      # Non-prefixed default namespaces (as in "xmlns=") are not included
      # in the hash.
      #
      # Note this is a very expensive operation in current implementation, as it
      # traverses the entire graph, and also has to bring each node across the
      # libxml bridge into a ruby object.
      def collect_namespaces
        ns = {}
        traverse { |j| ns.merge!(j.namespaces) }
        ns
      end

      # Get the list of decorators given +key+.  The list is created (empty)
      # the first time a key is asked for.
      def decorators(key)
        @decorators ||= Hash.new
        @decorators[key] ||= []
      end

      ##
      # Validate this Document against its DTD.  Returns a list of errors on
      # the document or +nil+ when there is no DTD.
      def validate
        return nil unless internal_subset
        internal_subset.validate self
      end

      ##
      # Explore a document with shortcut methods. See Nokogiri::Slop for details.
      #
      # Note that any nodes that have been instantiated before #slop!
      # is called will not be decorated with sloppy behavior. So, if you're in
      # irb, the preferred idiom is:
      #
      #   irb> doc = Nokogiri::Slop my_markup
      #
      # and not
      #
      #   irb> doc = Nokogiri::HTML my_markup
      #   ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
      #   irb> doc.slop!
      #   ... which does absolutely nothing.
      #
      # Returns +self+.
      def slop!
        unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
          decorators(XML::Node) << Nokogiri::Decorators::Slop
          decorate!
        end

        self
      end

      ##
      # Apply any decorators to +node+.  Every module registered under a
      # class that +node+ is an instance of is mixed into +node+.
      def decorate(node)
        return unless @decorators
        @decorators.each do |klass, list|
          next unless node.is_a?(klass)
          list.each { |moodule| node.extend(moodule) }
        end
      end

      alias :to_xml :serialize
      alias :clone :dup

      # Get the hash of namespaces on the root Nokogiri::XML::Node.
      # Returns an empty hash when the document has no root.
      def namespaces
        root ? root.namespaces : {}
      end

      ##
      # Create a Nokogiri::XML::DocumentFragment from +tags+
      # Returns an empty fragment if +tags+ is nil.
      def fragment(tags = nil)
        DocumentFragment.new(self, tags, self.root)
      end

      undef_method :swap, :parent, :namespace, :default_namespace=
      undef_method :add_namespace_definition, :attributes
      undef_method :namespace_definitions, :line, :add_namespace

      ##
      # Add +child+ to this document.  When +child+ is a document fragment,
      # its single child is added instead.
      #
      # Raises RuntimeError if the document already has a root node, or if a
      # fragment with more than one child is given.
      def add_child(child)
        raise "Document already has a root node" if root
        if child.type == Node::DOCUMENT_FRAG_NODE
          raise "Document cannot have multiple root nodes" if child.children.size > 1
          super(child.children.first)
        else
          super
        end
      end
      alias :<< :add_child

      private
      def implied_xpath_context
        "/"
      end

      def inspect_attributes
        [:name, :children]
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
module Nokogiri
  module XML
    class DocumentFragment < Nokogiri::XML::Node
      ####
      # Create a Nokogiri::XML::DocumentFragment from +tags+, owned by a
      # newly created XML::Document.
      def self.parse(tags)
        new(XML::Document.new, tags)
      end

      ##
      # Create a new DocumentFragment from +tags+.
      #
      # When +tags+ is nil nothing is parsed.
      #
      # If +ctx+ is present, it is used as a context node for the
      # subtree created, e.g., namespaces will be resolved relative
      # to +ctx+.  Without +ctx+ the markup is wrapped in a temporary
      # <root> element, parsed as a document, and the nodes below that
      # element are taken.
      def initialize(document, tags = nil, ctx = nil)
        return self unless tags

        nodes =
          if ctx
            ctx.parse(tags)
          else
            wrapped = XML::Document.parse("<root>#{tags}</root>")
            wrapped.xpath("/root/node()")
          end

        # Reparent every parsed node under this fragment.
        nodes.each { |node| node.parent = self }
      end

      ###
      # The name reported for every DocumentFragment
      def name; '#document-fragment'; end

      ###
      # Convert this DocumentFragment to a string (the string form of its
      # children)
      def to_s
        children.to_s
      end

      ###
      # Convert this DocumentFragment to html
      # See Nokogiri::XML::NodeSet#to_html
      def to_html(*args)
        children.to_html(*args)
      end

      ###
      # Convert this DocumentFragment to xhtml
      # See Nokogiri::XML::NodeSet#to_xhtml
      def to_xhtml(*args)
        children.to_xhtml(*args)
      end

      ###
      # Convert this DocumentFragment to xml
      # See Nokogiri::XML::NodeSet#to_xml
      def to_xml(*args)
        children.to_xml(*args)
      end

      ###
      # Search this fragment.  See Nokogiri::XML::Node#css
      #
      # Returns an empty NodeSet when the fragment has no children.
      def css(*args)
        return NodeSet.new(document) unless children.any?

        children.css(*args)
      end

      alias_method :serialize, :to_s

      # Strings are turned into the children of a fragment built by this
      # node's document; anything else is left to the superclass.
      def coerce(data)
        String === data ? document.fragment(data).children : super
      end
      private :coerce
    end
  end
end
@@ -0,0 +1,22 @@
1
module Nokogiri
  module XML
    # A DTD node.  Several methods inherited from Node are removed for
    # this class.
    class DTD < Nokogiri::XML::Node
      undef_method :attribute_nodes
      undef_method :values
      undef_method :content
      undef_method :namespace
      undef_method :namespace_definitions
      undef_method :line if method_defined?(:line)

      # The keys of #attributes
      def keys
        attributes.keys
      end

      ##
      # Yield each entry of #attributes to the block as a single
      # [key, value] pair.
      #
      # Returns an Enumerator when called without a block (previously this
      # failed by calling +nil+).
      def each(&block)
        return to_enum(:each) unless block

        attributes.each { |key, value|
          block.call([key, value])
        }
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module Nokogiri
  module XML
    ###
    # Represents the allowed content in an Element Declaration inside a DTD:
    #
    #   <?xml version="1.0"?><?TEST-STYLE PIDATA?>
    #   <!DOCTYPE staff SYSTEM "staff.dtd" [
    #     <!ELEMENT div1 (head, (p | list | note)*, div2*)>
    #   ]>
    #   <staff></staff>
    #
    # ElementContent represents the tree inside the <!ELEMENT> tag shown above
    # that lists the possible content for the div1 tag.
    class ElementContent
      # Possible definitions of type
      PCDATA  = 1
      ELEMENT = 2
      SEQ     = 3
      OR      = 4

      # Possible content occurrences
      ONCE = 1
      OPT  = 2
      MULT = 3
      PLUS = 4

      attr_reader :document

      ###
      # Get the children of this ElementContent node: +c1+ and +c2+, in that
      # order, leaving out whichever is nil.  (+c1+ and +c2+ are not defined
      # in this class body; presumably they come from the native extension.)
      def children
        [c1, c2].reject(&:nil?)
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Nokogiri
  module XML
    # An element declaration node.  Namespace-related methods (and +line+,
    # when it exists) inherited from Node are removed.
    class ElementDecl < Nokogiri::XML::Node
      undef_method :namespace, :namespace_definitions
      undef_method :line if method_defined?(:line)

      # Short description: class name, object id in hex, and the inspected
      # string form of the node.
      def inspect
        address = sprintf("0x%x", object_id)
        "#<#{self.class.name}:#{address} #{to_s.inspect}>"
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Nokogiri
  module XML
    # An entity declaration node.  Attribute- and namespace-related methods
    # (and +line+, when it exists) inherited from Node are removed.
    class EntityDecl < Nokogiri::XML::Node
      undef_method :attribute_nodes, :attributes
      undef_method :namespace, :namespace_definitions
      undef_method :line if method_defined?(:line)

      class << self
        # Construction is handed to the document: returns whatever
        # <tt>doc.create_entity(name, *args)</tt> returns.
        def new(name, doc, *args)
          doc.create_entity(name, *args)
        end
      end

      # Short description: class name, object id in hex, and the inspected
      # string form of the node.
      def inspect
        address = sprintf("0x%x", object_id)
        "#<#{self.class.name}:#{address} #{to_s.inspect}>"
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Nokogiri
  module XML
    # A namespace object.  Exposes the document it belongs to and mixes in
    # the PP::Node module.
    class Namespace
      include Nokogiri::XML::PP::Node

      attr_reader :document

      # Names of the attributes listed for inspection: prefix and href.
      def inspect_attributes
        [:prefix, :href]
      end
      private :inspect_attributes
    end
  end
end
@@ -0,0 +1,907 @@
1
+ require 'stringio'
2
+ require 'nokogiri/xml/node/save_options'
3
+
4
+ module Nokogiri
5
+ module XML
6
+ ####
7
+ # Nokogiri::XML::Node is your window to the fun filled world of dealing
8
+ # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
9
+ # to a hash with regard to attributes. For example (from irb):
10
+ #
11
+ # irb(main):004:0> node
12
+ # => <a href="#foo" id="link">link</a>
13
+ # irb(main):005:0> node['href']
14
+ # => "#foo"
15
+ # irb(main):006:0> node.keys
16
+ # => ["href", "id"]
17
+ # irb(main):007:0> node.values
18
+ # => ["#foo", "link"]
19
+ # irb(main):008:0> node['class'] = 'green'
20
+ # => "green"
21
+ # irb(main):009:0> node
22
+ # => <a href="#foo" id="link" class="green">link</a>
23
+ # irb(main):010:0>
24
+ #
25
+ # See Nokogiri::XML::Node#[] and Nokogiri::XML::Node#[]= for more information.
26
+ #
27
+ # Nokogiri::XML::Node also has methods that let you move around your
28
+ # tree. For navigating your tree, see:
29
+ #
30
+ # * Nokogiri::XML::Node#parent
31
+ # * Nokogiri::XML::Node#children
32
+ # * Nokogiri::XML::Node#next
33
+ # * Nokogiri::XML::Node#previous
34
+ #
35
+ # You may search this node's subtree using Node#xpath and Node#css
36
+ class Node
37
+ include Nokogiri::XML::PP::Node
38
+ include Enumerable
39
+
40
+ # Element node type, see Nokogiri::XML::Node#element?
41
+ ELEMENT_NODE = 1
42
+ # Attribute node type
43
+ ATTRIBUTE_NODE = 2
44
+ # Text node type, see Nokogiri::XML::Node#text?
45
+ TEXT_NODE = 3
46
+ # CDATA node type, see Nokogiri::XML::Node#cdata?
47
+ CDATA_SECTION_NODE = 4
48
+ # Entity reference node type
49
+ ENTITY_REF_NODE = 5
50
+ # Entity node type
51
+ ENTITY_NODE = 6
52
+ # PI node type
53
+ PI_NODE = 7
54
+ # Comment node type, see Nokogiri::XML::Node#comment?
55
+ COMMENT_NODE = 8
56
+ # Document node type, see Nokogiri::XML::Node#xml?
57
+ DOCUMENT_NODE = 9
58
+ # Document type node type
59
+ DOCUMENT_TYPE_NODE = 10
60
+ # Document fragment node type
61
+ DOCUMENT_FRAG_NODE = 11
62
+ # Notation node type
63
+ NOTATION_NODE = 12
64
+ # HTML document node type, see Nokogiri::XML::Node#html?
65
+ HTML_DOCUMENT_NODE = 13
66
+ # DTD node type
67
+ DTD_NODE = 14
68
+ # Element declaration type
69
+ ELEMENT_DECL = 15
70
+ # Attribute declaration type
71
+ ATTRIBUTE_DECL = 16
72
+ # Entity declaration type
73
+ ENTITY_DECL = 17
74
+ # Namespace declaration type
75
+ NAMESPACE_DECL = 18
76
+ # XInclude start type
77
+ XINCLUDE_START = 19
78
+ # XInclude end type
79
+ XINCLUDE_END = 20
80
+ # DOCB document node type
81
+ DOCB_DOCUMENT_NODE = 21
82
+
83
def initialize(name, document) # :nodoc:
  # Deliberately left empty: neither argument is used here.
end
86
+
87
###
# Hand this node to its Document so that the decorators registered there
# are applied to it.  Returns whatever Document#decorate returns.
def decorate!
  owner = document
  owner.decorate(self)
end
92
+
93
###
# Search this node for +paths+.  Each path may be XPath or CSS: a path
# starting with "/" or "./" is passed through as XPath, anything else is
# converted from CSS.  An optional hash of namespaces may be appended; when
# it is missing, the namespaces of the document root are used (or an empty
# hash if there is no root).
# See Node#xpath and Node#css.
def search(*paths)
  # TODO use paths, handler, ns, binds = extract_params(paths)
  ns =
    if paths.last.is_a?(Hash)
      paths.pop
    elsif document.root
      document.root.namespaces
    else
      {}
    end

  prefix = "#{implied_xpath_context}/"

  queries = paths.map do |path|
    query = path.to_s
    if query =~ /^(\.\/|\/)/
      query
    else
      CSS.xpath_for(query, :prefix => prefix, :ns => ns)
    end
  end

  xpath(*(queries.flatten.uniq + [ns]))
end
alias :/ :search
114
+
115
###
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
#
# Search this node for XPath +paths+. +paths+ must be one or more XPath
# queries.
#
#   node.xpath('.//title')
#
# A hash of namespace bindings may be appended.  For example:
#
#   node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
#   node.xpath('.//xmlns:name', node.root.namespaces)
#
# A hash of variable bindings may also be appended to the namespace bindings.  For example:
#
#   node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
#
# Custom XPath functions may also be defined.  To define custom
# functions create a class and implement the function you want
# to define.  The first argument to the method will be the
# current matching NodeSet.  Any other arguments are ones that
# you pass in.  Note that this class may appear anywhere in the
# argument list.  For example:
#
#   node.xpath('.//title[regex(., "\w+")]', Class.new {
#     def regex node_set, regex
#       node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
#     end
#   }.new)
#
# With a single path the result of that query is returned as is; with
# several paths the results are gathered into one new NodeSet.
def xpath(*paths)
  return NodeSet.new(document) unless document

  paths, handler, ns, binds = extract_params(paths)

  results = paths.map do |query|
    context = XPathContext.new(self)
    context.register_namespaces(ns)
    # Outside libxml, "/xmlns:" steps are rewritten to "/:".
    query = query.gsub(/\/xmlns:/, '/:') unless Nokogiri.uses_libxml?

    if binds
      binds.each { |key, value| context.register_variable(key.to_s, value) }
    end

    context.evaluate(query, handler)
  end

  return results.first if results.length == 1

  NodeSet.new(document) do |combined|
    results.each do |set|
      set.each { |node| combined << node }
    end
  end
end
171
+
172
+ ###
173
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
174
+ #
175
+ # Search this node for CSS +rules+. +rules+ must be one or more CSS
176
+ # selectors. For example:
177
+ #
178
+ # node.css('title')
179
+ # node.css('body h1.bold')
180
+ # node.css('div + p.green', 'div#one')
181
+ #
182
+ # A hash of namespace bindings may be appended. For example:
183
+ #
184
+ # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
185
+ #
186
+ # Custom CSS pseudo classes may also be defined. To define
187
+ # custom pseudo classes, create a class and implement the custom
188
+ # pseudo class you want defined. The first argument to the
189
+ # method will be the current matching NodeSet. Any other
190
+ # arguments are ones that you pass in. For example:
191
+ #
192
+ # node.css('title:regex("\w+")', Class.new {
193
+ # def regex node_set, regex
194
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
195
+ # end
196
+ # }.new)
197
+ #
198
+ # Note that the CSS query string is case-sensitive with regards
199
+ # to your document type. That is, if you're looking for "H1" in
200
+ # an HTML document, you'll never find anything, since HTML tags
201
+ # will match only lowercase CSS queries. However, "H1" might be
202
+ # found in an XML document, where tags names are case-sensitive
203
+ # (e.g., "H1" is distinct from "h1").
204
+ #
205
def css *rules
  # Separate the selectors from the optional handler, namespaces and bindings.
  selectors, handler, ns, binds = extract_params(rules)

  prefix = "#{implied_xpath_context}/"

  # Translate each selector to XPath and drop duplicate expressions.
  queries = selectors.map do |selector|
    CSS.xpath_for(selector, :prefix => prefix, :ns => ns)
  end
  queries = queries.flatten.uniq

  # Re-append whichever extras were supplied so #xpath receives them too.
  extras = [ns, handler, binds].compact

  xpath(*(queries + extras))
end
216
+
217
+ ###
218
+ # Search this node's immediate children using CSS selector +selector+
219
def > selector
  # A document without a root element has no namespaces to bind, so fall
  # back to an empty hash (the same guard #at uses) instead of calling
  # #namespaces on nil.
  root = document.root
  ns = root ? root.namespaces : {}
  xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
end
223
+
224
+ ###
225
+ # Search for the first occurrence of +path+.
226
+ #
227
+ # Returns nil if nothing is found, otherwise a Node.
228
def at path, ns = document.root ? document.root.namespaces : {}
  # Delegate to #search and keep only the first result.
  results = search(path, ns)
  results.first
end
231
+ alias :% :at
232
+
233
+ ##
234
+ # Search this node for the first occurrence of XPath +paths+.
235
+ # Equivalent to <tt>xpath(paths).first</tt>
236
+ # See Node#xpath for more information.
237
+ #
238
def at_xpath *paths
  # First result of the XPath query.
  found = xpath(*paths)
  found.first
end
241
+
242
+ ##
243
+ # Search this node for the first occurrence of CSS +rules+.
244
+ # Equivalent to <tt>css(rules).first</tt>
245
+ # See Node#css for more information.
246
+ #
247
def at_css *rules
  # First result of the CSS query.
  found = css(*rules)
  found.first
end
250
+
251
+ ###
252
+ # Get the attribute value for the attribute +name+
253
def [] name
  # Attribute names are looked up by their string form.
  attr_name = name.to_s
  key?(attr_name) ? get(attr_name) : nil
end
257
+
258
+ ###
259
+ # Add +node_or_tags+ as a child of this Node.
260
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
261
+ #
262
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
263
def add_child node_or_tags
  incoming = coerce(node_or_tags)

  # A single node is attached directly and returned.
  unless incoming.is_a?(XML::NodeSet)
    add_child_node incoming
    return incoming
  end

  # A NodeSet is attached member by member, in order.
  incoming.each { |child| add_child_node child }
  incoming
end
272
+
273
+ ###
274
+ # Insert +node_or_tags+ before this Node (as a sibling).
275
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
276
+ #
277
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
278
+ #
279
+ # Also see related method +before+.
280
def add_previous_sibling node_or_tags
  incoming = coerce(node_or_tags)

  unless incoming.is_a?(XML::NodeSet)
    add_previous_sibling_node incoming
    return incoming
  end

  # When self is a text node, a temporary 'dummy' element is inserted before
  # it and used as the pivot; it is unlinked again once the set is in place.
  anchor = self
  if text?
    anchor = Nokogiri::XML::Node.new 'dummy', document
    add_previous_sibling_node anchor
  end
  incoming.each { |sibling| anchor.send :add_previous_sibling_node, sibling }
  anchor.unlink if text?
  incoming
end
296
+
297
+ ###
298
+ # Insert +node_or_tags+ after this Node (as a sibling).
299
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
300
+ #
301
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
302
+ #
303
+ # Also see related method +after+.
304
def add_next_sibling node_or_tags
  incoming = coerce(node_or_tags)

  unless incoming.is_a?(XML::NodeSet)
    add_next_sibling_node incoming
    return incoming
  end

  # When self is a text node, a temporary 'dummy' element is inserted after
  # it and used as the pivot; it is unlinked again once the set is in place.
  anchor = self
  if text?
    anchor = Nokogiri::XML::Node.new 'dummy', document
    add_next_sibling_node anchor
  end
  # Each node lands directly after the pivot, so the set is walked backwards
  # to leave its members in their original order.
  incoming.reverse.each { |sibling| anchor.send :add_next_sibling_node, sibling }
  anchor.unlink if text?
  incoming
end
320
+
321
+ ####
322
+ # Insert +node_or_tags+ before this node (as a sibling).
323
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
324
+ #
325
+ # Returns self, to support chaining of calls.
326
+ #
327
+ # Also see related method +add_previous_sibling+.
328
def before node_or_tags
  # Insert, then hand back the receiver so calls can be chained.
  add_previous_sibling(node_or_tags)
  self
end
332
+
333
+ ####
334
+ # Insert +node_or_tags+ after this node (as a sibling).
335
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
336
+ #
337
+ # Returns self, to support chaining of calls.
338
+ #
339
+ # Also see related method +add_next_sibling+.
340
def after node_or_tags
  # Insert, then hand back the receiver so calls can be chained.
  add_next_sibling(node_or_tags)
  self
end
344
+
345
+ ####
346
+ # Set the inner html for this Node to +node_or_tags+
347
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
348
+ #
349
+ # Returns self.
350
+ #
351
+ # Also see related method +children=+
352
def inner_html= node_or_tags
  # Setting the inner markup is the same as replacing all children.
  self.children = (node_or_tags)
  self
end
356
+
357
+ ####
358
+ # Set the inner html for this Node to +node_or_tags+
359
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
360
+ #
361
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
362
+ #
363
+ # Also see related method +inner_html=+
364
def children= node_or_tags
  replacement = coerce(node_or_tags)

  # Detach the current children before attaching the replacement.
  children.unlink

  unless replacement.is_a?(XML::NodeSet)
    add_child_node replacement
    return replacement
  end

  replacement.each { |child| add_child_node child }
  replacement
end
374
+
375
+ ####
376
+ # Replace this Node with +node_or_tags+.
377
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
378
+ #
379
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
380
+ #
381
+ # Also see related method +swap+.
382
def replace node_or_tags
  replacement = coerce(node_or_tags)

  unless replacement.is_a?(XML::NodeSet)
    replace_node replacement
    return replacement
  end

  # When self is a text node, a temporary 'dummy' element takes its place
  # and self is unlinked right away; otherwise self is the node replaced.
  target = self
  if text?
    target = Nokogiri::XML::Node.new 'dummy', document
    add_previous_sibling_node target
    unlink
  end
  # Put every replacement node in front of the target, then drop the target.
  replacement.each { |node| target.add_previous_sibling node }
  target.unlink
  replacement
end
399
+
400
+ ####
401
+ # Swap this Node for +node_or_tags+
402
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
403
+ #
404
+ # Returns self, to support chaining of calls.
405
+ #
406
+ # Also see related method +replace+.
407
def swap node_or_tags
  # Same as #replace, but returns the receiver so calls can be chained.
  replace(node_or_tags)
  self
end
411
+
412
+ alias :next :next_sibling
413
+ alias :previous :previous_sibling
414
+
415
+ # :stopdoc:
416
+ # HACK: This is to work around an RDoc bug
417
+ alias :next= :add_next_sibling
418
+ # :startdoc:
419
+
420
+ alias :previous= :add_previous_sibling
421
+ alias :remove :unlink
422
+ alias :get_attribute :[]
423
+ alias :attr :[]
424
+ alias :set_attribute :[]=
425
+ alias :text :content
426
+ alias :inner_text :content
427
+ alias :has_attribute? :key?
428
+ alias :<< :add_child
429
+ alias :name :node_name
430
+ alias :name= :node_name=
431
+ alias :type :node_type
432
+ alias :to_str :text
433
+ alias :clone :dup
434
+ alias :elements :element_children
435
+
436
+ ####
437
+ # Returns a hash containing the node's attributes. The key is
438
+ # the attribute name without any namespace, the value is a Nokogiri::XML::Attr
439
+ # representing the attribute.
440
+ # If you need to distinguish attributes with the same name, with different namespaces
441
+ # use #attribute_nodes instead.
442
def attributes
  # Index the attribute nodes by name; a later node with the same name
  # replaces an earlier one.
  attribute_nodes.inject({}) do |by_name, attr|
    by_name[attr.node_name] = attr
    by_name
  end
end
447
+
448
+ ###
449
+ # Get the attribute values for this Node.
450
def values
  # One value per attribute node, in attribute_nodes order.
  attribute_nodes.collect do |attr|
    attr.value
  end
end
453
+
454
+ ###
455
+ # Get the attribute names for this Node.
456
def keys
  # One name per attribute node, in attribute_nodes order.
  attribute_nodes.collect do |attr|
    attr.node_name
  end
end
459
+
460
+ ###
461
+ # Iterate over each attribute name and value pair for this Node.
462
def each &block
  # Hand each attribute to the block as a [name, value] pair.
  attribute_nodes.each do |attr|
    pair = [attr.node_name, attr.value]
    block.call(pair)
  end
end
467
+
468
+ ###
469
+ # Remove the attribute named +name+
470
def remove_attribute name
  # Normalise the name the same way #[] does, so Symbols work as well.
  attr_name = name.to_s
  return nil unless key?(attr_name)

  # key? and the attributes hash can disagree (the hash is keyed by plain
  # node name), so guard against a missing entry rather than calling
  # #remove on nil.
  attr = attributes[attr_name]
  attr.remove if attr
end
473
+ alias :delete :remove_attribute
474
+
475
+ ###
476
+ # Returns true if this Node matches +selector+
477
def matches? selector
  # The search is run from the outermost ancestor.
  root = ancestors.last
  # A node with no ancestors has nothing to search from; report "no match"
  # instead of failing with NoMethodError on nil.
  return false unless root

  root.search(selector).include?(self)
end
480
+
481
+ ###
482
+ # Create a DocumentFragment containing +tags+ that is relative to _this_
483
+ # context node.
484
def fragment tags
  # Use the HTML or XML DocumentFragment class to match the owning document.
  flavour = document.html? ? Nokogiri::HTML : Nokogiri::XML
  flavour::DocumentFragment.new(document, tags, self)
end
488
+
489
+ ###
490
+ # Parse +string_or_io+ as a document fragment within the context of
491
+ # *this* node. Returns a XML::NodeSet containing the nodes parsed from
492
+ # +string_or_io+.
493
def parse string_or_io, options = nil
  options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
  # Match on Integer rather than Fixnum: Integer covers every integer value
  # and still exists on Rubies where the Fixnum constant has been removed.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  # Give the options to the user
  yield options if block_given?

  contents = string_or_io.respond_to?(:read) ? string_or_io.read : string_or_io

  # Nothing to parse: return an empty set bound to this document.
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?

  ##
  # This is a horrible hack, but I don't care. See #313 for background.
  # If the in-context parse produced nothing, added errors to the document,
  # and recovery is enabled, fall back to parsing an HTML fragment.
  error_count = document.errors.length
  node_set = in_context(contents, options.to_i)
  if node_set.empty? && document.errors.length > error_count && options.recover?
    recovered = Nokogiri::HTML::DocumentFragment.parse contents
    node_set = recovered.children
  end
  node_set
end
517
+
518
+ ####
519
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
520
def content= string
  # Escape the text first so it is stored as character data, not markup.
  escaped = encode_special_chars(string.to_s)
  self.native_content = escaped
end
523
+
524
+ ###
525
+ # Set the parent Node for this Node
526
def parent= parent_node
  # Re-parenting is done by adding self as a child of the new parent.
  parent_node.add_child self
  parent_node
end
530
+
531
+ ###
532
+ # Returns a Hash of {prefix => value} for all namespaces on this
533
+ # node and its ancestors.
534
+ #
535
+ # This method returns the same namespaces as #namespace_scopes.
536
+ #
537
+ # Returns namespaces in scope for self -- those defined on self
538
+ # element directly or any ancestor node -- as a Hash of
539
+ # attribute-name/value pairs. Note that the keys in this hash are
540
+ # XML attributes that would be used to define this namespace,
541
+ # such as "xmlns:prefix", not just the prefix. Default namespace
542
+ # set on self will be included with key "xmlns". However,
543
+ # default namespaces set on ancestor will NOT be, even if self
544
+ # has no explicit default namespace.
545
def namespaces
  namespace_scopes.inject({}) do |in_scope, nd|
    # "xmlns" for a namespace without a prefix, "xmlns:prefix" otherwise.
    attr_name = ['xmlns', nd.prefix].compact.join(':')
    if RUBY_VERSION >= '1.9' && document.encoding
      begin
        attr_name.force_encoding document.encoding
      rescue ArgumentError
        # Best effort: keep the key's current encoding if this one is rejected.
      end
    end
    in_scope[attr_name] = nd.href
    in_scope
  end
end
557
+
558
+ # Returns true if this is a Comment
559
def comment?
  # Compare this node's type against the comment node constant.
  COMMENT_NODE == type
end
562
+
563
+ # Returns true if this is a CDATA
564
def cdata?
  # Compare this node's type against the CDATA section constant.
  CDATA_SECTION_NODE == type
end
567
+
568
+ # Returns true if this is an XML::Document node
569
def xml?
  # Compare this node's type against the XML document constant.
  DOCUMENT_NODE == type
end
572
+
573
+ # Returns true if this is an HTML::Document node
574
def html?
  # Compare this node's type against the HTML document constant.
  HTML_DOCUMENT_NODE == type
end
577
+
578
+ # Returns true if this is a Text node
579
def text?
  # Compare this node's type against the text node constant.
  TEXT_NODE == type
end
582
+
583
+ # Returns true if this is a DocumentFragment
584
def fragment?
  # Compare this node's type against the document fragment constant.
  DOCUMENT_FRAG_NODE == type
end
587
+
588
+ ###
589
+ # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
590
+ # nil on XML documents and on unknown tags.
591
def description
  # XML documents get nil; otherwise look the element up by name.
  document.xml? ? nil : Nokogiri::HTML::ElementDescription[name]
end
595
+
596
+ ###
597
+ # Is this a read only node?
598
def read_only?
  # According to gdome2, these are read-only node types
  case type
  when NOTATION_NODE, ENTITY_NODE, ENTITY_DECL then true
  else false
  end
end
602
+
603
+ # Returns true if this is an Element node
604
def element?
  # Compare this node's type against the element node constant.
  ELEMENT_NODE == type
end
607
+ alias :elem? :element?
608
+
609
+ ###
610
+ # Turn this node into a string. If the document is HTML, this method
611
+ # returns html. If the document is XML, this method returns XML.
612
def to_s
  # The owning document decides which serializer is used.
  if document.xml?
    to_xml
  else
    to_html
  end
end
615
+
616
+ # Get the inner_html for this node's Node#children
617
def inner_html *args
  # Serialize each child with the given arguments and concatenate the results.
  rendered = children.map { |child| child.to_html(*args) }
  rendered.join
end
620
+
621
+ # Get the path to this node as a CSS expression
622
def css_path
  # Split the path on "/" and ignore the empty pieces.
  steps = path.split(/\//).reject { |part| part.length == 0 }
  # Rewrite positional predicates such as "[2]" as ":nth-of-type(2)".
  converted = steps.map { |part| part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)') }
  converted.join(' > ')
end
627
+
628
+ ###
629
+ # Get a list of ancestor Node for this Node. If +selector+ is given,
630
+ # the ancestors must match +selector+
631
def ancestors selector = nil
  return NodeSet.new(document) unless respond_to?(:parent)
  return NodeSet.new(document) unless parent

  # Walk upwards, nearest ancestor first, until a node has no parent or
  # does not expose one.
  parents = [parent]
  while parents.last.respond_to?(:parent)
    break unless ctx_parent = parents.last.parent
    parents << ctx_parent
  end

  return NodeSet.new(document, parents) unless selector

  # Run the selector once from the outermost ancestor and reuse the result,
  # rather than repeating the identical search for every ancestor.
  matching = parents.last.search(selector)

  NodeSet.new(document, parents.find_all { |candidate| matching.include?(candidate) })
end
650
+
651
+ ###
652
+ # Adds a default namespace supplied as a string +url+ href, to self.
653
+ # The consequence is as an xmlns attribute with supplied argument were
654
+ # present in parsed XML. A default namespace set with this method will
655
+ # not show up in #attributes, but when this node is serialized to XML an
656
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
657
def default_namespace= url
  # A nil prefix is what marks the definition as the default namespace.
  add_namespace_definition nil, url
end
660
+ alias :add_namespace :add_namespace_definition
661
+
662
+ ###
663
+ # Set the default namespace on this node (as would be defined with an
664
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
665
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
666
+ # for this node. You probably want #default_namespace= instead, or perhaps
667
+ # #add_namespace_definition with a nil prefix argument.
668
def namespace= ns
  # nil (or false) is passed straight through; anything else is validated.
  if ns
    unless Nokogiri::XML::Namespace === ns
      raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
    end
    raise ArgumentError, 'namespace must be declared on the same document' if ns.document != document
  end

  set_namespace ns
end
680
+
681
+ ####
682
+ # Yields self and all children to +block+ recursively.
683
def traverse &block
  # Post-order walk: every child subtree is visited before the node itself.
  children.each do |child|
    child.traverse(&block)
  end
  block.call(self)
end
687
+
688
+ ###
689
+ # Accept a visitor. This method calls "visit" on +visitor+ with self.
690
def accept visitor
  # Double dispatch: hand this node to the visitor's #visit.
  visitor.visit self
end
693
+
694
+ ###
695
+ # Test to see if this Node is equal to +other+
696
def == other
  # Only objects exposing a pointer_id can be equal to a Node; equality is
  # then decided by comparing the two pointer_ids.
  comparable = other && other.respond_to?(:pointer_id)
  comparable ? pointer_id == other.pointer_id : false
end
701
+
702
+ ###
703
+ # Serialize Node using +options+. Save options can also be set using a
704
+ # block. See SaveOptions.
705
+ #
706
+ # These two statements are equivalent:
707
+ #
708
+ # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
709
+ #
710
+ # or
711
+ #
712
+ # node.serialize(:encoding => 'UTF-8') do |config|
713
+ # config.format.as_xml
714
+ # end
715
+ #
716
def serialize *args, &block
  # Accept either an options Hash or the positional form
  # (encoding, save_with). The Hash is copied so the caller's object is
  # never modified when :encoding is filled in below.
  options = if args.first.is_a?(Hash)
    args.shift.dup
  else
    { :encoding => args[0], :save_with => args[1] || SaveOptions::FORMAT }
  end

  # Fall back to the document's encoding when none was requested.
  encoding = options[:encoding] || document.encoding
  options[:encoding] = encoding

  # Tag the output buffer with the target encoding where strings support it.
  outstring = ""
  if encoding && outstring.respond_to?(:force_encoding)
    outstring.force_encoding(Encoding.find(encoding))
  end
  io = StringIO.new(outstring)
  write_to io, options, &block
  io.string
end
733
+
734
+ ###
735
+ # Serialize this Node to HTML
736
+ #
737
+ # doc.to_html
738
+ #
739
+ # See Node#write_to for a list of +options+. For formatted output,
740
+ # use Node#to_xhtml instead.
741
def to_html options = {}
  # FIXME: this is a hack around broken libxml versions
  return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Work on a copy: defaulting :save_with in the caller's hash would leak
  # the HTML flags into any later call that reuses the same hash.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_HTML

  serialize(options)
end
752
+
753
+ ###
754
+ # Serialize this Node to XML using +options+
755
+ #
756
+ # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
757
+ #
758
+ # See Node#write_to for a list of +options+
759
def to_xml options = {}
  # Work on a copy: defaulting :save_with in the caller's hash would leak
  # the XML flags into any later call that reuses the same hash.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML

  serialize(options)
end
764
+
765
+ ###
766
+ # Serialize this Node to XHTML using +options+
767
+ #
768
+ # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
769
+ #
770
+ # See Node#write_to for a list of +options+
771
def to_xhtml options = {}
  # FIXME: this is a hack around broken libxml versions
  return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Work on a copy: defaulting :save_with in the caller's hash would leak
  # the XHTML flags into any later call that reuses the same hash.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_XHTML

  serialize(options)
end
782
+
783
+ ###
784
+ # Write Node to +io+ with +options+. +options+ modify the output of
785
+ # this method. Valid options are:
786
+ #
787
+ # * +:encoding+ for changing the encoding
788
+ # * +:indent_text+ the indentation text, defaults to one space
789
+ # * +:indent+ the number of +:indent_text+ to use, defaults to 2
790
+ # * +:save_with+ a combination of SaveOptions constants.
791
+ #
792
+ # To save with UTF-8 indented twice:
793
+ #
794
+ # node.write_to(io, :encoding => 'UTF-8', :indent => 2)
795
+ #
796
+ # To save indented with two dashes:
797
+ #
798
+ # node.write_to(io, :indent_text => '-', :indent => 2)
799
+ #
800
def write_to io, *options
  # Accept either a single options Hash or the positional form
  # (encoding, save_options), the same two shapes #serialize accepts.
  # The positional values are read before anything is rebound; indexing
  # the options Hash with 0 and 1 would always yield nil.
  if options.first.is_a?(Hash)
    settings = options.first
    encoding = settings[:encoding]
    save_options = settings[:save_with] || SaveOptions::FORMAT
  else
    settings = {}
    encoding, save_options = options
    save_options ||= SaveOptions::FORMAT
  end
  indent_text = settings[:indent_text] || ' '
  indent_times = settings[:indent] || 2

  # Let the caller adjust the save configuration through the block.
  config = SaveOptions.new(save_options.to_i)
  yield config if block_given?

  native_write_to(io, encoding, indent_text * indent_times, config.options)
end
812
+
813
+ ###
814
+ # Write Node as HTML to +io+ with +options+
815
+ #
816
+ # See Node#write_to for a list of +options+
817
def write_html_to io, options = {}
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Work on a copy: defaulting :save_with in the caller's hash would leak
  # the HTML flags into any later call that reuses the same hash.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_HTML
  write_to io, options
end
827
+
828
+ ###
829
+ # Write Node as XHTML to +io+ with +options+
830
+ #
831
+ # See Node#write_to for a list of +options+
832
def write_xhtml_to io, options = {}
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Work on a copy: defaulting :save_with in the caller's hash would leak
  # the XHTML flags into any later call that reuses the same hash.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_XHTML
  write_to io, options
end
842
+
843
+ ###
844
+ # Write Node as XML to +io+ with +options+
845
+ #
846
+ # doc.write_xml_to io, :encoding => 'UTF-8'
847
+ #
848
+ # See Node#write_to for a list of options
849
def write_xml_to io, options = {}
  # Work on a copy: defaulting :save_with in the caller's hash would leak
  # the XML flags into any later call that reuses the same hash.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
  write_to io, options
end
853
+
854
+ ###
855
+ # Compare two Node objects with respect to their Document. Nodes from
856
+ # different documents cannot be compared.
857
def <=> other
  # Ordering is only defined between Nodes of the same document; anything
  # else yields nil.
  same_document = other.is_a?(Nokogiri::XML::Node) && document == other.document
  same_document ? compare(other) : nil
end
862
+
863
+ private
864
+
865
def extract_params params # :nodoc:
  # Pop off our custom function handler if it exists: the first argument
  # that is neither nil nor a Hash, String or Symbol. nil is skipped so a
  # nil placeholder cannot hide a handler that follows it.
  handler = params.find { |param|
    !param.nil? && ![Hash, String, Symbol].include?(param.class)
  }

  params -= [handler] if handler

  # Collect the trailing hashes (and nil placeholders). [].last is nil, so
  # the emptiness check is what stops the loop once everything is popped.
  hashes = []
  until params.empty?
    break unless Hash === params.last || params.last.nil?
    hashes << params.pop
  end

  # In call order the first trailing hash is namespaces, the second bindings.
  ns, binds = hashes.reverse

  # Default to the root element's namespaces when none were supplied.
  ns ||= document.root ? document.root.namespaces : {}

  [params, handler, ns, binds]
end
882
+
883
def coerce data # :nodoc:
  # NodeSets pass through; fragments and markup strings become their children.
  case data
  when XML::NodeSet          then return data
  when XML::DocumentFragment then return data.children
  when String                then return fragment(data).children
  end

  # Any remaining Node is accepted as-is, except a Document.
  return data if !data.is_a?(Document) && data.is_a?(XML::Node)

  raise ArgumentError, <<-EOERR
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
  EOERR
end
897
+
898
def implied_xpath_context
  # XPath prefix used by #css when building queries from this node.
  './'
end
901
+
902
def inspect_attributes
  # The list of names returned for inspection, in this order.
  [
    :name,
    :namespace,
    :attribute_nodes,
    :children
  ]
end
905
+ end
906
+ end
907
+ end