nokogiri 1.5.10 → 1.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (334) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +5 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +280 -0
  6. data/bin/nokogiri +84 -31
  7. data/dependencies.yml +73 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +956 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +327 -223
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +45 -20
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +1290 -680
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +227 -189
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +227 -140
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -58
  93. data/lib/nokogiri/css/parser.rb +327 -288
  94. data/lib/nokogiri/css/parser.y +67 -45
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +7 -6
  99. data/lib/nokogiri/css/xpath_visitor.rb +263 -75
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +17 -8
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/html4/document.rb +331 -0
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -90
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +96 -54
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +234 -95
  130. data/lib/nokogiri/xml/document_fragment.rb +86 -36
  131. data/lib/nokogiri/xml/dtd.rb +16 -4
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +947 -502
  139. data/lib/nokogiri/xml/node_set.rb +168 -159
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +40 -5
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -41
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +259 -0
  155. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -36
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +18 -16
  163. data/lib/nokogiri.rb +69 -69
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  171. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  172. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  173. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  174. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  175. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  176. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  177. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  178. metadata +382 -460
  179. data/.autotest +0 -26
  180. data/.gemtest +0 -0
  181. data/CHANGELOG.ja.rdoc +0 -785
  182. data/CHANGELOG.rdoc +0 -783
  183. data/C_CODING_STYLE.rdoc +0 -33
  184. data/Manifest.txt +0 -303
  185. data/README.ja.rdoc +0 -106
  186. data/README.rdoc +0 -175
  187. data/ROADMAP.md +0 -90
  188. data/Rakefile +0 -228
  189. data/STANDARD_RESPONSES.md +0 -47
  190. data/Y_U_NO_GEMSPEC.md +0 -155
  191. data/build_all +0 -105
  192. data/ext/nokogiri/html_document.c +0 -170
  193. data/ext/nokogiri/html_document.h +0 -10
  194. data/ext/nokogiri/html_element_description.c +0 -279
  195. data/ext/nokogiri/html_element_description.h +0 -10
  196. data/ext/nokogiri/html_entity_lookup.c +0 -32
  197. data/ext/nokogiri/html_entity_lookup.h +0 -8
  198. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  199. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  200. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  201. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  202. data/ext/nokogiri/xml_attr.h +0 -9
  203. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  204. data/ext/nokogiri/xml_cdata.h +0 -9
  205. data/ext/nokogiri/xml_comment.h +0 -9
  206. data/ext/nokogiri/xml_document.h +0 -23
  207. data/ext/nokogiri/xml_document_fragment.h +0 -10
  208. data/ext/nokogiri/xml_dtd.h +0 -10
  209. data/ext/nokogiri/xml_element_content.h +0 -10
  210. data/ext/nokogiri/xml_element_decl.h +0 -9
  211. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  212. data/ext/nokogiri/xml_entity_decl.h +0 -10
  213. data/ext/nokogiri/xml_entity_reference.h +0 -9
  214. data/ext/nokogiri/xml_io.c +0 -56
  215. data/ext/nokogiri/xml_io.h +0 -11
  216. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  217. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  218. data/ext/nokogiri/xml_namespace.h +0 -13
  219. data/ext/nokogiri/xml_node.h +0 -13
  220. data/ext/nokogiri/xml_node_set.h +0 -14
  221. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  222. data/ext/nokogiri/xml_reader.h +0 -10
  223. data/ext/nokogiri/xml_relax_ng.h +0 -9
  224. data/ext/nokogiri/xml_sax_parser.h +0 -39
  225. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  226. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  227. data/ext/nokogiri/xml_schema.h +0 -9
  228. data/ext/nokogiri/xml_syntax_error.h +0 -13
  229. data/ext/nokogiri/xml_text.h +0 -9
  230. data/ext/nokogiri/xml_xpath_context.h +0 -10
  231. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  232. data/lib/nokogiri/html/document.rb +0 -254
  233. data/lib/nokogiri/html/document_fragment.rb +0 -41
  234. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  235. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  236. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  237. data/tasks/cross_compile.rb +0 -150
  238. data/tasks/nokogiri.org.rb +0 -24
  239. data/tasks/test.rb +0 -95
  240. data/test/css/test_nthiness.rb +0 -159
  241. data/test/css/test_parser.rb +0 -341
  242. data/test/css/test_tokenizer.rb +0 -198
  243. data/test/css/test_xpath_visitor.rb +0 -91
  244. data/test/decorators/test_slop.rb +0 -16
  245. data/test/files/2ch.html +0 -108
  246. data/test/files/address_book.rlx +0 -12
  247. data/test/files/address_book.xml +0 -10
  248. data/test/files/bar/bar.xsd +0 -4
  249. data/test/files/dont_hurt_em_why.xml +0 -422
  250. data/test/files/encoding.html +0 -82
  251. data/test/files/encoding.xhtml +0 -84
  252. data/test/files/exslt.xml +0 -8
  253. data/test/files/exslt.xslt +0 -35
  254. data/test/files/foo/foo.xsd +0 -4
  255. data/test/files/metacharset.html +0 -10
  256. data/test/files/noencoding.html +0 -47
  257. data/test/files/po.xml +0 -32
  258. data/test/files/po.xsd +0 -66
  259. data/test/files/shift_jis.html +0 -10
  260. data/test/files/shift_jis.xml +0 -5
  261. data/test/files/snuggles.xml +0 -3
  262. data/test/files/staff.dtd +0 -10
  263. data/test/files/staff.xml +0 -59
  264. data/test/files/staff.xslt +0 -32
  265. data/test/files/test_document_url/bar.xml +0 -2
  266. data/test/files/test_document_url/document.dtd +0 -4
  267. data/test/files/test_document_url/document.xml +0 -6
  268. data/test/files/tlm.html +0 -850
  269. data/test/files/to_be_xincluded.xml +0 -2
  270. data/test/files/valid_bar.xml +0 -2
  271. data/test/files/xinclude.xml +0 -4
  272. data/test/helper.rb +0 -154
  273. data/test/html/sax/test_parser.rb +0 -141
  274. data/test/html/sax/test_parser_context.rb +0 -46
  275. data/test/html/test_builder.rb +0 -164
  276. data/test/html/test_document.rb +0 -552
  277. data/test/html/test_document_encoding.rb +0 -138
  278. data/test/html/test_document_fragment.rb +0 -261
  279. data/test/html/test_element_description.rb +0 -105
  280. data/test/html/test_named_characters.rb +0 -14
  281. data/test/html/test_node.rb +0 -196
  282. data/test/html/test_node_encoding.rb +0 -27
  283. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  284. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  285. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  286. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  287. data/test/test_convert_xpath.rb +0 -135
  288. data/test/test_css_cache.rb +0 -45
  289. data/test/test_encoding_handler.rb +0 -46
  290. data/test/test_memory_leak.rb +0 -156
  291. data/test/test_nokogiri.rb +0 -132
  292. data/test/test_reader.rb +0 -555
  293. data/test/test_soap4r_sax.rb +0 -52
  294. data/test/test_xslt_transforms.rb +0 -254
  295. data/test/xml/node/test_save_options.rb +0 -28
  296. data/test/xml/node/test_subclass.rb +0 -44
  297. data/test/xml/sax/test_parser.rb +0 -366
  298. data/test/xml/sax/test_parser_context.rb +0 -106
  299. data/test/xml/sax/test_push_parser.rb +0 -157
  300. data/test/xml/test_attr.rb +0 -64
  301. data/test/xml/test_attribute_decl.rb +0 -86
  302. data/test/xml/test_builder.rb +0 -306
  303. data/test/xml/test_c14n.rb +0 -151
  304. data/test/xml/test_cdata.rb +0 -48
  305. data/test/xml/test_comment.rb +0 -29
  306. data/test/xml/test_document.rb +0 -828
  307. data/test/xml/test_document_encoding.rb +0 -28
  308. data/test/xml/test_document_fragment.rb +0 -223
  309. data/test/xml/test_dtd.rb +0 -103
  310. data/test/xml/test_dtd_encoding.rb +0 -33
  311. data/test/xml/test_element_content.rb +0 -56
  312. data/test/xml/test_element_decl.rb +0 -73
  313. data/test/xml/test_entity_decl.rb +0 -122
  314. data/test/xml/test_entity_reference.rb +0 -245
  315. data/test/xml/test_namespace.rb +0 -95
  316. data/test/xml/test_node.rb +0 -1137
  317. data/test/xml/test_node_attributes.rb +0 -96
  318. data/test/xml/test_node_encoding.rb +0 -107
  319. data/test/xml/test_node_inheritance.rb +0 -32
  320. data/test/xml/test_node_reparenting.rb +0 -374
  321. data/test/xml/test_node_set.rb +0 -755
  322. data/test/xml/test_parse_options.rb +0 -64
  323. data/test/xml/test_processing_instruction.rb +0 -30
  324. data/test/xml/test_reader_encoding.rb +0 -142
  325. data/test/xml/test_relax_ng.rb +0 -60
  326. data/test/xml/test_schema.rb +0 -103
  327. data/test/xml/test_syntax_error.rb +0 -12
  328. data/test/xml/test_text.rb +0 -45
  329. data/test/xml/test_unparented_node.rb +0 -422
  330. data/test/xml/test_xinclude.rb +0 -83
  331. data/test/xml/test_xpath.rb +0 -295
  332. data/test/xslt/test_custom_functions.rb +0 -133
  333. data/test/xslt/test_exception_handling.rb +0 -37
  334. data/test_all +0 -81
@@ -1,264 +1,139 @@
1
- require 'stringio'
2
- require 'nokogiri/xml/node/save_options'
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "stringio"
3
5
 
4
6
  module Nokogiri
5
7
  module XML
6
- ####
7
- # Nokogiri::XML::Node is your window to the fun filled world of dealing
8
- # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
9
- # to a hash with regard to attributes. For example (from irb):
8
+ # Nokogiri::XML::Node is the primary API you'll use to interact with your Document.
9
+ #
10
+ # == Attributes
11
+ #
12
+ # A Nokogiri::XML::Node may be treated similarly to a hash with regard to attributes. For
13
+ # example:
14
+ #
15
+ # node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
16
+ # node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
17
+ # node['href'] # => "#foo"
18
+ # node.keys # => ["href", "id"]
19
+ # node.values # => ["#foo", "link"]
20
+ # node['class'] = 'green' # => "green"
21
+ # node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
22
+ #
23
+ # See the method group entitled Node@Working+With+Node+Attributes for the full set of methods.
24
+ #
25
+ # == Navigation
26
+ #
27
+ # Nokogiri::XML::Node also has methods that let you move around your tree:
28
+ #
29
+ # [#parent, #children, #next, #previous]
30
+ # Navigate up, down, or through siblings.
10
31
  #
11
- # irb(main):004:0> node
12
- # => <a href="#foo" id="link">link</a>
13
- # irb(main):005:0> node['href']
14
- # => "#foo"
15
- # irb(main):006:0> node.keys
16
- # => ["href", "id"]
17
- # irb(main):007:0> node.values
18
- # => ["#foo", "link"]
19
- # irb(main):008:0> node['class'] = 'green'
20
- # => "green"
21
- # irb(main):009:0> node
22
- # => <a href="#foo" id="link" class="green">link</a>
23
- # irb(main):010:0>
32
+ # See the method group entitled Node@Traversing+Document+Structure for the full set of methods.
24
33
  #
25
- # See Nokogiri::XML::Node#[] and Nokogiri::XML#[]= for more information.
34
+ # == Serialization
26
35
  #
27
- # Nokogiri::XML::Node also has methods that let you move around your
28
- # tree. For navigating your tree, see:
36
+ # When printing or otherwise emitting a document or a node (and its subtree), there are a few
37
+ # methods you might want to use:
29
38
  #
30
- # * Nokogiri::XML::Node#parent
31
- # * Nokogiri::XML::Node#children
32
- # * Nokogiri::XML::Node#next
33
- # * Nokogiri::XML::Node#previous
39
+ # [#content, #text, #inner_text, #to_str]
40
+ # These methods will all **emit plaintext**,
41
+ # meaning that entities will be replaced (e.g., +&lt;+ will be replaced with +<+), so
42
+ # any sanitizing will likely be undone in the output.
43
+ #
44
+ # [#to_s, #to_xml, #to_html, #inner_html]
45
+ # These methods will all **emit properly-escaped markup**, meaning that it's suitable for
46
+ # consumption by browsers, parsers, etc.
47
+ #
48
+ # See the method group entitled Node@Serialization+and+Generating+Output for the full set of methods.
49
+ #
50
+ # == Searching
51
+ #
52
+ # You may search this node's subtree using methods like #xpath and #css.
53
+ #
54
+ # See the method group entitled Node@Searching+via+XPath+or+CSS+Queries for the full set of methods.
34
55
  #
35
- # You may search this node's subtree using Node#xpath and Node#css
36
56
  class Node
37
57
  include Nokogiri::XML::PP::Node
58
+ include Nokogiri::XML::Searchable
59
+ include Nokogiri::ClassResolver
38
60
  include Enumerable
39
61
 
40
62
  # Element node type, see Nokogiri::XML::Node#element?
41
- ELEMENT_NODE = 1
63
+ ELEMENT_NODE = 1
42
64
  # Attribute node type
43
- ATTRIBUTE_NODE = 2
65
+ ATTRIBUTE_NODE = 2
44
66
  # Text node type, see Nokogiri::XML::Node#text?
45
- TEXT_NODE = 3
67
+ TEXT_NODE = 3
46
68
  # CDATA node type, see Nokogiri::XML::Node#cdata?
47
69
  CDATA_SECTION_NODE = 4
48
70
  # Entity reference node type
49
- ENTITY_REF_NODE = 5
71
+ ENTITY_REF_NODE = 5
50
72
  # Entity node type
51
- ENTITY_NODE = 6
73
+ ENTITY_NODE = 6
52
74
  # PI node type
53
- PI_NODE = 7
75
+ PI_NODE = 7
54
76
  # Comment node type, see Nokogiri::XML::Node#comment?
55
- COMMENT_NODE = 8
77
+ COMMENT_NODE = 8
56
78
  # Document node type, see Nokogiri::XML::Node#xml?
57
- DOCUMENT_NODE = 9
79
+ DOCUMENT_NODE = 9
58
80
  # Document type node type
59
81
  DOCUMENT_TYPE_NODE = 10
60
82
  # Document fragment node type
61
83
  DOCUMENT_FRAG_NODE = 11
62
84
  # Notation node type
63
- NOTATION_NODE = 12
85
+ NOTATION_NODE = 12
64
86
  # HTML document node type, see Nokogiri::XML::Node#html?
65
87
  HTML_DOCUMENT_NODE = 13
66
88
  # DTD node type
67
- DTD_NODE = 14
89
+ DTD_NODE = 14
68
90
  # Element declaration type
69
- ELEMENT_DECL = 15
91
+ ELEMENT_DECL = 15
70
92
  # Attribute declaration type
71
- ATTRIBUTE_DECL = 16
93
+ ATTRIBUTE_DECL = 16
72
94
  # Entity declaration type
73
- ENTITY_DECL = 17
95
+ ENTITY_DECL = 17
74
96
  # Namespace declaration type
75
- NAMESPACE_DECL = 18
97
+ NAMESPACE_DECL = 18
76
98
  # XInclude start type
77
- XINCLUDE_START = 19
99
+ XINCLUDE_START = 19
78
100
  # XInclude end type
79
- XINCLUDE_END = 20
101
+ XINCLUDE_END = 20
80
102
  # DOCB document node type
81
103
  DOCB_DOCUMENT_NODE = 21
82
104
 
83
- def initialize name, document # :nodoc:
84
- # ... Ya. This is empty on purpose.
85
- end
86
-
87
- ###
88
- # Decorate this node with the decorators set up in this node's Document
89
- def decorate!
90
- document.decorate(self)
91
- end
92
-
93
- ###
94
- # Search this node for +paths+. +paths+ can be XPath or CSS, and an
95
- # optional hash of namespaces may be appended.
96
- # See Node#xpath and Node#css.
97
- def search *paths
98
- # TODO use paths, handler, ns, binds = extract_params(paths)
99
- ns = paths.last.is_a?(Hash) ? paths.pop :
100
- (document.root ? document.root.namespaces : {})
101
-
102
- prefix = "#{implied_xpath_context}/"
103
-
104
- xpath(*(paths.map { |path|
105
- path = path.to_s
106
- path =~ /^(\.\/|\/|\.\.|\.$)/ ? path : CSS.xpath_for(
107
- path,
108
- :prefix => prefix,
109
- :ns => ns
110
- )
111
- }.flatten.uniq) + [ns])
112
- end
113
- alias :/ :search
114
-
115
- ###
116
- # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
117
- #
118
- # Search this node for XPath +paths+. +paths+ must be one or more XPath
119
- # queries.
120
- #
121
- # node.xpath('.//title')
122
105
  #
123
- # A hash of namespace bindings may be appended. For example:
106
+ # :call-seq:
107
+ # new(name, document) -> Nokogiri::XML::Node
108
+ # new(name, document) { |node| ... } -> Nokogiri::XML::Node
124
109
  #
125
- # node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
126
- # node.xpath('.//xmlns:name', node.root.namespaces)
110
+ # Create a new node with +name+ that belongs to +document+.
127
111
  #
128
- # A hash of variable bindings may also be appended to the namespace bindings. For example:
129
- #
130
- # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
131
- #
132
- # Custom XPath functions may also be defined. To define custom
133
- # functions create a class and implement the function you want
134
- # to define. The first argument to the method will be the
135
- # current matching NodeSet. Any other arguments are ones that
136
- # you pass in. Note that this class may appear anywhere in the
137
- # argument list. For example:
138
- #
139
- # node.xpath('.//title[regex(., "\w+")]', Class.new {
140
- # def regex node_set, regex
141
- # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
142
- # end
143
- # }.new)
144
- #
145
- def xpath *paths
146
- return NodeSet.new(document) unless document
147
-
148
- paths, handler, ns, binds = extract_params(paths)
149
-
150
- sets = paths.map { |path|
151
- ctx = XPathContext.new(self)
152
- ctx.register_namespaces(ns)
153
- path = path.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
154
-
155
- binds.each do |key,value|
156
- ctx.register_variable key.to_s, value
157
- end if binds
158
-
159
- ctx.evaluate(path, handler)
160
- }
161
- return sets.first if sets.length == 1
162
-
163
- NodeSet.new(document) do |combined|
164
- sets.each do |set|
165
- set.each do |node|
166
- combined << node
167
- end
168
- end
169
- end
170
- end
171
-
172
- ###
173
- # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
174
- #
175
- # Search this node for CSS +rules+. +rules+ must be one or more CSS
176
- # selectors. For example:
177
- #
178
- # node.css('title')
179
- # node.css('body h1.bold')
180
- # node.css('div + p.green', 'div#one')
181
- #
182
- # A hash of namespace bindings may be appended. For example:
183
- #
184
- # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
185
- #
186
- # Custom CSS pseudo classes may also be defined. To define
187
- # custom pseudo classes, create a class and implement the custom
188
- # pseudo class you want defined. The first argument to the
189
- # method will be the current matching NodeSet. Any other
190
- # arguments are ones that you pass in. For example:
191
- #
192
- # node.css('title:regex("\w+")', Class.new {
193
- # def regex node_set, regex
194
- # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
195
- # end
196
- # }.new)
197
- #
198
- # Note that the CSS query string is case-sensitive with regards
199
- # to your document type. That is, if you're looking for "H1" in
200
- # an HTML document, you'll never find anything, since HTML tags
201
- # will match only lowercase CSS queries. However, "H1" might be
202
- # found in an XML document, where tags names are case-sensitive
203
- # (e.g., "H1" is distinct from "h1").
204
- #
205
- def css *rules
206
- rules, handler, ns, binds = extract_params(rules)
207
-
208
- prefix = "#{implied_xpath_context}/"
209
-
210
- rules = rules.map { |rule|
211
- CSS.xpath_for(rule, :prefix => prefix, :ns => ns)
212
- }.flatten.uniq + [ns, handler, binds].compact
213
-
214
- xpath(*rules)
215
- end
216
-
217
- ###
218
- # Search this node's immediate children using CSS selector +selector+
219
- def > selector
220
- ns = document.root.namespaces
221
- xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
222
- end
223
-
224
- ###
225
- # Search for the first occurrence of +path+.
112
+ # If you intend to add a node to a document tree, it's likely that you will prefer one of the
113
+ # Nokogiri::XML::Node methods like #add_child, #add_next_sibling, #replace, etc. which will
114
+ # both create an element (or subtree) and place it in the document tree.
226
115
  #
227
- # Returns nil if nothing is found, otherwise a Node.
228
- def at path, ns = document.root ? document.root.namespaces : {}
229
- search(path, ns).first
230
- end
231
- alias :% :at
232
-
233
- ##
234
- # Search this node for the first occurrence of XPath +paths+.
235
- # Equivalent to <tt>xpath(paths).first</tt>
236
- # See Node#xpath for more information.
116
+ # Another alternative, if you are concerned about performance, is
117
+ # Nokogiri::XML::Document#create_element which accepts additional arguments for contents or
118
+ # attributes but (like this method) avoids parsing markup.
237
119
  #
238
- def at_xpath *paths
239
- xpath(*paths).first
240
- end
241
-
242
- ##
243
- # Search this node for the first occurrence of CSS +rules+.
244
- # Equivalent to <tt>css(rules).first</tt>
245
- # See Node#css for more information.
120
+ # [Parameters]
121
+ # - +name+ (String)
122
+ # - +document+ (Nokogiri::XML::Document) The document to which the returned node will belong.
123
+ # [Yields] Nokogiri::XML::Node
124
+ # [Returns] Nokogiri::XML::Node
246
125
  #
247
- def at_css *rules
248
- css(*rules).first
126
+ def initialize(name, document)
127
+ # This is intentionally empty.
249
128
  end
250
129
 
251
130
  ###
252
- # Get the attribute value for the attribute +name+
253
- def [] name
254
- get(name.to_s)
131
+ # Decorate this node with the decorators set up in this node's Document
132
+ def decorate!
133
+ document.decorate(self)
255
134
  end
256
135
 
257
- ###
258
- # Set the attribute value for the attribute +name+ to +value+
259
- def []= name, value
260
- set name.to_s, value.to_s
261
- end
136
+ # :section: Manipulating Document Structure
262
137
 
263
138
  ###
264
139
  # Add +node_or_tags+ as a child of this Node.
@@ -267,16 +142,44 @@ module Nokogiri
267
142
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
268
143
  #
269
144
  # Also see related method +<<+.
270
- def add_child node_or_tags
145
+ def add_child(node_or_tags)
271
146
  node_or_tags = coerce(node_or_tags)
272
147
  if node_or_tags.is_a?(XML::NodeSet)
273
- node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
148
+ node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
274
149
  else
275
- add_child_node_and_reparent_attrs node_or_tags
150
+ add_child_node_and_reparent_attrs(node_or_tags)
276
151
  end
277
152
  node_or_tags
278
153
  end
279
154
 
155
+ ###
156
+ # Add +node_or_tags+ as the first child of this Node.
157
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
158
+ #
159
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
160
+ #
161
+ # Also see related method +add_child+.
162
+ def prepend_child(node_or_tags)
163
+ if (first = children.first)
164
+ # Mimic the error add_child would raise.
165
+ raise "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
166
+ first.__send__(:add_sibling, :previous, node_or_tags)
167
+ else
168
+ add_child(node_or_tags)
169
+ end
170
+ end
171
+
172
+ ###
173
+ # Add html around this node
174
+ #
175
+ # Returns self
176
+ def wrap(html)
177
+ new_parent = document.parse(html).first
178
+ add_next_sibling(new_parent)
179
+ new_parent.add_child(self)
180
+ self
181
+ end
182
+
280
183
  ###
281
184
  # Add +node_or_tags+ as a child of this Node.
282
185
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
@@ -284,10 +187,11 @@ module Nokogiri
284
187
  # Returns self, to support chaining of calls (e.g., root << child1 << child2)
285
188
  #
286
189
  # Also see related method +add_child+.
287
- def << node_or_tags
288
- add_child node_or_tags
190
+ def <<(node_or_tags)
191
+ add_child(node_or_tags)
289
192
  self
290
193
  end
194
+
291
195
  ###
292
196
  # Insert +node_or_tags+ before this Node (as a sibling).
293
197
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
@@ -295,10 +199,11 @@ module Nokogiri
295
199
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
296
200
  #
297
201
  # Also see related method +before+.
298
- def add_previous_sibling node_or_tags
299
- raise ArgumentError.new("A document may not have multiple root nodes.") if parent.is_a?(XML::Document) && !node_or_tags.is_a?(XML::ProcessingInstruction)
202
+ def add_previous_sibling(node_or_tags)
203
+ raise ArgumentError,
204
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
300
205
 
301
- add_sibling :previous, node_or_tags
206
+ add_sibling(:previous, node_or_tags)
302
207
  end
303
208
 
304
209
  ###
@@ -308,10 +213,11 @@ module Nokogiri
308
213
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
309
214
  #
310
215
  # Also see related method +after+.
311
- def add_next_sibling node_or_tags
312
- raise ArgumentError.new("A document may not have multiple root nodes.") if parent.is_a?(XML::Document)
313
-
314
- add_sibling :next, node_or_tags
216
+ def add_next_sibling(node_or_tags)
217
+ raise ArgumentError,
218
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
219
+
220
+ add_sibling(:next, node_or_tags)
315
221
  end
316
222
 
317
223
  ####
@@ -321,8 +227,8 @@ module Nokogiri
321
227
  # Returns self, to support chaining of calls.
322
228
  #
323
229
  # Also see related method +add_previous_sibling+.
324
- def before node_or_tags
325
- add_previous_sibling node_or_tags
230
+ def before(node_or_tags)
231
+ add_previous_sibling(node_or_tags)
326
232
  self
327
233
  end
328
234
 
@@ -333,8 +239,8 @@ module Nokogiri
333
239
  # Returns self, to support chaining of calls.
334
240
  #
335
241
  # Also see related method +add_next_sibling+.
336
- def after node_or_tags
337
- add_next_sibling node_or_tags
242
+ def after(node_or_tags)
243
+ add_next_sibling(node_or_tags)
338
244
  self
339
245
  end
340
246
 
@@ -342,30 +248,24 @@ module Nokogiri
342
248
  # Set the inner html for this Node to +node_or_tags+
343
249
  # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
344
250
  #
345
- # Returns self.
346
- #
347
251
  # Also see related method +children=+
348
- def inner_html= node_or_tags
252
+ def inner_html=(node_or_tags)
349
253
  self.children = node_or_tags
350
- self
351
254
  end
352
255
 
353
256
  ####
354
257
  # Set the inner html for this Node to +node_or_tags+
355
258
  # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
356
259
  #
357
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
358
- #
359
260
  # Also see related method +inner_html=+
360
- def children= node_or_tags
261
+ def children=(node_or_tags)
361
262
  node_or_tags = coerce(node_or_tags)
362
263
  children.unlink
363
264
  if node_or_tags.is_a?(XML::NodeSet)
364
- node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
265
+ node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
365
266
  else
366
- add_child_node_and_reparent_attrs node_or_tags
267
+ add_child_node_and_reparent_attrs(node_or_tags)
367
268
  end
368
- node_or_tags
369
269
  end
370
270
 
371
271
  ####
@@ -375,25 +275,27 @@ module Nokogiri
375
275
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
376
276
  #
377
277
  # Also see related method +swap+.
378
- def replace node_or_tags
278
+ def replace(node_or_tags)
279
+ raise("Cannot replace a node with no parent") unless parent
280
+
379
281
  # We cannot replace a text node directly, otherwise libxml will return
380
282
  # an internal error at parser.c:13031, I don't know exactly why
381
283
  # libxml is trying to find a parent node that is an element or document
382
284
  # so I can't tell if this is bug in libxml or not. issue #775.
383
285
  if text?
384
- replacee = Nokogiri::XML::Node.new 'dummy', document
385
- add_previous_sibling_node replacee
286
+ replacee = Nokogiri::XML::Node.new("dummy", document)
287
+ add_previous_sibling_node(replacee)
386
288
  unlink
387
- return replacee.replace node_or_tags
289
+ return replacee.replace(node_or_tags)
388
290
  end
389
291
 
390
- node_or_tags = coerce(node_or_tags)
292
+ node_or_tags = parent.coerce(node_or_tags)
391
293
 
392
294
  if node_or_tags.is_a?(XML::NodeSet)
393
- node_or_tags.each { |n| add_previous_sibling n }
295
+ node_or_tags.each { |n| add_previous_sibling(n) }
394
296
  unlink
395
297
  else
396
- replace_node node_or_tags
298
+ replace_node(node_or_tags)
397
299
  end
398
300
  node_or_tags
399
301
  end
@@ -405,154 +307,724 @@ module Nokogiri
405
307
  # Returns self, to support chaining of calls.
406
308
  #
407
309
  # Also see related method +replace+.
408
- def swap node_or_tags
409
- replace node_or_tags
310
+ def swap(node_or_tags)
311
+ replace(node_or_tags)
410
312
  self
411
313
  end
412
314
 
413
- alias :next :next_sibling
414
- alias :previous :previous_sibling
415
-
416
- # :stopdoc:
417
- # HACK: This is to work around an RDoc bug
418
- alias :next= :add_next_sibling
419
- # :startdoc:
420
-
421
- alias :previous= :add_previous_sibling
422
- alias :remove :unlink
423
- alias :get_attribute :[]
424
- alias :attr :[]
425
- alias :set_attribute :[]=
426
- alias :text :content
427
- alias :inner_text :content
428
- alias :has_attribute? :key?
429
- alias :name :node_name
430
- alias :name= :node_name=
431
- alias :type :node_type
432
- alias :to_str :text
433
- alias :clone :dup
434
- alias :elements :element_children
435
-
436
315
  ####
437
- # Returns a hash containing the node's attributes. The key is
438
- # the attribute name without any namespace, the value is a Nokogiri::XML::Attr
439
- # representing the attribute.
440
- # If you need to distinguish attributes with the same name, with different namespaces
441
- # use #attribute_nodes instead.
316
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
317
+ def content=(string)
318
+ self.native_content = encode_special_chars(string.to_s)
319
+ end
320
+
321
+ ###
322
+ # Set the parent Node for this Node
323
+ def parent=(parent_node)
324
+ parent_node.add_child(self)
325
+ end
326
+
327
+ ###
328
+ # Adds a default namespace supplied as a string +url+ href, to self.
329
+ # The consequence is as if an xmlns attribute with the supplied argument were
330
+ # present in parsed XML. A default namespace set with this method will
331
+ # not show up in #attributes, but when this node is serialized to XML an
332
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
333
+ def default_namespace=(url)
334
+ add_namespace_definition(nil, url)
335
+ end
336
+
337
+ ###
338
+ # Set the default namespace on this node (as would be defined with an
339
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
340
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
341
+ # for this node. You probably want #default_namespace= instead, or perhaps
342
+ # #add_namespace_definition with a nil prefix argument.
343
+ def namespace=(ns)
344
+ return set_namespace(ns) unless ns
345
+
346
+ unless Nokogiri::XML::Namespace === ns
347
+ raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
348
+ end
349
+ if ns.document != document
350
+ raise ArgumentError, "namespace must be declared on the same document"
351
+ end
352
+
353
+ set_namespace(ns)
354
+ end
355
+
356
+ ###
357
+ # Do xinclude substitution on the subtree below node. If given a block, a
358
+ # Nokogiri::XML::ParseOptions object initialized from +options+, will be
359
+ # passed to it, allowing more convenient modification of the parser options.
360
+ def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
361
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
362
+ yield options if block_given?
363
+
364
+ # call c extension
365
+ process_xincludes(options.to_i)
366
+ end
367
+
368
+ alias_method :next, :next_sibling
369
+ alias_method :previous, :previous_sibling
370
+ alias_method :next=, :add_next_sibling
371
+ alias_method :previous=, :add_previous_sibling
372
+ alias_method :remove, :unlink
373
+ alias_method :name=, :node_name=
374
+ alias_method :add_namespace, :add_namespace_definition
375
+
376
+ # :section:
377
+
378
+ alias_method :inner_text, :content
379
+ alias_method :text, :content
380
+ alias_method :to_str, :content
381
+ alias_method :name, :node_name
382
+ alias_method :type, :node_type
383
+ alias_method :clone, :dup
384
+ alias_method :elements, :element_children
385
+
386
+ # :section: Working With Node Attributes
387
+
388
+ # :call-seq: [](name) → (String, nil)
389
+ #
390
+ # Fetch an attribute from this node.
391
+ #
392
+ # ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
393
+ # namespaced attributes, use #attribute_with_ns.
394
+ #
395
+ # [Returns] (String, nil) value of the attribute +name+, or +nil+ if no matching attribute exists
396
+ #
397
+ # *Example*
398
+ #
399
+ # doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
400
+ # child = doc.at_css("child")
401
+ # child["size"] # => "large"
402
+ # child["class"] # => "big wide tall"
403
+ #
404
+ # *Example:* Namespaced attributes will not be returned.
405
+ #
406
+ # ⚠ Note namespaced attributes may be accessed with #attribute or #attribute_with_ns
407
+ #
408
+ # doc = Nokogiri::XML(<<~EOF)
409
+ # <root xmlns:width='http://example.com/widths'>
410
+ # <child width:size='broad'/>
411
+ # </root>
412
+ # EOF
413
+ # doc.at_css("child")["size"] # => nil
414
+ # doc.at_css("child").attribute("size").value # => "broad"
415
+ # doc.at_css("child").attribute_with_ns("size", "http://example.com/widths").value
416
+ # # => "broad"
417
+ #
418
+ def [](name)
419
+ get(name.to_s)
420
+ end
421
+
422
+ # :call-seq: []=(name, value) → value
423
+ #
424
+ # Update the attribute +name+ to +value+, or create the attribute if it does not exist.
425
+ #
426
+ # ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
427
+ # namespaced attributes for update, use #attribute_with_ns. To add a namespaced attribute,
428
+ # see the example below.
429
+ #
430
+ # [Returns] +value+
431
+ #
432
+ # *Example*
433
+ #
434
+ # doc = Nokogiri::XML("<root><child/></root>")
435
+ # child = doc.at_css("child")
436
+ # child["size"] = "broad"
437
+ # child.to_html
438
+ # # => "<child size=\"broad\"></child>"
439
+ #
440
+ # *Example:* Add a namespaced attribute.
441
+ #
442
+ # doc = Nokogiri::XML(<<~EOF)
443
+ # <root xmlns:width='http://example.com/widths'>
444
+ # <child/>
445
+ # </root>
446
+ # EOF
447
+ # child = doc.at_css("child")
448
+ # child["size"] = "broad"
449
+ # ns = doc.root.namespace_definitions.find { |ns| ns.prefix == "width" }
450
+ # child.attribute("size").namespace = ns
451
+ # doc.to_html
452
+ # # => "<root xmlns:width=\"http://example.com/widths\">\n" +
453
+ # # " <child width:size=\"broad\"></child>\n" +
454
+ # # "</root>\n"
455
+ #
456
+ def []=(name, value)
457
+ set(name.to_s, value.to_s)
458
+ end
459
+
460
+ #
461
+ # :call-seq: attributes() → Hash<String ⇒ Nokogiri::XML::Attr>
462
+ #
463
+ # Fetch this node's attributes.
464
+ #
465
+ # ⚠ Because the keys do not include any namespace information for the attribute, in case of a
466
+ # simple name collision, not all attributes will be returned. In this case, you will need to
467
+ # use #attribute_nodes.
468
+ #
469
+ # [Returns]
470
+ # Hash containing attributes belonging to +self+. The hash keys are String attribute
471
+ # names (without the namespace), and the hash values are Nokogiri::XML::Attr.
472
+ #
473
+ # *Example* with no namespaces:
474
+ #
475
+ # doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
476
+ # doc.at_css("child").attributes
477
+ # # => {"size"=>#(Attr:0x550 { name = "size", value = "large" }),
478
+ # # "class"=>#(Attr:0x564 { name = "class", value = "big wide tall" })}
479
+ #
480
+ # *Example* with a namespace:
481
+ #
482
+ # doc = Nokogiri::XML("<root xmlns:desc='http://example.com/sizes'><child desc:size='large'/></root>")
483
+ # doc.at_css("child").attributes
484
+ # # => {"size"=>
485
+ # # #(Attr:0x550 {
486
+ # # name = "size",
487
+ # # namespace = #(Namespace:0x564 {
488
+ # # prefix = "desc",
489
+ # # href = "http://example.com/sizes"
490
+ # # }),
491
+ # # value = "large"
492
+ # # })}
493
+ #
494
+ # *Example* with an attribute name collision:
495
+ #
496
+ # ⚠ Note that only one of the attributes is returned in the Hash.
497
+ #
498
+ # doc = Nokogiri::XML(<<~EOF)
499
+ # <root xmlns:width='http://example.com/widths'
500
+ # xmlns:height='http://example.com/heights'>
501
+ # <child width:size='broad' height:size='tall'/>
502
+ # </root>
503
+ # EOF
504
+ # doc.at_css("child").attributes
505
+ # # => {"size"=>
506
+ # # #(Attr:0x550 {
507
+ # # name = "size",
508
+ # # namespace = #(Namespace:0x564 {
509
+ # # prefix = "height",
510
+ # # href = "http://example.com/heights"
511
+ # # }),
512
+ # # value = "tall"
513
+ # # })}
514
+ #
442
515
  def attributes
443
- Hash[attribute_nodes.map { |node|
444
- [node.node_name, node]
445
- }]
516
+ attribute_nodes.each_with_object({}) do |node, hash|
517
+ hash[node.node_name] = node
518
+ end
446
519
  end
447
520
 
448
521
  ###
449
522
  # Get the attribute values for this Node.
450
523
  def values
451
- attribute_nodes.map { |node| node.value }
524
+ attribute_nodes.map(&:value)
525
+ end
526
+
527
+ ###
528
+ # Do this Node's attribute values include +value+?
529
+ def value?(value)
530
+ values.include?(value)
452
531
  end
453
532
 
454
533
  ###
455
534
  # Get the attribute names for this Node.
456
535
  def keys
457
- attribute_nodes.map { |node| node.node_name }
536
+ attribute_nodes.map(&:node_name)
458
537
  end
459
538
 
460
539
  ###
461
540
  # Iterate over each attribute name and value pair for this Node.
462
541
  def each
463
- attribute_nodes.each { |node|
542
+ attribute_nodes.each do |node|
464
543
  yield [node.node_name, node.value]
465
- }
544
+ end
466
545
  end
467
546
 
468
547
  ###
469
548
  # Remove the attribute named +name+
470
- def remove_attribute name
471
- attributes[name].remove if key? name
549
+ def remove_attribute(name)
550
+ attr = attributes[name].remove if key?(name)
551
+ clear_xpath_context if Nokogiri.jruby?
552
+ attr
472
553
  end
473
- alias :delete :remove_attribute
554
+
555
+ #
556
+ # :call-seq: classes() → Array<String>
557
+ #
558
+ # Fetch CSS class names of a Node.
559
+ #
560
+ # This is a convenience function and is equivalent to:
561
+ #
562
+ # node.kwattr_values("class")
563
+ #
564
+ # See related: #kwattr_values, #add_class, #append_class, #remove_class
565
+ #
566
+ # [Returns]
567
+ # The CSS classes (Array of String) present in the Node's "class" attribute. If the
568
+ # attribute is empty or non-existent, the return value is an empty array.
569
+ #
570
+ # *Example*
571
+ #
572
+ # node # => <div class="section title header"></div>
573
+ # node.classes # => ["section", "title", "header"]
574
+ #
575
+ def classes
576
+ kwattr_values("class")
577
+ end
578
+
579
+ #
580
+ # :call-seq: add_class(names) → self
581
+ #
582
+ # Ensure HTML CSS classes are present on +self+. Any CSS classes in +names+ that already exist
583
+ # in the "class" attribute are _not_ added. Note that any existing duplicates in the
584
+ # "class" attribute are not removed. Compare with #append_class.
585
+ #
586
+ # This is a convenience function and is equivalent to:
587
+ #
588
+ # node.kwattr_add("class", names)
589
+ #
590
+ # See related: #kwattr_add, #classes, #append_class, #remove_class
591
+ #
592
+ # [Parameters]
593
+ # - +names+ (String, Array<String>)
594
+ #
595
+ # CSS class names to be added to the Node's "class" attribute. May be a string containing
596
+ # whitespace-delimited names, or an Array of String names. Any class names already present
597
+ # will not be added. Any class names not present will be added. If no "class" attribute
598
+ # exists, one is created.
599
+ #
600
+ # [Returns] +self+ (Node) for ease of chaining method calls.
601
+ #
602
+ # *Example:* Ensure that the node has CSS class "section"
603
+ #
604
+ # node # => <div></div>
605
+ # node.add_class("section") # => <div class="section"></div>
606
+ # node.add_class("section") # => <div class="section"></div> # duplicate not added
607
+ #
608
+ # *Example:* Ensure that the node has CSS classes "section" and "header", via a String argument
609
+ #
610
+ # Note that the CSS class "section" is not added because it is already present.
611
+ # Note also that the pre-existing duplicate CSS class "section" is not removed.
612
+ #
613
+ # node # => <div class="section section"></div>
614
+ # node.add_class("section header") # => <div class="section section header"></div>
615
+ #
616
+ # *Example:* Ensure that the node has CSS classes "section" and "header", via an Array argument
617
+ #
618
+ # node # => <div></div>
619
+ # node.add_class(["section", "header"]) # => <div class="section header"></div>
620
+ #
621
+ def add_class(names)
622
+ kwattr_add("class", names)
623
+ end
624
+
625
+ #
626
+ # :call-seq: append_class(names) → self
627
+ #
628
+ # Add HTML CSS classes to +self+, regardless of duplication. Compare with #add_class.
629
+ #
630
+ # This is a convenience function and is equivalent to:
631
+ #
632
+ # node.kwattr_append("class", names)
633
+ #
634
+ # See related: #kwattr_append, #classes, #add_class, #remove_class
635
+ #
636
+ # [Parameters]
637
+ # - +names+ (String, Array<String>)
638
+ #
639
+ # CSS class names to be appended to the Node's "class" attribute. May be a string containing
640
+ # whitespace-delimited names, or an Array of String names. All class names passed in will be
641
+ # appended to the "class" attribute even if they are already present in the attribute
642
+ # value. If no "class" attribute exists, one is created.
643
+ #
644
+ # [Returns] +self+ (Node) for ease of chaining method calls.
645
+ #
646
+ # *Example:* Append "section" to the node's CSS "class" attribute
647
+ #
648
+ # node # => <div></div>
649
+ # node.append_class("section") # => <div class="section"></div>
650
+ # node.append_class("section") # => <div class="section section"></div> # duplicate added!
651
+ #
652
+ # *Example:* Append "section" and "header" to the node's CSS "class" attribute, via a String argument
653
+ #
654
+ # Note that the CSS class "section" is appended even though it is already present
655
+ #
656
+ # node # => <div class="section section"></div>
657
+ # node.append_class("section header") # => <div class="section section section header"></div>
658
+ #
659
+ # *Example:* Append "section" and "header" to the node's CSS "class" attribute, via an Array argument
660
+ #
661
+ # node # => <div></div>
662
+ # node.append_class(["section", "header"]) # => <div class="section header"></div>
663
+ # node.append_class(["section", "header"]) # => <div class="section header section header"></div>
664
+ #
665
+ def append_class(names)
666
+ kwattr_append("class", names)
667
+ end
668
+
669
+ # :call-seq:
670
+ # remove_class(css_classes) → self
671
+ #
672
+ # Remove HTML CSS classes from this node. Any CSS class names in +css_classes+ that exist in
673
+ # this node's "class" attribute are removed, including any multiple entries.
674
+ #
675
+ # If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
676
+ # attribute is deleted from the node.
677
+ #
678
+ # This is a convenience function and is equivalent to:
679
+ #
680
+ # node.kwattr_remove("class", css_classes)
681
+ #
682
+ # Also see #kwattr_remove, #classes, #add_class, #append_class
683
+ #
684
+ # [Parameters]
685
+ # - +css_classes+ (String, Array<String>)
686
+ #
687
+ # CSS class names to be removed from the Node's
688
+ # "class" attribute. May be a string containing whitespace-delimited names, or an Array of
689
+ # String names. Any class names already present will be removed. If no CSS classes remain,
690
+ # the "class" attribute is deleted.
691
+ #
692
+ # [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
693
+ #
694
+ # *Example*: Deleting a CSS class
695
+ #
696
+ # Note that all instances of the class "section" are removed from the "class" attribute.
697
+ #
698
+ # node # => <div class="section header section"></div>
699
+ # node.remove_class("section") # => <div class="header"></div>
700
+ #
701
+ # *Example*: Deleting the only remaining CSS class
702
+ #
703
+ # Note that the attribute is removed once there are no remaining classes.
704
+ #
705
+ # node # => <div class="section"></div>
706
+ # node.remove_class("section") # => <div></div>
707
+ #
708
+ # *Example*: Deleting multiple CSS classes
709
+ #
710
+ # Note that the "class" attribute is deleted once it's empty.
711
+ #
712
+ # node # => <div class="section header float"></div>
713
+ # node.remove_class(["section", "float"]) # => <div class="header"></div>
714
+ #
715
+ def remove_class(names = nil)
716
+ kwattr_remove("class", names)
717
+ end
718
+
719
+ # :call-seq:
720
+ # kwattr_values(attribute_name) → Array<String>
721
+ #
722
+ # Fetch values from a keyword attribute of a Node.
723
+ #
724
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
725
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
726
+ # contain CSS classes. But other keyword attributes exist, for instance
727
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
728
+ #
729
+ # See also #classes, #kwattr_add, #kwattr_append, #kwattr_remove
730
+ #
731
+ # [Parameters]
732
+ # - +attribute_name+ (String) The name of the keyword attribute to be inspected.
733
+ #
734
+ # [Returns]
735
+ # (Array<String>) The values present in the Node's +attribute_name+ attribute. If the
736
+ # attribute is empty or non-existent, the return value is an empty array.
737
+ #
738
+ # *Example:*
739
+ #
740
+ # node # => <a rel="nofollow noopener external">link</a>
741
+ # node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
742
+ #
743
+ # Since v1.11.0
744
+ def kwattr_values(attribute_name)
745
+ keywordify(get_attribute(attribute_name) || [])
746
+ end
747
+
748
+ # :call-seq:
749
+ # kwattr_add(attribute_name, keywords) → self
750
+ #
751
+ # Ensure that values are present in a keyword attribute.
752
+ #
753
+ # Any values in +keywords+ that already exist in the Node's attribute values are _not_
754
+ # added. Note that any existing duplicates in the attribute values are not removed. Compare
755
+ # with #kwattr_append.
756
+ #
757
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
758
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
759
+ # contain CSS classes. But other keyword attributes exist, for instance
760
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
761
+ #
762
+ # See also #add_class, #kwattr_values, #kwattr_append, #kwattr_remove
763
+ #
764
+ # [Parameters]
765
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
766
+ # - +keywords+ (String, Array<String>)
767
+ # Keywords to be added to the attribute named +attribute_name+. May be a string containing
768
+ # whitespace-delimited values, or an Array of String values. Any values already present will
769
+ # not be added. Any values not present will be added. If the named attribute does not exist,
770
+ # it is created.
771
+ #
772
+ # [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
773
+ #
774
+ # *Example:* Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
775
+ #
776
+ # Note that duplicates are not added.
777
+ #
778
+ # node # => <a></a>
779
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
780
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
781
+ #
782
+ # *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a
783
+ # String argument.
784
+ #
785
+ # Note that "nofollow" is not added because it is already present. Note also that the
786
+ # pre-existing duplicate "nofollow" is not removed.
787
+ #
788
+ # node # => <a rel="nofollow nofollow"></a>
789
+ # node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
790
+ #
791
+ # *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via
792
+ # an Array argument.
793
+ #
794
+ # node # => <a></a>
795
+ # node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
796
+ #
797
+ # Since v1.11.0
798
+ def kwattr_add(attribute_name, keywords)
799
+ keywords = keywordify(keywords)
800
+ current_kws = kwattr_values(attribute_name)
801
+ new_kws = (current_kws + (keywords - current_kws)).join(" ")
802
+ set_attribute(attribute_name, new_kws)
803
+ self
804
+ end
805
+
806
+ # :call-seq:
807
+ # kwattr_append(attribute_name, keywords) → self
808
+ #
809
+ # Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
810
+ # #kwattr_add.
811
+ #
812
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
813
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
814
+ # contain CSS classes. But other keyword attributes exist, for instance
815
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
816
+ #
817
+ # See also #append_class, #kwattr_values, #kwattr_add, #kwattr_remove
818
+ #
819
+ # [Parameters]
820
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
821
+ # - +keywords+ (String, Array<String>)
822
+ # Keywords to be added to the attribute named +attribute_name+. May be a string containing
823
+ # whitespace-delimited values, or an Array of String values. All values passed in will be
824
+ # appended to the named attribute even if they are already present in the attribute. If the
825
+ # named attribute does not exist, it is created.
826
+ #
827
+ # [Returns] +self+ (Node) for ease of chaining method calls.
828
+ #
829
+ # *Example:* Append "nofollow" to the +rel+ attribute.
830
+ #
831
+ # Note that duplicates are added.
832
+ #
833
+ # node # => <a></a>
834
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
835
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a>
836
+ #
837
+ # *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
838
+ #
839
+ # Note that "nofollow" is appended even though it is already present.
840
+ #
841
+ # node # => <a rel="nofollow"></a>
842
+ # node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
843
+ #
844
+ #
845
+ # *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
846
+ #
847
+ # node # => <a></a>
848
+ # node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
849
+ #
850
+ # Since v1.11.0
851
+ def kwattr_append(attribute_name, keywords)
852
+ keywords = keywordify(keywords)
853
+ current_kws = kwattr_values(attribute_name)
854
+ new_kws = (current_kws + keywords).join(" ")
855
+ set_attribute(attribute_name, new_kws)
856
+ self
857
+ end
858
+
859
+ # :call-seq:
860
+ # kwattr_remove(attribute_name, keywords) → self
861
+ #
862
+ # Remove keywords from a keyword attribute. Any matching keywords that exist in the named
863
+ # attribute are removed, including any multiple entries.
864
+ #
865
+ # If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
866
+ # deleted from the node.
867
+ #
868
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
869
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
870
+ # contain CSS classes. But other keyword attributes exist, for instance
871
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
872
+ #
873
+ # See also #remove_class, #kwattr_values, #kwattr_add, #kwattr_append
874
+ #
875
+ # [Parameters]
876
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
877
+ # - +keywords+ (String, Array<String>)
878
+ # Keywords to be removed from the attribute named +attribute_name+. May be a string
879
+ # containing whitespace-delimited values, or an Array of String values. Any keywords present
880
+ # in the named attribute will be removed. If no keywords remain, or if +keywords+ is nil,
881
+ # the attribute is deleted.
882
+ #
883
+ # [Returns] +self+ (Node) for ease of chaining method calls.
884
+ #
885
+ # *Example:*
886
+ #
887
+ # Note that the +rel+ attribute is deleted when empty.
888
+ #
889
+ # node # => <a rel="nofollow noreferrer">link</a>
890
+ # node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
891
+ # node.kwattr_remove("rel", "noreferrer") # => <a>link</a>
892
+ #
893
+ # Since v1.11.0
894
+ def kwattr_remove(attribute_name, keywords)
895
+ if keywords.nil?
896
+ remove_attribute(attribute_name)
897
+ return self
898
+ end
899
+
900
+ keywords = keywordify(keywords)
901
+ current_kws = kwattr_values(attribute_name)
902
+ new_kws = current_kws - keywords
903
+ if new_kws.empty?
904
+ remove_attribute(attribute_name)
905
+ else
906
+ set_attribute(attribute_name, new_kws.join(" "))
907
+ end
908
+ self
909
+ end
910
+
911
+ alias_method :delete, :remove_attribute
912
+ alias_method :get_attribute, :[]
913
+ alias_method :attr, :[]
914
+ alias_method :set_attribute, :[]=
915
+ alias_method :has_attribute?, :key?
916
+
917
+ # :section:
474
918
 
475
919
  ###
476
920
  # Returns true if this Node matches +selector+
477
- def matches? selector
921
+ def matches?(selector)
478
922
  ancestors.last.search(selector).include?(self)
479
923
  end
480
924
 
481
925
  ###
482
926
  # Create a DocumentFragment containing +tags+ that is relative to _this_
483
927
  # context node.
484
- def fragment tags
485
- type = document.html? ? Nokogiri::HTML : Nokogiri::XML
486
- type::DocumentFragment.new(document, tags, self)
928
+ def fragment(tags)
929
+ document.related_class("DocumentFragment").new(document, tags, self)
487
930
  end
488
931
 
489
932
  ###
490
933
  # Parse +string_or_io+ as a document fragment within the context of
491
934
  # *this* node. Returns a XML::NodeSet containing the nodes parsed from
492
935
  # +string_or_io+.
493
- def parse string_or_io, options = nil
494
- options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
495
- if Fixnum === options
496
- options = Nokogiri::XML::ParseOptions.new(options)
936
+ def parse(string_or_io, options = nil)
937
+ ##
938
+ # When the current node is unparented and not an element node, use the
939
+ # document as the parsing context instead. Otherwise, the in-context
940
+ # parser cannot find an element or a document node.
941
+ # Document Fragments are also not usable by the in-context parser.
942
+ if !element? && !document? && (!parent || parent.fragment?)
943
+ return document.parse(string_or_io, options)
497
944
  end
498
- # Give the options to the user
945
+
946
+ options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
947
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
499
948
  yield options if block_given?
500
949
 
501
- contents = string_or_io.respond_to?(:read) ?
502
- string_or_io.read :
950
+ contents = if string_or_io.respond_to?(:read)
951
+ string_or_io.read
952
+ else
503
953
  string_or_io
954
+ end
504
955
 
505
956
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
506
957
 
507
- ##
508
- # This is a horrible hack, but I don't care. See #313 for background.
958
+ # libxml2 does not obey the +recover+ option after encountering errors during +in_context+
959
+ # parsing, and so this horrible hack is here to try to emulate recovery behavior.
960
+ #
961
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
962
+ # would have been inherited from the context node won't be handled correctly. This hack was
963
+ # written in 2010, and I regret it, because it's silently degrading functionality in a way
964
+ # that's not easily prevented (or even detected).
965
+ #
966
+ # I think preferable behavior would be to either:
967
+ #
968
+ # a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
969
+ # b. don't recover, but raise a sensible exception
970
+ #
971
+ # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
972
+ # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
509
973
  error_count = document.errors.length
510
974
  node_set = in_context(contents, options.to_i)
511
- if node_set.empty? and document.errors.length > error_count and options.recover?
512
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
513
- node_set = fragment.children
975
+ if node_set.empty? && (document.errors.length > error_count)
976
+ if options.recover?
977
+ fragment = document.related_class("DocumentFragment").parse(contents)
978
+ node_set = fragment.children
979
+ else
980
+ raise document.errors[error_count]
981
+ end
514
982
  end
515
983
  node_set
516
984
  end
517
985
 
518
- ####
519
- # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
520
- def content= string
521
- self.native_content = encode_special_chars(string.to_s)
522
- end
523
-
524
- ###
525
- # Set the parent Node for this Node
526
- def parent= parent_node
527
- parent_node.add_child(self)
528
- parent_node
529
- end
530
-
531
- ###
532
- # Returns a Hash of {prefix => value} for all namespaces on this
533
- # node and its ancestors.
986
+ # :call-seq:
987
+ # namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
534
988
  #
535
- # This method returns the same namespaces as #namespace_scopes.
989
+ # Fetch all the namespaces on this node and its ancestors.
990
+ #
991
+ # Note that the keys in this hash are the XML attributes that would be used to define this namespace,
992
+ # such as "xmlns:prefix", not just the prefix.
993
+ #
994
+ # The default namespace for this node will be included with key "xmlns".
995
+ #
996
+ # See also #namespace_scopes
997
+ #
998
+ # [Returns]
999
+ # Hash containing all the namespaces on this node and its ancestors. The hash keys are the
1000
+ # namespace prefix, and the hash value for each key is the namespace URI.
1001
+ #
1002
+ # *Example:*
1003
+ #
1004
+ # doc = Nokogiri::XML(<<~EOF)
1005
+ # <root xmlns="http://example.com/root" xmlns:in_scope="http://example.com/in_scope">
1006
+ # <first/>
1007
+ # <second xmlns="http://example.com/child"/>
1008
+ # <third xmlns:foo="http://example.com/foo"/>
1009
+ # </root>
1010
+ # EOF
1011
+ # doc.at_xpath("//root:first", "root" => "http://example.com/root").namespaces
1012
+ # # => {"xmlns"=>"http://example.com/root",
1013
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
1014
+ # doc.at_xpath("//child:second", "child" => "http://example.com/child").namespaces
1015
+ # # => {"xmlns"=>"http://example.com/child",
1016
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
1017
+ # doc.at_xpath("//root:third", "root" => "http://example.com/root").namespaces
1018
+ # # => {"xmlns:foo"=>"http://example.com/foo",
1019
+ # # "xmlns"=>"http://example.com/root",
1020
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
536
1021
  #
537
- # Returns namespaces in scope for self -- those defined on self
538
- # element directly or any ancestor node -- as a Hash of
539
- # attribute-name/value pairs. Note that the keys in this hash
540
- # XML attributes that would be used to define this namespace,
541
- # such as "xmlns:prefix", not just the prefix. Default namespace
542
- # set on self will be included with key "xmlns". However,
543
- # default namespaces set on ancestor will NOT be, even if self
544
- # has no explicit default namespace.
545
1022
  def namespaces
546
- Hash[namespace_scopes.map { |nd|
547
- key = ['xmlns', nd.prefix].compact.join(':')
548
- if RUBY_VERSION >= '1.9' && document.encoding
549
- begin
550
- key.force_encoding document.encoding
551
- rescue ArgumentError
552
- end
553
- end
554
- [key, nd.href]
555
- }]
1023
+ namespace_scopes.each_with_object({}) do |ns, hash|
1024
+ prefix = ns.prefix
1025
+ key = prefix ? "xmlns:#{prefix}" : "xmlns"
1026
+ hash[key] = ns.href
1027
+ end
556
1028
  end
557
1029
 
558
1030
  # Returns true if this is a Comment
@@ -570,11 +1042,21 @@ module Nokogiri
570
1042
  type == DOCUMENT_NODE
571
1043
  end
572
1044
 
573
- # Returns true if this is an HTML::Document node
1045
+ # Returns true if this is an HTML4::Document or HTML5::Document node
574
1046
  def html?
575
1047
  type == HTML_DOCUMENT_NODE
576
1048
  end
577
1049
 
1050
+ # Returns true if this is a Document
1051
+ def document?
1052
+ is_a?(XML::Document)
1053
+ end
1054
+
1055
+ # Returns true if this is a ProcessingInstruction node
1056
+ def processing_instruction?
1057
+ type == PI_NODE
1058
+ end
1059
+
578
1060
  # Returns true if this is a Text node
579
1061
  def text?
580
1062
  type == TEXT_NODE
@@ -586,11 +1068,11 @@ module Nokogiri
586
1068
  end
587
1069
 
588
1070
  ###
589
- # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
1071
+ # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
590
1072
  # nil on XML documents and on unknown tags.
591
1073
  def description
592
1074
  return nil if document.xml?
593
- Nokogiri::HTML::ElementDescription[name]
1075
+ Nokogiri::HTML4::ElementDescription[name]
594
1076
  end
595
1077
 
596
1078
  ###
@@ -604,7 +1086,8 @@ module Nokogiri
604
1086
  def element?
605
1087
  type == ELEMENT_NODE
606
1088
  end
607
- alias :elem? :element?
1089
+
1090
+ alias_method :elem?, :element?
608
1091
 
609
1092
  ###
610
1093
  # Turn this node into a string. If the document is HTML, this method
@@ -614,91 +1097,73 @@ module Nokogiri
614
1097
  end
615
1098
 
616
1099
  # Get the inner_html for this node's Node#children
617
- def inner_html *args
1100
+ def inner_html(*args)
618
1101
  children.map { |x| x.to_html(*args) }.join
619
1102
  end
620
1103
 
621
1104
  # Get the path to this node as a CSS expression
622
1105
  def css_path
623
- path.split(/\//).map { |part|
624
- part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
625
- }.compact.join(' > ')
1106
+ path.split(%r{/}).map do |part|
1107
+ part.empty? ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
1108
+ end.compact.join(" > ")
626
1109
  end
627
1110
 
628
1111
  ###
629
1112
  # Get a list of ancestor Node for this Node. If +selector+ is given,
630
1113
  # the ancestors must match +selector+
631
- def ancestors selector = nil
1114
+ def ancestors(selector = nil)
632
1115
  return NodeSet.new(document) unless respond_to?(:parent)
633
1116
  return NodeSet.new(document) unless parent
634
1117
 
635
1118
  parents = [parent]
636
1119
 
637
1120
  while parents.last.respond_to?(:parent)
638
- break unless ctx_parent = parents.last.parent
1121
+ break unless (ctx_parent = parents.last.parent)
639
1122
  parents << ctx_parent
640
1123
  end
641
1124
 
642
1125
  return NodeSet.new(document, parents) unless selector
643
1126
 
644
1127
  root = parents.last
1128
+ search_results = root.search(selector)
645
1129
 
646
- NodeSet.new(document, parents.find_all { |parent|
647
- root.search(selector).include?(parent)
648
- })
649
- end
650
-
651
- ###
652
- # Adds a default namespace supplied as a string +url+ href, to self.
653
- # The consequence is as an xmlns attribute with supplied argument were
654
- # present in parsed XML. A default namespace set with this method will
655
- # now show up in #attributes, but when this node is serialized to XML an
656
- # "xmlns" attribute will appear. See also #namespace and #namespace=
657
- def default_namespace= url
658
- add_namespace_definition(nil, url)
659
- end
660
- alias :add_namespace :add_namespace_definition
661
-
662
- ###
663
- # Set the default namespace on this node (as would be defined with an
664
- # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
665
- # a Namespace added this way will NOT be serialized as an xmlns attribute
666
- # for this node. You probably want #default_namespace= instead, or perhaps
667
- # #add_namespace_definition with a nil prefix argument.
668
- def namespace= ns
669
- return set_namespace(ns) unless ns
670
-
671
- unless Nokogiri::XML::Namespace === ns
672
- raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
673
- end
674
- if ns.document != document
675
- raise ArgumentError, 'namespace must be declared on the same document'
676
- end
677
-
678
- set_namespace ns
1130
+ NodeSet.new(document, parents.find_all do |parent|
1131
+ search_results.include?(parent)
1132
+ end)
679
1133
  end
680
1134
 
681
1135
  ####
682
1136
  # Yields self and all children to +block+ recursively.
683
- def traverse &block
684
- children.each{|j| j.traverse(&block) }
685
- block.call(self)
1137
+ def traverse(&block)
1138
+ children.each { |j| j.traverse(&block) }
1139
+ yield(self)
686
1140
  end
687
1141
 
688
1142
  ###
689
1143
  # Accept a visitor. This method calls "visit" on +visitor+ with self.
690
- def accept visitor
1144
+ def accept(visitor)
691
1145
  visitor.visit(self)
692
1146
  end
693
1147
 
694
1148
  ###
695
1149
  # Test to see if this Node is equal to +other+
696
- def == other
1150
+ def ==(other)
697
1151
  return false unless other
698
1152
  return false unless other.respond_to?(:pointer_id)
699
1153
  pointer_id == other.pointer_id
700
1154
  end
701
1155
 
1156
+ ###
1157
+ # Compare two Node objects with respect to their Document. Nodes from
1158
+ # different documents cannot be compared.
1159
+ def <=>(other)
1160
+ return nil unless other.is_a?(Nokogiri::XML::Node)
1161
+ return nil unless document == other.document
1162
+ compare(other)
1163
+ end
1164
+
1165
+ # :section: Serialization and Generating Output
1166
+
702
1167
  ###
703
1168
  # Serialize Node using +options+. Save options can also be set using a
704
1169
  # block. See SaveOptions.
@@ -713,21 +1178,23 @@ module Nokogiri
713
1178
  # config.format.as_xml
714
1179
  # end
715
1180
  #
716
- def serialize *args, &block
717
- options = args.first.is_a?(Hash) ? args.shift : {
718
- :encoding => args[0],
719
- :save_with => args[1]
720
- }
1181
+ def serialize(*args, &block)
1182
+ options = if args.first.is_a?(Hash)
1183
+ args.shift
1184
+ else
1185
+ {
1186
+ encoding: args[0],
1187
+ save_with: args[1],
1188
+ }
1189
+ end
721
1190
 
722
1191
  encoding = options[:encoding] || document.encoding
723
1192
  options[:encoding] = encoding
724
1193
 
725
- outstring = ""
726
- if encoding && outstring.respond_to?(:force_encoding)
727
- outstring.force_encoding(Encoding.find(encoding))
728
- end
1194
+ outstring = +""
1195
+ outstring.force_encoding(Encoding.find(encoding || "utf-8"))
729
1196
  io = StringIO.new(outstring)
730
- write_to io, options, &block
1197
+ write_to(io, options, &block)
731
1198
  io.string
732
1199
  end
733
1200
 
@@ -738,8 +1205,8 @@ module Nokogiri
738
1205
  #
739
1206
  # See Node#write_to for a list of +options+. For formatted output,
740
1207
  # use Node#to_xhtml instead.
741
- def to_html options = {}
742
- to_format SaveOptions::DEFAULT_HTML, options
1208
+ def to_html(options = {})
1209
+ to_format(SaveOptions::DEFAULT_HTML, options)
743
1210
  end
744
1211
 
745
1212
  ###
@@ -748,7 +1215,7 @@ module Nokogiri
748
1215
  # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
749
1216
  #
750
1217
  # See Node#write_to for a list of +options+
751
- def to_xml options = {}
1218
+ def to_xml(options = {})
752
1219
  options[:save_with] ||= SaveOptions::DEFAULT_XML
753
1220
  serialize(options)
754
1221
  end
@@ -759,8 +1226,8 @@ module Nokogiri
759
1226
  # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
760
1227
  #
761
1228
  # See Node#write_to for a list of +options+
762
- def to_xhtml options = {}
763
- to_format SaveOptions::DEFAULT_XHTML, options
1229
+ def to_xhtml(options = {})
1230
+ to_format(SaveOptions::DEFAULT_XHTML, options)
764
1231
  end
765
1232
 
766
1233
  ###
@@ -778,40 +1245,45 @@ module Nokogiri
778
1245
  #
779
1246
  # To save indented with two dashes:
780
1247
  #
781
- # node.write_to(io, :indent_text => '-', :indent => 2
1248
+ # node.write_to(io, :indent_text => '-', :indent => 2)
782
1249
  #
783
- def write_to io, *options
784
- options = options.first.is_a?(Hash) ? options.shift : {}
785
- encoding = options[:encoding] || options[0]
1250
+ def write_to(io, *options)
1251
+ options = options.first.is_a?(Hash) ? options.shift : {}
1252
+ encoding = options[:encoding] || options[0]
786
1253
  if Nokogiri.jruby?
787
- save_options = options[:save_with] || options[1]
788
- indent_times = options[:indent] || 0
1254
+ save_options = options[:save_with] || options[1]
1255
+ indent_times = options[:indent] || 0
789
1256
  else
790
- save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
791
- indent_times = options[:indent] || 2
1257
+ save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
1258
+ indent_times = options[:indent] || 2
792
1259
  end
793
- indent_text = options[:indent_text] || ' '
1260
+ indent_text = options[:indent_text] || " "
1261
+
1262
+ # Any string times 0 returns an empty string. Therefore, use the same
1263
+ # string instead of generating a new empty string for every node with
1264
+ # zero indentation.
1265
+ indentation = indent_times.zero? ? "" : (indent_text * indent_times)
794
1266
 
795
1267
  config = SaveOptions.new(save_options.to_i)
796
1268
  yield config if block_given?
797
1269
 
798
- native_write_to(io, encoding, indent_text * indent_times, config.options)
1270
+ native_write_to(io, encoding, indentation, config.options)
799
1271
  end
800
1272
 
801
1273
  ###
802
1274
  # Write Node as HTML to +io+ with +options+
803
1275
  #
804
1276
  # See Node#write_to for a list of +options+
805
- def write_html_to io, options = {}
806
- write_format_to SaveOptions::DEFAULT_HTML, io, options
1277
+ def write_html_to(io, options = {})
1278
+ write_format_to(SaveOptions::DEFAULT_HTML, io, options)
807
1279
  end
808
1280
 
809
1281
  ###
810
1282
  # Write Node as XHTML to +io+ with +options+
811
1283
  #
812
1284
  # See Node#write_to for a list of +options+
813
- def write_xhtml_to io, options = {}
814
- write_format_to SaveOptions::DEFAULT_XHTML, io, options
1285
+ def write_xhtml_to(io, options = {})
1286
+ write_format_to(SaveOptions::DEFAULT_XHTML, io, options)
815
1287
  end
816
1288
 
817
1289
  ###
@@ -820,138 +1292,111 @@ module Nokogiri
820
1292
  # doc.write_xml_to io, :encoding => 'UTF-8'
821
1293
  #
822
1294
  # See Node#write_to for a list of options
823
- def write_xml_to io, options = {}
1295
+ def write_xml_to(io, options = {})
824
1296
  options[:save_with] ||= SaveOptions::DEFAULT_XML
825
- write_to io, options
1297
+ write_to(io, options)
826
1298
  end
827
1299
 
828
- ###
829
- # Compare two Node objects with respect to their Document. Nodes from
830
- # different documents cannot be compared.
831
- def <=> other
832
- return nil unless other.is_a?(Nokogiri::XML::Node)
833
- return nil unless document == other.document
834
- compare other
1300
+ def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
1301
+ c14n_root = self
1302
+ document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
1303
+ tn = node.is_a?(XML::Node) ? node : parent
1304
+ tn == c14n_root || tn.ancestors.include?(c14n_root)
1305
+ end
835
1306
  end
836
1307
 
837
- ###
838
- # Do xinclude substitution on the subtree below node. If given a block, a
839
- # Nokogiri::XML::ParseOptions object initialized from +options+, will be
840
- # passed to it, allowing more convenient modification of the parser options.
841
- def do_xinclude options = XML::ParseOptions::DEFAULT_XML, &block
842
- options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
1308
+ # :section:
843
1309
 
844
- # give options to user
845
- yield options if block_given?
1310
+ protected
846
1311
 
847
- # call c extension
848
- process_xincludes(options.to_i)
1312
+ def coerce(data)
1313
+ case data
1314
+ when XML::NodeSet
1315
+ return data
1316
+ when XML::DocumentFragment
1317
+ return data.children
1318
+ when String
1319
+ return fragment(data).children
1320
+ when Document, XML::Attr
1321
+ # unacceptable
1322
+ when XML::Node
1323
+ return data
1324
+ end
1325
+
1326
+ raise ArgumentError, <<~EOERR
1327
+ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1328
+ (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1329
+ EOERR
849
1330
  end
850
1331
 
851
- def canonicalize(mode=XML::XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
852
- c14n_root = self
853
- document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
854
- tn = node.is_a?(XML::Node) ? node : parent
855
- tn == c14n_root || tn.ancestors.include?(c14n_root)
1332
+ private
1333
+
1334
+ def keywordify(keywords)
1335
+ case keywords
1336
+ when Enumerable
1337
+ keywords
1338
+ when String
1339
+ keywords.scan(/\S+/)
1340
+ else
1341
+ raise ArgumentError,
1342
+ "Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}"
856
1343
  end
857
1344
  end
858
1345
 
859
- private
1346
+ def add_sibling(next_or_previous, node_or_tags)
1347
+ raise("Cannot add sibling to a node with no parent") unless parent
860
1348
 
861
- def add_sibling next_or_previous, node_or_tags
862
- impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
863
- iter = (next_or_previous == :next) ? :reverse_each : :each
1349
+ impl = next_or_previous == :next ? :add_next_sibling_node : :add_previous_sibling_node
1350
+ iter = next_or_previous == :next ? :reverse_each : :each
864
1351
 
865
- node_or_tags = coerce node_or_tags
1352
+ node_or_tags = parent.coerce(node_or_tags)
866
1353
  if node_or_tags.is_a?(XML::NodeSet)
867
1354
  if text?
868
- pivot = Nokogiri::XML::Node.new 'dummy', document
869
- send impl, pivot
1355
+ pivot = Nokogiri::XML::Node.new("dummy", document)
1356
+ send(impl, pivot)
870
1357
  else
871
1358
  pivot = self
872
1359
  end
873
- node_or_tags.send(iter) { |n| pivot.send impl, n }
1360
+ node_or_tags.send(iter) { |n| pivot.send(impl, n) }
874
1361
  pivot.unlink if text?
875
1362
  else
876
- send impl, node_or_tags
1363
+ send(impl, node_or_tags)
877
1364
  end
878
1365
  node_or_tags
879
1366
  end
880
1367
 
881
- def to_format save_option, options
882
- # FIXME: this is a hack around broken libxml versions
883
- return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1368
+ USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
1369
+ private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
1370
+
1371
+ def to_format(save_option, options)
1372
+ return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
884
1373
 
885
- options[:save_with] |= save_option if options[:save_with]
886
1374
  options[:save_with] = save_option unless options[:save_with]
887
1375
  serialize(options)
888
1376
  end
889
1377
 
890
- def write_format_to save_option, io, options
891
- # FIXME: this is a hack around broken libxml versions
892
- return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1378
+ def write_format_to(save_option, io, options)
1379
+ return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
893
1380
 
894
1381
  options[:save_with] ||= save_option
895
- write_to io, options
896
- end
897
-
898
- def extract_params params # :nodoc:
899
- # Pop off our custom function handler if it exists
900
- handler = params.find { |param|
901
- ![Hash, String, Symbol].include?(param.class)
902
- }
903
-
904
- params -= [handler] if handler
905
-
906
- hashes = []
907
- while Hash === params.last || params.last.nil?
908
- hashes << params.pop
909
- break if params.empty?
910
- end
911
-
912
- ns, binds = hashes.reverse
913
-
914
- ns ||= document.root ? document.root.namespaces : {}
915
-
916
- [params, handler, ns, binds]
917
- end
918
-
919
- def coerce data # :nodoc:
920
- case data
921
- when XML::NodeSet
922
- return data
923
- when XML::DocumentFragment
924
- return data.children
925
- when String
926
- return fragment(data).children
927
- when Document, XML::Attr
928
- # unacceptable
929
- when XML::Node
930
- return data
931
- end
932
-
933
- raise ArgumentError, <<-EOERR
934
- Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
935
- (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
936
- EOERR
937
- end
938
-
939
- def implied_xpath_context
940
- "./"
1382
+ write_to(io, options)
941
1383
  end
942
1384
 
943
1385
  def inspect_attributes
944
1386
  [:name, :namespace, :attribute_nodes, :children]
945
1387
  end
946
1388
 
947
- def add_child_node_and_reparent_attrs node
948
- add_child_node node
949
- node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
1389
+ IMPLIED_XPATH_CONTEXTS = [".//"].freeze
1390
+
1391
+ def add_child_node_and_reparent_attrs(node)
1392
+ add_child_node(node)
1393
+ node.attribute_nodes.find_all { |a| a.name.include?(":") }.each do |attr_node|
950
1394
  attr_node.remove
951
1395
  node[attr_node.name] = attr_node.value
952
1396
  end
953
1397
  end
954
-
955
1398
  end
956
1399
  end
957
1400
  end
1401
+
1402
+ require_relative "node/save_options"