nokogiri 1.8.5 → 1.13.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (353) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -21
  3. data/LICENSE-DEPENDENCIES.md +1159 -868
  4. data/LICENSE.md +5 -28
  5. data/README.md +196 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -59
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +765 -420
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +199 -88
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +21 -21
  22. data/ext/nokogiri/xml_cdata.c +14 -19
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +296 -217
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +25 -25
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +99 -54
  33. data/ext/nokogiri/xml_node.c +1107 -658
  34. data/ext/nokogiri/xml_node_set.c +178 -166
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +277 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +114 -35
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +226 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +895 -377
  139. data/lib/nokogiri/xml/node_set.rb +92 -65
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +21 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  171. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  172. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  173. metadata +211 -266
  174. data/.autotest +0 -22
  175. data/.cross_rubies +0 -8
  176. data/.editorconfig +0 -17
  177. data/.gemtest +0 -0
  178. data/.travis.yml +0 -63
  179. data/CHANGELOG.md +0 -1368
  180. data/CONTRIBUTING.md +0 -42
  181. data/C_CODING_STYLE.rdoc +0 -33
  182. data/Gemfile-libxml-ruby +0 -3
  183. data/Manifest.txt +0 -370
  184. data/ROADMAP.md +0 -111
  185. data/Rakefile +0 -348
  186. data/SECURITY.md +0 -19
  187. data/STANDARD_RESPONSES.md +0 -47
  188. data/Y_U_NO_GEMSPEC.md +0 -155
  189. data/appveyor.yml +0 -29
  190. data/build_all +0 -44
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -61
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -15
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -12
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document_fragment.rb +0 -49
  232. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  233. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  234. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  235. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  236. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  237. data/patches/sort-patches-by-date +0 -25
  238. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  239. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  240. data/suppressions/README.txt +0 -1
  241. data/suppressions/nokogiri_ruby-2.supp +0 -10
  242. data/tasks/test.rb +0 -100
  243. data/test/css/test_nthiness.rb +0 -226
  244. data/test/css/test_parser.rb +0 -386
  245. data/test/css/test_tokenizer.rb +0 -215
  246. data/test/css/test_xpath_visitor.rb +0 -96
  247. data/test/decorators/test_slop.rb +0 -23
  248. data/test/files/2ch.html +0 -108
  249. data/test/files/GH_1042.html +0 -18
  250. data/test/files/address_book.rlx +0 -12
  251. data/test/files/address_book.xml +0 -10
  252. data/test/files/atom.xml +0 -344
  253. data/test/files/bar/bar.xsd +0 -4
  254. data/test/files/bogus.xml +0 -0
  255. data/test/files/dont_hurt_em_why.xml +0 -422
  256. data/test/files/encoding.html +0 -82
  257. data/test/files/encoding.xhtml +0 -84
  258. data/test/files/exslt.xml +0 -8
  259. data/test/files/exslt.xslt +0 -35
  260. data/test/files/foo/foo.xsd +0 -4
  261. data/test/files/metacharset.html +0 -10
  262. data/test/files/namespace_pressure_test.xml +0 -1684
  263. data/test/files/noencoding.html +0 -47
  264. data/test/files/po.xml +0 -32
  265. data/test/files/po.xsd +0 -66
  266. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  267. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  268. data/test/files/saml/xenc_schema.xsd +0 -146
  269. data/test/files/saml/xmldsig_schema.xsd +0 -318
  270. data/test/files/shift_jis.html +0 -10
  271. data/test/files/shift_jis.xml +0 -5
  272. data/test/files/shift_jis_no_charset.html +0 -9
  273. data/test/files/slow-xpath.xml +0 -25509
  274. data/test/files/snuggles.xml +0 -3
  275. data/test/files/staff.dtd +0 -10
  276. data/test/files/staff.xml +0 -59
  277. data/test/files/staff.xslt +0 -32
  278. data/test/files/test_document_url/bar.xml +0 -2
  279. data/test/files/test_document_url/document.dtd +0 -4
  280. data/test/files/test_document_url/document.xml +0 -6
  281. data/test/files/tlm.html +0 -851
  282. data/test/files/to_be_xincluded.xml +0 -2
  283. data/test/files/valid_bar.xml +0 -2
  284. data/test/files/xinclude.xml +0 -4
  285. data/test/helper.rb +0 -271
  286. data/test/html/sax/test_parser.rb +0 -168
  287. data/test/html/sax/test_parser_context.rb +0 -46
  288. data/test/html/sax/test_parser_text.rb +0 -163
  289. data/test/html/sax/test_push_parser.rb +0 -87
  290. data/test/html/test_attributes.rb +0 -85
  291. data/test/html/test_builder.rb +0 -164
  292. data/test/html/test_document.rb +0 -712
  293. data/test/html/test_document_encoding.rb +0 -143
  294. data/test/html/test_document_fragment.rb +0 -310
  295. data/test/html/test_element_description.rb +0 -105
  296. data/test/html/test_named_characters.rb +0 -14
  297. data/test/html/test_node.rb +0 -212
  298. data/test/html/test_node_encoding.rb +0 -91
  299. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  300. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  301. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  302. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  303. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  304. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  305. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  306. data/test/test_convert_xpath.rb +0 -135
  307. data/test/test_css_cache.rb +0 -47
  308. data/test/test_encoding_handler.rb +0 -48
  309. data/test/test_memory_leak.rb +0 -156
  310. data/test/test_nokogiri.rb +0 -138
  311. data/test/test_soap4r_sax.rb +0 -52
  312. data/test/test_xslt_transforms.rb +0 -314
  313. data/test/xml/node/test_save_options.rb +0 -28
  314. data/test/xml/node/test_subclass.rb +0 -44
  315. data/test/xml/sax/test_parser.rb +0 -402
  316. data/test/xml/sax/test_parser_context.rb +0 -115
  317. data/test/xml/sax/test_parser_text.rb +0 -202
  318. data/test/xml/sax/test_push_parser.rb +0 -265
  319. data/test/xml/test_attr.rb +0 -74
  320. data/test/xml/test_attribute_decl.rb +0 -86
  321. data/test/xml/test_builder.rb +0 -341
  322. data/test/xml/test_c14n.rb +0 -180
  323. data/test/xml/test_cdata.rb +0 -54
  324. data/test/xml/test_comment.rb +0 -40
  325. data/test/xml/test_document.rb +0 -982
  326. data/test/xml/test_document_encoding.rb +0 -31
  327. data/test/xml/test_document_fragment.rb +0 -298
  328. data/test/xml/test_dtd.rb +0 -187
  329. data/test/xml/test_dtd_encoding.rb +0 -31
  330. data/test/xml/test_element_content.rb +0 -56
  331. data/test/xml/test_element_decl.rb +0 -73
  332. data/test/xml/test_entity_decl.rb +0 -122
  333. data/test/xml/test_entity_reference.rb +0 -262
  334. data/test/xml/test_namespace.rb +0 -96
  335. data/test/xml/test_node.rb +0 -1325
  336. data/test/xml/test_node_attributes.rb +0 -115
  337. data/test/xml/test_node_encoding.rb +0 -75
  338. data/test/xml/test_node_inheritance.rb +0 -32
  339. data/test/xml/test_node_reparenting.rb +0 -592
  340. data/test/xml/test_node_set.rb +0 -809
  341. data/test/xml/test_parse_options.rb +0 -64
  342. data/test/xml/test_processing_instruction.rb +0 -30
  343. data/test/xml/test_reader.rb +0 -620
  344. data/test/xml/test_reader_encoding.rb +0 -134
  345. data/test/xml/test_relax_ng.rb +0 -60
  346. data/test/xml/test_schema.rb +0 -142
  347. data/test/xml/test_syntax_error.rb +0 -36
  348. data/test/xml/test_text.rb +0 -60
  349. data/test/xml/test_unparented_node.rb +0 -483
  350. data/test/xml/test_xinclude.rb +0 -83
  351. data/test/xml/test_xpath.rb +0 -470
  352. data/test/xslt/test_custom_functions.rb +0 -133
  353. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,105 +1,130 @@
1
- # encoding: UTF-8
2
- require 'stringio'
3
- require 'nokogiri/xml/node/save_options'
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "stringio"
4
5
 
5
6
  module Nokogiri
6
7
  module XML
7
- ####
8
- # Nokogiri::XML::Node is your window to the fun filled world of dealing
9
- # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
10
- # to a hash with regard to attributes. For example (from irb):
8
+ # Nokogiri::XML::Node is the primary API you'll use to interact with your Document.
9
+ #
10
+ # == Attributes
11
+ #
12
+ # A Nokogiri::XML::Node may be treated similarly to a hash with regard to attributes. For
13
+ # example:
14
+ #
15
+ # node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
16
+ # node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
17
+ # node['href'] # => "#foo"
18
+ # node.keys # => ["href", "id"]
19
+ # node.values # => ["#foo", "link"]
20
+ # node['class'] = 'green' # => "green"
21
+ # node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
22
+ #
23
+ # See the method group entitled Node@Working+With+Node+Attributes for the full set of methods.
24
+ #
25
+ # == Navigation
11
26
  #
12
- # irb(main):004:0> node
13
- # => <a href="#foo" id="link">link</a>
14
- # irb(main):005:0> node['href']
15
- # => "#foo"
16
- # irb(main):006:0> node.keys
17
- # => ["href", "id"]
18
- # irb(main):007:0> node.values
19
- # => ["#foo", "link"]
20
- # irb(main):008:0> node['class'] = 'green'
21
- # => "green"
22
- # irb(main):009:0> node
23
- # => <a href="#foo" id="link" class="green">link</a>
24
- # irb(main):010:0>
27
+ # Nokogiri::XML::Node also has methods that let you move around your tree:
25
28
  #
26
- # See Nokogiri::XML::Node#[] and Nokogiri::XML#[]= for more information.
29
+ # [#parent, #children, #next, #previous]
30
+ # Navigate up, down, or through siblings.
27
31
  #
28
- # Nokogiri::XML::Node also has methods that let you move around your
29
- # tree. For navigating your tree, see:
32
+ # See the method group entitled Node@Traversing+Document+Structure for the full set of methods.
30
33
  #
31
- # * Nokogiri::XML::Node#parent
32
- # * Nokogiri::XML::Node#children
33
- # * Nokogiri::XML::Node#next
34
- # * Nokogiri::XML::Node#previous
34
+ # == Serialization
35
35
  #
36
+ # When printing or otherwise emitting a document or a node (and its subtree), there are a few
37
+ # methods you might want to use:
36
38
  #
37
- # When printing or otherwise emitting a document or a node (and
38
- # its subtree), there are a few methods you might want to use:
39
+ # [#content, #text, #inner_text, #to_str]
40
+ # These methods will all **emit plaintext**,
41
+ # meaning that entities will be replaced (e.g., +&lt;+ will be replaced with +<+), meaning
42
+ # that any sanitizing will likely be un-done in the output.
39
43
  #
40
- # * content, text, inner_text, to_str: emit plaintext
44
+ # [#to_s, #to_xml, #to_html, #inner_html]
45
+ # These methods will all **emit properly-escaped markup**, meaning that it's suitable for
46
+ # consumption by browsers, parsers, etc.
41
47
  #
42
- # These methods will all emit the plaintext version of your
43
- # document, meaning that entities will be replaced (e.g., "&lt;"
44
- # will be replaced with "<"), meaning that any sanitizing will
45
- # likely be un-done in the output.
48
+ # See the method group entitled Node@Serialization+and+Generating+Output for the full set of methods.
46
49
  #
47
- # * to_s, to_xml, to_html, inner_html: emit well-formed markup
50
+ # == Searching
48
51
  #
49
- # These methods will all emit properly-escaped markup, meaning
50
- # that it's suitable for consumption by browsers, parsers, etc.
52
+ # You may search this node's subtree using methods like #xpath and #css.
53
+ #
54
+ # See the method group entitled Node@Searching+via+XPath+or+CSS+Queries for the full set of methods.
51
55
  #
52
- # You may search this node's subtree using Searchable#xpath and Searchable#css
53
56
  class Node
54
57
  include Nokogiri::XML::PP::Node
55
58
  include Nokogiri::XML::Searchable
59
+ include Nokogiri::ClassResolver
56
60
  include Enumerable
57
61
 
58
62
  # Element node type, see Nokogiri::XML::Node#element?
59
- ELEMENT_NODE = 1
63
+ ELEMENT_NODE = 1
60
64
  # Attribute node type
61
- ATTRIBUTE_NODE = 2
65
+ ATTRIBUTE_NODE = 2
62
66
  # Text node type, see Nokogiri::XML::Node#text?
63
- TEXT_NODE = 3
67
+ TEXT_NODE = 3
64
68
  # CDATA node type, see Nokogiri::XML::Node#cdata?
65
69
  CDATA_SECTION_NODE = 4
66
70
  # Entity reference node type
67
- ENTITY_REF_NODE = 5
71
+ ENTITY_REF_NODE = 5
68
72
  # Entity node type
69
- ENTITY_NODE = 6
73
+ ENTITY_NODE = 6
70
74
  # PI node type
71
- PI_NODE = 7
75
+ PI_NODE = 7
72
76
  # Comment node type, see Nokogiri::XML::Node#comment?
73
- COMMENT_NODE = 8
77
+ COMMENT_NODE = 8
74
78
  # Document node type, see Nokogiri::XML::Node#xml?
75
- DOCUMENT_NODE = 9
79
+ DOCUMENT_NODE = 9
76
80
  # Document type node type
77
81
  DOCUMENT_TYPE_NODE = 10
78
82
  # Document fragment node type
79
83
  DOCUMENT_FRAG_NODE = 11
80
84
  # Notation node type
81
- NOTATION_NODE = 12
85
+ NOTATION_NODE = 12
82
86
  # HTML document node type, see Nokogiri::XML::Node#html?
83
87
  HTML_DOCUMENT_NODE = 13
84
88
  # DTD node type
85
- DTD_NODE = 14
89
+ DTD_NODE = 14
86
90
  # Element declaration type
87
- ELEMENT_DECL = 15
91
+ ELEMENT_DECL = 15
88
92
  # Attribute declaration type
89
- ATTRIBUTE_DECL = 16
93
+ ATTRIBUTE_DECL = 16
90
94
  # Entity declaration type
91
- ENTITY_DECL = 17
95
+ ENTITY_DECL = 17
92
96
  # Namespace declaration type
93
- NAMESPACE_DECL = 18
97
+ NAMESPACE_DECL = 18
94
98
  # XInclude start type
95
- XINCLUDE_START = 19
99
+ XINCLUDE_START = 19
96
100
  # XInclude end type
97
- XINCLUDE_END = 20
101
+ XINCLUDE_END = 20
98
102
  # DOCB document node type
99
103
  DOCB_DOCUMENT_NODE = 21
100
104
 
101
- def initialize name, document # :nodoc:
102
- # ... Ya. This is empty on purpose.
105
+ #
106
+ # :call-seq:
107
+ # new(name, document) -> Nokogiri::XML::Node
108
+ # new(name, document) { |node| ... } -> Nokogiri::XML::Node
109
+ #
110
+ # Create a new node with +name+ that belongs to +document+.
111
+ #
112
+ # If you intend to add a node to a document tree, it's likely that you will prefer one of the
113
+ # Nokogiri::XML::Node methods like #add_child, #add_next_sibling, #replace, etc. which will
114
+ # both create an element (or subtree) and place it in the document tree.
115
+ #
116
+ # Another alternative, if you are concerned about performance, is
117
+ # Nokogiri::XML::Document#create_element which accepts additional arguments for contents or
118
+ # attributes but (like this method) avoids parsing markup.
119
+ #
120
+ # [Parameters]
121
+ # - +name+ (String)
122
+ # - +document+ (Nokogiri::XML::Document) The document to which the returned node will belong.
123
+ # [Yields] Nokogiri::XML::Node
124
+ # [Returns] Nokogiri::XML::Node
125
+ #
126
+ def initialize(name, document)
127
+ # This is intentionally empty, and sets the method signature for subclasses.
103
128
  end
104
129
 
105
130
  ###
@@ -108,24 +133,7 @@ module Nokogiri
108
133
  document.decorate(self)
109
134
  end
110
135
 
111
- ###
112
- # Search this node's immediate children using CSS selector +selector+
113
- def > selector
114
- ns = document.root.namespaces
115
- xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
116
- end
117
-
118
- ###
119
- # Get the attribute value for the attribute +name+
120
- def [] name
121
- get(name.to_s)
122
- end
123
-
124
- ###
125
- # Set the attribute value for the attribute +name+ to +value+
126
- def []= name, value
127
- set name.to_s, value.to_s
128
- end
136
+ # :section: Manipulating Document Structure
129
137
 
130
138
  ###
131
139
  # Add +node_or_tags+ as a child of this Node.
@@ -134,12 +142,12 @@ module Nokogiri
134
142
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
135
143
  #
136
144
  # Also see related method +<<+.
137
- def add_child node_or_tags
145
+ def add_child(node_or_tags)
138
146
  node_or_tags = coerce(node_or_tags)
139
147
  if node_or_tags.is_a?(XML::NodeSet)
140
- node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
148
+ node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
141
149
  else
142
- add_child_node_and_reparent_attrs node_or_tags
150
+ add_child_node_and_reparent_attrs(node_or_tags)
143
151
  end
144
152
  node_or_tags
145
153
  end
@@ -151,16 +159,28 @@ module Nokogiri
151
159
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
152
160
  #
153
161
  # Also see related method +add_child+.
154
- def prepend_child node_or_tags
155
- if first = children.first
162
+ def prepend_child(node_or_tags)
163
+ if (first = children.first)
156
164
  # Mimic the error add_child would raise.
157
- raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
165
+ raise "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
166
+
158
167
  first.__send__(:add_sibling, :previous, node_or_tags)
159
168
  else
160
169
  add_child(node_or_tags)
161
170
  end
162
171
  end
163
172
 
173
+ ###
174
+ # Add html around this node
175
+ #
176
+ # Returns self
177
+ def wrap(html)
178
+ new_parent = document.parse(html).first
179
+ add_next_sibling(new_parent)
180
+ new_parent.add_child(self)
181
+ self
182
+ end
183
+
164
184
  ###
165
185
  # Add +node_or_tags+ as a child of this Node.
166
186
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
@@ -168,8 +188,8 @@ module Nokogiri
168
188
  # Returns self, to support chaining of calls (e.g., root << child1 << child2)
169
189
  #
170
190
  # Also see related method +add_child+.
171
- def << node_or_tags
172
- add_child node_or_tags
191
+ def <<(node_or_tags)
192
+ add_child(node_or_tags)
173
193
  self
174
194
  end
175
195
 
@@ -180,10 +200,11 @@ module Nokogiri
180
200
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
181
201
  #
182
202
  # Also see related method +before+.
183
- def add_previous_sibling node_or_tags
184
- raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
203
+ def add_previous_sibling(node_or_tags)
204
+ raise ArgumentError,
205
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
185
206
 
186
- add_sibling :previous, node_or_tags
207
+ add_sibling(:previous, node_or_tags)
187
208
  end
188
209
 
189
210
  ###
@@ -193,10 +214,11 @@ module Nokogiri
193
214
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
194
215
  #
195
216
  # Also see related method +after+.
196
- def add_next_sibling node_or_tags
197
- raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
217
+ def add_next_sibling(node_or_tags)
218
+ raise ArgumentError,
219
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
198
220
 
199
- add_sibling :next, node_or_tags
221
+ add_sibling(:next, node_or_tags)
200
222
  end
201
223
 
202
224
  ####
@@ -206,8 +228,8 @@ module Nokogiri
206
228
  # Returns self, to support chaining of calls.
207
229
  #
208
230
  # Also see related method +add_previous_sibling+.
209
- def before node_or_tags
210
- add_previous_sibling node_or_tags
231
+ def before(node_or_tags)
232
+ add_previous_sibling(node_or_tags)
211
233
  self
212
234
  end
213
235
 
@@ -218,8 +240,8 @@ module Nokogiri
218
240
  # Returns self, to support chaining of calls.
219
241
  #
220
242
  # Also see related method +add_next_sibling+.
221
- def after node_or_tags
222
- add_next_sibling node_or_tags
243
+ def after(node_or_tags)
244
+ add_next_sibling(node_or_tags)
223
245
  self
224
246
  end
225
247
 
@@ -227,30 +249,24 @@ module Nokogiri
227
249
  # Set the inner html for this Node to +node_or_tags+
228
250
  # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
229
251
  #
230
- # Returns self.
231
- #
232
252
  # Also see related method +children=+
233
- def inner_html= node_or_tags
253
+ def inner_html=(node_or_tags)
234
254
  self.children = node_or_tags
235
- self
236
255
  end
237
256
 
238
257
  ####
239
258
  # Set the inner html for this Node to +node_or_tags+
240
259
  # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
241
260
  #
242
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
243
- #
244
261
  # Also see related method +inner_html=+
245
- def children= node_or_tags
262
+ def children=(node_or_tags)
246
263
  node_or_tags = coerce(node_or_tags)
247
264
  children.unlink
248
265
  if node_or_tags.is_a?(XML::NodeSet)
249
- node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
266
+ node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
250
267
  else
251
- add_child_node_and_reparent_attrs node_or_tags
268
+ add_child_node_and_reparent_attrs(node_or_tags)
252
269
  end
253
- node_or_tags
254
270
  end
255
271
 
256
272
  ####
@@ -260,25 +276,27 @@ module Nokogiri
260
276
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
261
277
  #
262
278
  # Also see related method +swap+.
263
- def replace node_or_tags
279
+ def replace(node_or_tags)
280
+ raise("Cannot replace a node with no parent") unless parent
281
+
264
282
  # We cannot replace a text node directly, otherwise libxml will return
265
283
  # an internal error at parser.c:13031, I don't know exactly why
266
284
  # libxml is trying to find a parent node that is an element or document
267
285
  # so I can't tell if this is bug in libxml or not. issue #775.
268
286
  if text?
269
- replacee = Nokogiri::XML::Node.new 'dummy', document
270
- add_previous_sibling_node replacee
287
+ replacee = Nokogiri::XML::Node.new("dummy", document)
288
+ add_previous_sibling_node(replacee)
271
289
  unlink
272
- return replacee.replace node_or_tags
290
+ return replacee.replace(node_or_tags)
273
291
  end
274
292
 
275
- node_or_tags = coerce(node_or_tags)
293
+ node_or_tags = parent.coerce(node_or_tags)
276
294
 
277
295
  if node_or_tags.is_a?(XML::NodeSet)
278
- node_or_tags.each { |n| add_previous_sibling n }
296
+ node_or_tags.each { |n| add_previous_sibling(n) }
279
297
  unlink
280
298
  else
281
- replace_node node_or_tags
299
+ replace_node(node_or_tags)
282
300
  end
283
301
  node_or_tags
284
302
  end
@@ -290,44 +308,215 @@ module Nokogiri
290
308
  # Returns self, to support chaining of calls.
291
309
  #
292
310
  # Also see related method +replace+.
293
- def swap node_or_tags
294
- replace node_or_tags
311
+ def swap(node_or_tags)
312
+ replace(node_or_tags)
295
313
  self
296
314
  end
297
315
 
298
- alias :next :next_sibling
299
- alias :previous :previous_sibling
300
-
301
- # :stopdoc:
302
- # HACK: This is to work around an RDoc bug
303
- alias :next= :add_next_sibling
304
- # :startdoc:
305
-
306
- alias :previous= :add_previous_sibling
307
- alias :remove :unlink
308
- alias :get_attribute :[]
309
- alias :attr :[]
310
- alias :set_attribute :[]=
311
- alias :text :content
312
- alias :inner_text :content
313
- alias :has_attribute? :key?
314
- alias :name :node_name
315
- alias :name= :node_name=
316
- alias :type :node_type
317
- alias :to_str :text
318
- alias :clone :dup
319
- alias :elements :element_children
320
-
321
316
  ####
322
- # Returns a hash containing the node's attributes. The key is
323
- # the attribute name without any namespace, the value is a Nokogiri::XML::Attr
324
- # representing the attribute.
325
- # If you need to distinguish attributes with the same name, with different namespaces
326
- # use #attribute_nodes instead.
317
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
318
+ def content=(string)
319
+ self.native_content = encode_special_chars(string.to_s)
320
+ end
321
+
322
+ ###
323
+ # Set the parent Node for this Node
324
+ def parent=(parent_node)
325
+ parent_node.add_child(self)
326
+ end
327
+
328
+ ###
329
+ # Adds a default namespace supplied as a string +url+ href, to self.
330
+ # The consequence is as if an xmlns attribute with the supplied argument were
331
+ # present in parsed XML. A default namespace set with this method will
332
+ # not show up in #attributes, but when this node is serialized to XML an
333
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
334
+ def default_namespace=(url)
335
+ add_namespace_definition(nil, url)
336
+ end
337
+
338
+ ###
339
+ # Set the default namespace on this node (as would be defined with an
340
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
341
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
342
+ # for this node. You probably want #default_namespace= instead, or perhaps
343
+ # #add_namespace_definition with a nil prefix argument.
344
+ def namespace=(ns)
345
+ return set_namespace(ns) unless ns
346
+
347
+ unless Nokogiri::XML::Namespace === ns
348
+ raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
349
+ end
350
+ if ns.document != document
351
+ raise ArgumentError, "namespace must be declared on the same document"
352
+ end
353
+
354
+ set_namespace(ns)
355
+ end
356
+
357
+ ###
358
+ # Do xinclude substitution on the subtree below node. If given a block, a
359
+ # Nokogiri::XML::ParseOptions object initialized from +options+, will be
360
+ # passed to it, allowing more convenient modification of the parser options.
361
+ def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
362
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
363
+ yield options if block_given?
364
+
365
+ # call c extension
366
+ process_xincludes(options.to_i)
367
+ end
368
+
369
+ alias_method :next, :next_sibling
370
+ alias_method :previous, :previous_sibling
371
+ alias_method :next=, :add_next_sibling
372
+ alias_method :previous=, :add_previous_sibling
373
+ alias_method :remove, :unlink
374
+ alias_method :name=, :node_name=
375
+ alias_method :add_namespace, :add_namespace_definition
376
+
377
+ # :section:
378
+
379
+ alias_method :inner_text, :content
380
+ alias_method :text, :content
381
+ alias_method :to_str, :content
382
+ alias_method :name, :node_name
383
+ alias_method :type, :node_type
384
+ alias_method :clone, :dup
385
+ alias_method :elements, :element_children
386
+
387
+ # :section: Working With Node Attributes
388
+
389
+ # :call-seq: [](name) → (String, nil)
390
+ #
391
+ # Fetch an attribute from this node.
392
+ #
393
+ # ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
394
+ # namespaced attributes, use #attribute_with_ns.
395
+ #
396
+ # [Returns] (String, nil) value of the attribute +name+, or +nil+ if no matching attribute exists
397
+ #
398
+ # *Example*
399
+ #
400
+ # doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
401
+ # child = doc.at_css("child")
402
+ # child["size"] # => "large"
403
+ # child["class"] # => "big wide tall"
404
+ #
405
+ # *Example:* Namespaced attributes will not be returned.
406
+ #
407
+ # ⚠ Note namespaced attributes may be accessed with #attribute or #attribute_with_ns
408
+ #
409
+ # doc = Nokogiri::XML(<<~EOF)
410
+ # <root xmlns:width='http://example.com/widths'>
411
+ # <child width:size='broad'/>
412
+ # </root>
413
+ # EOF
414
+ # doc.at_css("child")["size"] # => nil
415
+ # doc.at_css("child").attribute("size").value # => "broad"
416
+ # doc.at_css("child").attribute_with_ns("size", "http://example.com/widths").value
417
+ # # => "broad"
418
+ #
419
+ def [](name)
420
+ get(name.to_s)
421
+ end
422
+
423
+ # :call-seq: []=(name, value) → value
424
+ #
425
+ # Update the attribute +name+ to +value+, or create the attribute if it does not exist.
426
+ #
427
+ # ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
428
+ # namespaced attributes for update, use #attribute_with_ns. To add a namespaced attribute,
429
+ # see the example below.
430
+ #
431
+ # [Returns] +value+
432
+ #
433
+ # *Example*
434
+ #
435
+ # doc = Nokogiri::XML("<root><child/></root>")
436
+ # child = doc.at_css("child")
437
+ # child["size"] = "broad"
438
+ # child.to_html
439
+ # # => "<child size=\"broad\"></child>"
440
+ #
441
+ # *Example:* Add a namespaced attribute.
442
+ #
443
+ # doc = Nokogiri::XML(<<~EOF)
444
+ # <root xmlns:width='http://example.com/widths'>
445
+ # <child/>
446
+ # </root>
447
+ # EOF
448
+ # child = doc.at_css("child")
449
+ # child["size"] = "broad"
450
+ # ns = doc.root.namespace_definitions.find { |ns| ns.prefix == "width" }
451
+ # child.attribute("size").namespace = ns
452
+ # doc.to_html
453
+ # # => "<root xmlns:width=\"http://example.com/widths\">\n" +
454
+ # # " <child width:size=\"broad\"></child>\n" +
455
+ # # "</root>\n"
456
+ #
457
+ def []=(name, value)
458
+ set(name.to_s, value.to_s)
459
+ end
460
+
461
+ #
462
+ # :call-seq: attributes() → Hash<String ⇒ Nokogiri::XML::Attr>
463
+ #
464
+ # Fetch this node's attributes.
465
+ #
466
+ # ⚠ Because the keys do not include any namespace information for the attribute, in case of a
467
+ # simple name collision, not all attributes will be returned. In this case, you will need to
468
+ # use #attribute_nodes.
469
+ #
470
+ # [Returns]
471
+ # Hash containing attributes belonging to +self+. The hash keys are String attribute
472
+ # names (without the namespace), and the hash values are Nokogiri::XML::Attr.
473
+ #
474
+ # *Example* with no namespaces:
475
+ #
476
+ # doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
477
+ # doc.at_css("child").attributes
478
+ # # => {"size"=>#(Attr:0x550 { name = "size", value = "large" }),
479
+ # # "class"=>#(Attr:0x564 { name = "class", value = "big wide tall" })}
480
+ #
481
+ # *Example* with a namespace:
482
+ #
483
+ # doc = Nokogiri::XML("<root xmlns:desc='http://example.com/sizes'><child desc:size='large'/></root>")
484
+ # doc.at_css("child").attributes
485
+ # # => {"size"=>
486
+ # # #(Attr:0x550 {
487
+ # # name = "size",
488
+ # # namespace = #(Namespace:0x564 {
489
+ # # prefix = "desc",
490
+ # # href = "http://example.com/sizes"
491
+ # # }),
492
+ # # value = "large"
493
+ # # })}
494
+ #
495
+ # *Example* with an attribute name collision:
496
+ #
497
+ # ⚠ Note that only one of the attributes is returned in the Hash.
498
+ #
499
+ # doc = Nokogiri::XML(<<~EOF)
500
+ # <root xmlns:width='http://example.com/widths'
501
+ # xmlns:height='http://example.com/heights'>
502
+ # <child width:size='broad' height:size='tall'/>
503
+ # </root>
504
+ # EOF
505
+ # doc.at_css("child").attributes
506
+ # # => {"size"=>
507
+ # # #(Attr:0x550 {
508
+ # # name = "size",
509
+ # # namespace = #(Namespace:0x564 {
510
+ # # prefix = "height",
511
+ # # href = "http://example.com/heights"
512
+ # # }),
513
+ # # value = "tall"
514
+ # # })}
515
+ #
327
516
  def attributes
328
- Hash[attribute_nodes.map { |node|
329
- [node.node_name, node]
330
- }]
517
+ attribute_nodes.each_with_object({}) do |node, hash|
518
+ hash[node.node_name] = node
519
+ end
331
520
  end
332
521
 
333
522
  ###
@@ -336,6 +525,12 @@ module Nokogiri
336
525
  attribute_nodes.map(&:value)
337
526
  end
338
527
 
528
+ ###
529
+ # Does any attribute of this Node have the value +value+?
530
+ def value?(value)
531
+ values.include?(value)
532
+ end
533
+
339
534
  ###
340
535
  # Get the attribute names for this Node.
341
536
  def keys
@@ -345,97 +540,401 @@ module Nokogiri
345
540
  ###
346
541
  # Iterate over each attribute name and value pair for this Node.
347
542
  def each
348
- attribute_nodes.each { |node|
543
+ attribute_nodes.each do |node|
349
544
  yield [node.node_name, node.value]
350
- }
545
+ end
351
546
  end
352
547
 
353
548
  ###
354
- # Get the list of class names of this Node, without
355
- # deduplication or sorting.
549
+ # Remove the attribute named +name+
550
+ def remove_attribute(name)
551
+ attr = attributes[name].remove if key?(name)
552
+ clear_xpath_context if Nokogiri.jruby?
553
+ attr
554
+ end
555
+
556
+ #
557
+ # :call-seq: classes() → Array<String>
558
+ #
559
+ # Fetch CSS class names of a Node.
560
+ #
561
+ # This is a convenience function and is equivalent to:
562
+ #
563
+ # node.kwattr_values("class")
564
+ #
565
+ # See related: #kwattr_values, #add_class, #append_class, #remove_class
566
+ #
567
+ # [Returns]
568
+ # The CSS classes (Array of String) present in the Node's "class" attribute. If the
569
+ # attribute is empty or non-existent, the return value is an empty array.
570
+ #
571
+ # *Example*
572
+ #
573
+ # node # => <div class="section title header"></div>
574
+ # node.classes # => ["section", "title", "header"]
575
+ #
356
576
  def classes
357
- self['class'].to_s.scan(/\S+/)
577
+ kwattr_values("class")
358
578
  end
359
579
 
360
- ###
361
- # Add +name+ to the "class" attribute value of this Node and
362
- # return self. If the value is already in the current value, it
363
- # is not added. If no "class" attribute exists yet, one is
364
- # created with the given value.
365
580
  #
366
- # More than one class may be added at a time, separated by a
367
- # space.
368
- def add_class name
369
- names = classes
370
- self['class'] = (names + (name.scan(/\S+/) - names)).join(' ')
581
+ # :call-seq: add_class(names) → self
582
+ #
583
+ # Ensure HTML CSS classes are present on +self+. Any CSS classes in +names+ that already exist
584
+ # in the "class" attribute are _not_ added. Note that any existing duplicates in the
585
+ # "class" attribute are not removed. Compare with #append_class.
586
+ #
587
+ # This is a convenience function and is equivalent to:
588
+ #
589
+ # node.kwattr_add("class", names)
590
+ #
591
+ # See related: #kwattr_add, #classes, #append_class, #remove_class
592
+ #
593
+ # [Parameters]
594
+ # - +names+ (String, Array<String>)
595
+ #
596
+ # CSS class names to be added to the Node's "class" attribute. May be a string containing
597
+ # whitespace-delimited names, or an Array of String names. Any class names already present
598
+ # will not be added. Any class names not present will be added. If no "class" attribute
599
+ # exists, one is created.
600
+ #
601
+ # [Returns] +self+ (Node) for ease of chaining method calls.
602
+ #
603
+ # *Example:* Ensure that the node has CSS class "section"
604
+ #
605
+ # node # => <div></div>
606
+ # node.add_class("section") # => <div class="section"></div>
607
+ # node.add_class("section") # => <div class="section"></div> # duplicate not added
608
+ #
609
+ # *Example:* Ensure that the node has CSS classes "section" and "header", via a String argument
610
+ #
611
+ # Note that the CSS class "section" is not added because it is already present.
612
+ # Note also that the pre-existing duplicate CSS class "section" is not removed.
613
+ #
614
+ # node # => <div class="section section"></div>
615
+ # node.add_class("section header") # => <div class="section section header"></div>
616
+ #
617
+ # *Example:* Ensure that the node has CSS classes "section" and "header", via an Array argument
618
+ #
619
+ # node # => <div></div>
620
+ # node.add_class(["section", "header"]) # => <div class="section header"></div>
621
+ #
622
+ def add_class(names)
623
+ kwattr_add("class", names)
624
+ end
625
+
626
+ #
627
+ # :call-seq: append_class(names) → self
628
+ #
629
+ # Add HTML CSS classes to +self+, regardless of duplication. Compare with #add_class.
630
+ #
631
+ # This is a convenience function and is equivalent to:
632
+ #
633
+ # node.kwattr_append("class", names)
634
+ #
635
+ # See related: #kwattr_append, #classes, #add_class, #remove_class
636
+ #
637
+ # [Parameters]
638
+ # - +names+ (String, Array<String>)
639
+ #
640
+ # CSS class names to be appended to the Node's "class" attribute. May be a string containing
641
+ # whitespace-delimited names, or an Array of String names. All class names passed in will be
642
+ # appended to the "class" attribute even if they are already present in the attribute
643
+ # value. If no "class" attribute exists, one is created.
644
+ #
645
+ # [Returns] +self+ (Node) for ease of chaining method calls.
646
+ #
647
+ # *Example:* Append "section" to the node's CSS "class" attribute
648
+ #
649
+ # node # => <div></div>
650
+ # node.append_class("section") # => <div class="section"></div>
651
+ # node.append_class("section") # => <div class="section section"></div> # duplicate added!
652
+ #
653
+ # *Example:* Append "section" and "header" to the node's CSS "class" attribute, via a String argument
654
+ #
655
+ # Note that the CSS class "section" is appended even though it is already present
656
+ #
657
+ # node # => <div class="section section"></div>
658
+ # node.append_class("section header") # => <div class="section section section header"></div>
659
+ #
660
+ # *Example:* Append "section" and "header" to the node's CSS "class" attribute, via an Array argument
661
+ #
662
+ # node # => <div></div>
663
+ # node.append_class(["section", "header"]) # => <div class="section header"></div>
664
+ # node.append_class(["section", "header"]) # => <div class="section header section header"></div>
665
+ #
666
+ def append_class(names)
667
+ kwattr_append("class", names)
668
+ end
669
+
670
+ # :call-seq:
671
+ # remove_class(css_classes) → self
672
+ #
673
+ # Remove HTML CSS classes from this node. Any CSS class names in +css_classes+ that exist in
674
+ # this node's "class" attribute are removed, including any multiple entries.
675
+ #
676
+ # If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
677
+ # attribute is deleted from the node.
678
+ #
679
+ # This is a convenience function and is equivalent to:
680
+ #
681
+ # node.kwattr_remove("class", css_classes)
682
+ #
683
+ # Also see #kwattr_remove, #classes, #add_class, #append_class
684
+ #
685
+ # [Parameters]
686
+ # - +css_classes+ (String, Array<String>)
687
+ #
688
+ # CSS class names to be removed from the Node's
689
+ # "class" attribute. May be a string containing whitespace-delimited names, or an Array of
690
+ # String names. Any class names already present will be removed. If no CSS classes remain,
691
+ # the "class" attribute is deleted.
692
+ #
693
+ # [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
694
+ #
695
+ # *Example*: Deleting a CSS class
696
+ #
697
+ # Note that all instances of the class "section" are removed from the "class" attribute.
698
+ #
699
+ # node # => <div class="section header section"></div>
700
+ # node.remove_class("section") # => <div class="header"></div>
701
+ #
702
+ # *Example*: Deleting the only remaining CSS class
703
+ #
704
+ # Note that the attribute is removed once there are no remaining classes.
705
+ #
706
+ # node # => <div class="section"></div>
707
+ # node.remove_class("section") # => <div></div>
708
+ #
709
+ # *Example*: Deleting multiple CSS classes
710
+ #
711
+ # Note that the "class" attribute is deleted once it's empty.
712
+ #
713
+ # node # => <div class="section header float"></div>
714
+ # node.remove_class(["section", "float"]) # => <div class="header"></div>
715
+ #
716
+ def remove_class(names = nil)
717
+ kwattr_remove("class", names)
718
+ end
719
+
720
+ # :call-seq:
721
+ # kwattr_values(attribute_name) → Array<String>
722
+ #
723
+ # Fetch values from a keyword attribute of a Node.
724
+ #
725
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
726
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
727
+ # contain CSS classes. But other keyword attributes exist, for instance
728
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
729
+ #
730
+ # See also #classes, #kwattr_add, #kwattr_append, #kwattr_remove
731
+ #
732
+ # [Parameters]
733
+ # - +attribute_name+ (String) The name of the keyword attribute to be inspected.
734
+ #
735
+ # [Returns]
736
+ # (Array<String>) The values present in the Node's +attribute_name+ attribute. If the
737
+ # attribute is empty or non-existent, the return value is an empty array.
738
+ #
739
+ # *Example:*
740
+ #
741
+ # node # => <a rel="nofollow noopener external">link</a>
742
+ # node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
743
+ #
744
+ # Since v1.11.0
745
+ def kwattr_values(attribute_name)
746
+ keywordify(get_attribute(attribute_name) || [])
747
+ end
748
+
749
+ # :call-seq:
750
+ # kwattr_add(attribute_name, keywords) → self
751
+ #
752
+ # Ensure that values are present in a keyword attribute.
753
+ #
754
+ # Any values in +keywords+ that already exist in the Node's attribute values are _not_
755
+ # added. Note that any existing duplicates in the attribute values are not removed. Compare
756
+ # with #kwattr_append.
757
+ #
758
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
759
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
760
+ # contain CSS classes. But other keyword attributes exist, for instance
761
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
762
+ #
763
+ # See also #add_class, #kwattr_values, #kwattr_append, #kwattr_remove
764
+ #
765
+ # [Parameters]
766
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
767
+ # - +keywords+ (String, Array<String>)
768
+ # Keywords to be added to the attribute named +attribute_name+. May be a string containing
769
+ # whitespace-delimited values, or an Array of String values. Any values already present will
770
+ # not be added. Any values not present will be added. If the named attribute does not exist,
771
+ # it is created.
772
+ #
773
+ # [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
774
+ #
775
+ # *Example:* Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
776
+ #
777
+ # Note that duplicates are not added.
778
+ #
779
+ # node # => <a></a>
780
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
781
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
782
+ #
783
+ # *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a
784
+ # String argument.
785
+ #
786
+ # Note that "nofollow" is not added because it is already present. Note also that the
787
+ # pre-existing duplicate "nofollow" is not removed.
788
+ #
789
+ # node # => <a rel="nofollow nofollow"></a>
790
+ # node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
791
+ #
792
+ # *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via
793
+ # an Array argument.
794
+ #
795
+ # node # => <a></a>
796
+ # node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
797
+ #
798
+ # Since v1.11.0
799
+ def kwattr_add(attribute_name, keywords)
800
+ keywords = keywordify(keywords)
801
+ current_kws = kwattr_values(attribute_name)
802
+ new_kws = (current_kws + (keywords - current_kws)).join(" ")
803
+ set_attribute(attribute_name, new_kws)
371
804
  self
372
805
  end
373
806
 
374
- ###
375
- # Append +name+ to the "class" attribute value of this Node and
376
- # return self. The value is simply appended without checking if
377
- # it is already in the current value. If no "class" attribute
378
- # exists yet, one is created with the given value.
807
+ # :call-seq:
808
+ # kwattr_append(attribute_name, keywords) → self
809
+ #
810
+ # Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
811
+ # #kwattr_add.
812
+ #
813
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
814
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
815
+ # contain CSS classes. But other keyword attributes exist, for instance
816
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
817
+ #
818
+ # See also #append_class, #kwattr_values, #kwattr_add, #kwattr_remove
819
+ #
820
+ # [Parameters]
821
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
822
+ # - +keywords+ (String, Array<String>)
823
+ # Keywords to be added to the attribute named +attribute_name+. May be a string containing
824
+ # whitespace-delimited values, or an Array of String values. All values passed in will be
825
+ # appended to the named attribute even if they are already present in the attribute. If the
826
+ # named attribute does not exist, it is created.
827
+ #
828
+ # [Returns] +self+ (Node) for ease of chaining method calls.
379
829
  #
380
- # More than one class may be appended at a time, separated by a
381
- # space.
382
- def append_class name
383
- self['class'] = (classes + name.scan(/\S+/)).join(' ')
830
+ # *Example:* Append "nofollow" to the +rel+ attribute.
831
+ #
832
+ # Note that duplicates are added.
833
+ #
834
+ # node # => <a></a>
835
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
836
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a>
837
+ #
838
+ # *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
839
+ #
840
+ # Note that "nofollow" is appended even though it is already present.
841
+ #
842
+ # node # => <a rel="nofollow"></a>
843
+ # node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
844
+ #
845
+ #
846
+ # *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
847
+ #
848
+ # node # => <a></a>
849
+ # node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
850
+ #
851
+ # Since v1.11.0
852
+ def kwattr_append(attribute_name, keywords)
853
+ keywords = keywordify(keywords)
854
+ current_kws = kwattr_values(attribute_name)
855
+ new_kws = (current_kws + keywords).join(" ")
856
+ set_attribute(attribute_name, new_kws)
384
857
  self
385
858
  end
386
859
 
387
- ###
388
- # Remove +name+ from the "class" attribute value of this Node
389
- # and return self. If there are many occurrences of the name,
390
- # they are all removed.
860
+ # :call-seq:
861
+ # kwattr_remove(attribute_name, keywords) → self
391
862
  #
392
- # More than one class may be removed at a time, separated by a
393
- # space.
863
+ # Remove keywords from a keyword attribute. Any matching keywords that exist in the named
864
+ # attribute are removed, including any multiple entries.
394
865
  #
395
- # If no class name is left after removal, or when +name+ is nil,
396
- # the "class" attribute is removed from this Node.
397
- def remove_class name = nil
398
- if name
399
- names = classes - name.scan(/\S+/)
400
- if names.empty?
401
- delete 'class'
402
- else
403
- self['class'] = names.join(' ')
404
- end
866
+ # If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
867
+ # deleted from the node.
868
+ #
869
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
870
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
871
+ # contain CSS classes. But other keyword attributes exist, for instance
872
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
873
+ #
874
+ # See also #remove_class, #kwattr_values, #kwattr_add, #kwattr_append
875
+ #
876
+ # [Parameters]
877
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
878
+ # - +keywords+ (String, Array<String>)
879
+ # Keywords to be removed from the attribute named +attribute_name+. May be a string
880
+ # containing whitespace-delimited values, or an Array of String values. Any keywords present
881
+ # in the named attribute will be removed. If no keywords remain, or if +keywords+ is nil,
882
+ # the attribute is deleted.
883
+ #
884
+ # [Returns] +self+ (Node) for ease of chaining method calls.
885
+ #
886
+ # *Example:*
887
+ #
888
+ # Note that the +rel+ attribute is deleted when empty.
889
+ #
890
+ # node # => <a rel="nofollow noreferrer">link</a>
891
+ # node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
892
+ # node.kwattr_remove("rel", "noreferrer") # => <a>link</a>
893
+ #
894
+ # Since v1.11.0
895
+ def kwattr_remove(attribute_name, keywords)
896
+ if keywords.nil?
897
+ remove_attribute(attribute_name)
898
+ return self
899
+ end
900
+
901
+ keywords = keywordify(keywords)
902
+ current_kws = kwattr_values(attribute_name)
903
+ new_kws = current_kws - keywords
904
+ if new_kws.empty?
905
+ remove_attribute(attribute_name)
405
906
  else
406
- delete "class"
907
+ set_attribute(attribute_name, new_kws.join(" "))
407
908
  end
408
909
  self
409
910
  end
410
911
 
411
- ###
412
- # Remove the attribute named +name+
413
- def remove_attribute name
414
- attr = attributes[name].remove if key? name
415
- clear_xpath_context if Nokogiri.jruby?
416
- attr
417
- end
418
- alias :delete :remove_attribute
912
+ alias_method :delete, :remove_attribute
913
+ alias_method :get_attribute, :[]
914
+ alias_method :attr, :[]
915
+ alias_method :set_attribute, :[]=
916
+ alias_method :has_attribute?, :key?
917
+
918
+ # :section:
419
919
 
420
920
  ###
421
921
  # Returns true if this Node matches +selector+
422
- def matches? selector
922
+ def matches?(selector)
423
923
  ancestors.last.search(selector).include?(self)
424
924
  end
425
925
 
426
926
  ###
427
927
  # Create a DocumentFragment containing +tags+ that is relative to _this_
428
928
  # context node.
429
- def fragment tags
430
- type = document.html? ? Nokogiri::HTML : Nokogiri::XML
431
- type::DocumentFragment.new(document, tags, self)
929
+ def fragment(tags)
930
+ document.related_class("DocumentFragment").new(document, tags, self)
432
931
  end
433
932
 
434
933
  ###
435
934
  # Parse +string_or_io+ as a document fragment within the context of
436
935
  # *this* node. Returns a XML::NodeSet containing the nodes parsed from
437
936
  # +string_or_io+.
438
- def parse string_or_io, options = nil
937
+ def parse(string_or_io, options = nil)
439
938
  ##
440
939
  # When the current node is unparented and not an element node, use the
441
940
  # document as the parsing context instead. Otherwise, the in-context
@@ -446,61 +945,87 @@ module Nokogiri
446
945
  end
447
946
 
448
947
  options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
449
- if Integer === options
450
- options = Nokogiri::XML::ParseOptions.new(options)
451
- end
452
- # Give the options to the user
948
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
453
949
  yield options if block_given?
454
950
 
455
- contents = string_or_io.respond_to?(:read) ?
456
- string_or_io.read :
951
+ contents = if string_or_io.respond_to?(:read)
952
+ string_or_io.read
953
+ else
457
954
  string_or_io
955
+ end
458
956
 
459
957
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
460
958
 
461
- ##
462
- # This is a horrible hack, but I don't care. See #313 for background.
959
+ # libxml2 does not obey the +recover+ option after encountering errors during +in_context+
960
+ # parsing, and so this horrible hack is here to try to emulate recovery behavior.
961
+ #
962
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
963
+ # would have been inherited from the context node won't be handled correctly. This hack was
964
+ # written in 2010, and I regret it, because it's silently degrading functionality in a way
965
+ # that's not easily prevented (or even detected).
966
+ #
967
+ # I think preferable behavior would be to either:
968
+ #
969
+ # a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
970
+ # b. don't recover, but raise a sensible exception
971
+ #
972
+ # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
973
+ # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
463
974
  error_count = document.errors.length
464
975
  node_set = in_context(contents, options.to_i)
465
- if node_set.empty? and document.errors.length > error_count and options.recover?
466
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
467
- node_set = fragment.children
976
+ if node_set.empty? && (document.errors.length > error_count)
977
+ if options.recover?
978
+ fragment = document.related_class("DocumentFragment").parse(contents)
979
+ node_set = fragment.children
980
+ else
981
+ raise document.errors[error_count]
982
+ end
468
983
  end
469
984
  node_set
470
985
  end
471
986
 
472
- ####
473
- # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
474
- def content= string
475
- self.native_content = encode_special_chars(string.to_s)
476
- end
477
-
478
- ###
479
- # Set the parent Node for this Node
480
- def parent= parent_node
481
- parent_node.add_child(self)
482
- parent_node
483
- end
484
-
485
- ###
486
- # Returns a Hash of {prefix => value} for all namespaces on this
487
- # node and its ancestors.
987
+ # :call-seq:
988
+ # namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
989
+ #
990
+ # Fetch all the namespaces on this node and its ancestors.
991
+ #
992
+ # Note that the keys in this hash are the XML attributes that would be used to define this namespace,
993
+ # such as "xmlns:prefix", not just the prefix.
488
994
  #
489
- # This method returns the same namespaces as #namespace_scopes.
995
+ # The default namespace for this node will be included with key "xmlns".
996
+ #
997
+ # See also #namespace_scopes
998
+ #
999
+ # [Returns]
1000
+ # Hash containing all the namespaces on this node and its ancestors. The hash keys are the
1001
+ # namespace declaration attribute names ("xmlns" or "xmlns:prefix"), and the hash value for each key is the namespace URI.
1002
+ #
1003
+ # *Example:*
1004
+ #
1005
+ # doc = Nokogiri::XML(<<~EOF)
1006
+ # <root xmlns="http://example.com/root" xmlns:in_scope="http://example.com/in_scope">
1007
+ # <first/>
1008
+ # <second xmlns="http://example.com/child"/>
1009
+ # <third xmlns:foo="http://example.com/foo"/>
1010
+ # </root>
1011
+ # EOF
1012
+ # doc.at_xpath("//root:first", "root" => "http://example.com/root").namespaces
1013
+ # # => {"xmlns"=>"http://example.com/root",
1014
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
1015
+ # doc.at_xpath("//child:second", "child" => "http://example.com/child").namespaces
1016
+ # # => {"xmlns"=>"http://example.com/child",
1017
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
1018
+ # doc.at_xpath("//root:third", "root" => "http://example.com/root").namespaces
1019
+ # # => {"xmlns:foo"=>"http://example.com/foo",
1020
+ # # "xmlns"=>"http://example.com/root",
1021
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
490
1022
  #
491
- # Returns namespaces in scope for self -- those defined on self
492
- # element directly or any ancestor node -- as a Hash of
493
- # attribute-name/value pairs. Note that the keys in this hash
494
- # XML attributes that would be used to define this namespace,
495
- # such as "xmlns:prefix", not just the prefix. Default namespace
496
- # set on self will be included with key "xmlns". However,
497
- # default namespaces set on ancestor will NOT be, even if self
498
- # has no explicit default namespace.
499
1023
  def namespaces
500
- Hash[namespace_scopes.map { |nd|
501
- key = ['xmlns', nd.prefix].compact.join(':')
502
- [key, nd.href]
503
- }]
1024
+ namespace_scopes.each_with_object({}) do |ns, hash|
1025
+ prefix = ns.prefix
1026
+ key = prefix ? "xmlns:#{prefix}" : "xmlns"
1027
+ hash[key] = ns.href
1028
+ end
504
1029
  end
505
1030
 
506
1031
  # Returns true if this is a Comment
@@ -518,14 +1043,14 @@ module Nokogiri
518
1043
  type == DOCUMENT_NODE
519
1044
  end
520
1045
 
521
- # Returns true if this is an HTML::Document node
1046
+ # Returns true if this is an HTML4::Document or HTML5::Document node
522
1047
  def html?
523
1048
  type == HTML_DOCUMENT_NODE
524
1049
  end
525
1050
 
526
1051
  # Returns true if this is a Document
527
1052
  def document?
528
- is_a? XML::Document
1053
+ is_a?(XML::Document)
529
1054
  end
530
1055
 
531
1056
  # Returns true if this is a ProcessingInstruction node
@@ -544,11 +1069,12 @@ module Nokogiri
544
1069
  end
545
1070
 
546
1071
  ###
547
- # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
1072
+ # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
548
1073
  # nil on XML documents and on unknown tags.
549
1074
  def description
550
1075
  return nil if document.xml?
551
- Nokogiri::HTML::ElementDescription[name]
1076
+
1077
+ Nokogiri::HTML4::ElementDescription[name]
552
1078
  end
553
1079
 
554
1080
  ###
@@ -562,7 +1088,8 @@ module Nokogiri
562
1088
  def element?
563
1089
  type == ELEMENT_NODE
564
1090
  end
565
- alias :elem? :element?
1091
+
1092
+ alias_method :elem?, :element?
566
1093
 
567
1094
  ###
568
1095
  # Turn this node into a string. If the document is HTML, this method
@@ -572,28 +1099,29 @@ module Nokogiri
572
1099
  end
573
1100
 
574
1101
  # Get the inner_html for this node's Node#children
575
- def inner_html *args
1102
+ def inner_html(*args)
576
1103
  children.map { |x| x.to_html(*args) }.join
577
1104
  end
578
1105
 
579
1106
  # Get the path to this node as a CSS expression
580
1107
  def css_path
581
- path.split(/\//).map { |part|
582
- part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
583
- }.compact.join(' > ')
1108
+ path.split(%r{/}).map do |part|
1109
+ part.empty? ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
1110
+ end.compact.join(" > ")
584
1111
  end
585
1112
 
586
1113
  ###
587
1114
  # Get a list of ancestor Node for this Node. If +selector+ is given,
588
1115
  # the ancestors must match +selector+
589
- def ancestors selector = nil
1116
+ def ancestors(selector = nil)
590
1117
  return NodeSet.new(document) unless respond_to?(:parent)
591
1118
  return NodeSet.new(document) unless parent
592
1119
 
593
1120
  parents = [parent]
594
1121
 
595
1122
  while parents.last.respond_to?(:parent)
596
- break unless ctx_parent = parents.last.parent
1123
+ break unless (ctx_parent = parents.last.parent)
1124
+
597
1125
  parents << ctx_parent
598
1126
  end
599
1127
 
@@ -602,65 +1130,49 @@ module Nokogiri
602
1130
  root = parents.last
603
1131
  search_results = root.search(selector)
604
1132
 
605
- NodeSet.new(document, parents.find_all { |parent|
1133
+ NodeSet.new(document, parents.find_all do |parent|
606
1134
  search_results.include?(parent)
607
- })
608
- end
609
-
610
- ###
611
- # Adds a default namespace supplied as a string +url+ href, to self.
612
- # The consequence is as an xmlns attribute with supplied argument were
613
- # present in parsed XML. A default namespace set with this method will
614
- # now show up in #attributes, but when this node is serialized to XML an
615
- # "xmlns" attribute will appear. See also #namespace and #namespace=
616
- def default_namespace= url
617
- add_namespace_definition(nil, url)
618
- end
619
- alias :add_namespace :add_namespace_definition
620
-
621
- ###
622
- # Set the default namespace on this node (as would be defined with an
623
- # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
624
- # a Namespace added this way will NOT be serialized as an xmlns attribute
625
- # for this node. You probably want #default_namespace= instead, or perhaps
626
- # #add_namespace_definition with a nil prefix argument.
627
- def namespace= ns
628
- return set_namespace(ns) unless ns
629
-
630
- unless Nokogiri::XML::Namespace === ns
631
- raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
632
- end
633
- if ns.document != document
634
- raise ArgumentError, 'namespace must be declared on the same document'
635
- end
636
-
637
- set_namespace ns
1135
+ end)
638
1136
  end
639
1137
 
640
1138
  ####
641
1139
  # Yields self and all children to +block+ recursively.
642
- def traverse &block
643
- children.each{|j| j.traverse(&block) }
644
- block.call(self)
1140
+ def traverse(&block)
1141
+ children.each { |j| j.traverse(&block) }
1142
+ yield(self)
645
1143
  end
646
1144
 
647
1145
  ###
648
1146
  # Accept a visitor. This method calls "visit" on +visitor+ with self.
649
- def accept visitor
1147
+ def accept(visitor)
650
1148
  visitor.visit(self)
651
1149
  end
652
1150
 
653
1151
  ###
654
1152
  # Test to see if this Node is equal to +other+
655
- def == other
1153
+ def ==(other)
656
1154
  return false unless other
657
1155
  return false unless other.respond_to?(:pointer_id)
1156
+
658
1157
  pointer_id == other.pointer_id
659
1158
  end
660
1159
 
661
1160
  ###
662
- # Serialize Node using +options+. Save options can also be set using a
663
- # block. See SaveOptions.
1161
+ # Compare two Node objects with respect to their Document. Nodes from
1162
+ # different documents cannot be compared.
1163
+ def <=>(other)
1164
+ return nil unless other.is_a?(Nokogiri::XML::Node)
1165
+ return nil unless document == other.document
1166
+
1167
+ compare(other)
1168
+ end
1169
+
1170
+ # :section: Serialization and Generating Output
1171
+
1172
+ ###
1173
+ # Serialize Node using +options+. Save options can also be set using a block.
1174
+ #
1175
+ # See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
664
1176
  #
665
1177
  # These two statements are equivalent:
666
1178
  #
@@ -672,19 +1184,23 @@ module Nokogiri
672
1184
  # config.format.as_xml
673
1185
  # end
674
1186
  #
675
- def serialize *args, &block
676
- options = args.first.is_a?(Hash) ? args.shift : {
677
- :encoding => args[0],
678
- :save_with => args[1]
679
- }
1187
+ def serialize(*args, &block)
1188
+ options = if args.first.is_a?(Hash)
1189
+ args.shift
1190
+ else
1191
+ {
1192
+ encoding: args[0],
1193
+ save_with: args[1],
1194
+ }
1195
+ end
680
1196
 
681
1197
  encoding = options[:encoding] || document.encoding
682
1198
  options[:encoding] = encoding
683
1199
 
684
- outstring = String.new
685
- outstring.force_encoding(Encoding.find(encoding || 'utf-8'))
1200
+ outstring = +""
1201
+ outstring.force_encoding(Encoding.find(encoding || "utf-8"))
686
1202
  io = StringIO.new(outstring)
687
- write_to io, options, &block
1203
+ write_to(io, options, &block)
688
1204
  io.string
689
1205
  end
690
1206
 
@@ -695,8 +1211,8 @@ module Nokogiri
695
1211
  #
696
1212
  # See Node#write_to for a list of +options+. For formatted output,
697
1213
  # use Node#to_xhtml instead.
698
- def to_html options = {}
699
- to_format SaveOptions::DEFAULT_HTML, options
1214
+ def to_html(options = {})
1215
+ to_format(SaveOptions::DEFAULT_HTML, options)
700
1216
  end
701
1217
 
702
1218
  ###
@@ -705,7 +1221,7 @@ module Nokogiri
705
1221
  # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
706
1222
  #
707
1223
  # See Node#write_to for a list of +options+
708
- def to_xml options = {}
1224
+ def to_xml(options = {})
709
1225
  options[:save_with] ||= SaveOptions::DEFAULT_XML
710
1226
  serialize(options)
711
1227
  end
@@ -716,8 +1232,8 @@ module Nokogiri
716
1232
  # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
717
1233
  #
718
1234
  # See Node#write_to for a list of +options+
719
- def to_xhtml options = {}
720
- to_format SaveOptions::DEFAULT_XHTML, options
1235
+ def to_xhtml(options = {})
1236
+ to_format(SaveOptions::DEFAULT_XHTML, options)
721
1237
  end
722
1238
 
723
1239
  ###
@@ -737,38 +1253,43 @@ module Nokogiri
737
1253
  #
738
1254
  # node.write_to(io, :indent_text => '-', :indent => 2)
739
1255
  #
740
- def write_to io, *options
741
- options = options.first.is_a?(Hash) ? options.shift : {}
742
- encoding = options[:encoding] || options[0]
1256
+ def write_to(io, *options)
1257
+ options = options.first.is_a?(Hash) ? options.shift : {}
1258
+ encoding = options[:encoding] || options[0]
743
1259
  if Nokogiri.jruby?
744
- save_options = options[:save_with] || options[1]
745
- indent_times = options[:indent] || 0
1260
+ save_options = options[:save_with] || options[1]
1261
+ indent_times = options[:indent] || 0
746
1262
  else
747
- save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
748
- indent_times = options[:indent] || 2
1263
+ save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
1264
+ indent_times = options[:indent] || 2
749
1265
  end
750
- indent_text = options[:indent_text] || ' '
1266
+ indent_text = options[:indent_text] || " "
1267
+
1268
+ # Any string times 0 returns an empty string. Therefore, use the same
1269
+ # string instead of generating a new empty string for every node with
1270
+ # zero indentation.
1271
+ indentation = indent_times.zero? ? "" : (indent_text * indent_times)
751
1272
 
752
1273
  config = SaveOptions.new(save_options.to_i)
753
1274
  yield config if block_given?
754
1275
 
755
- native_write_to(io, encoding, indent_text * indent_times, config.options)
1276
+ native_write_to(io, encoding, indentation, config.options)
756
1277
  end
757
1278
 
758
1279
  ###
759
1280
  # Write Node as HTML to +io+ with +options+
760
1281
  #
761
1282
  # See Node#write_to for a list of +options+
762
- def write_html_to io, options = {}
763
- write_format_to SaveOptions::DEFAULT_HTML, io, options
1283
+ def write_html_to(io, options = {})
1284
+ write_format_to(SaveOptions::DEFAULT_HTML, io, options)
764
1285
  end
765
1286
 
766
1287
  ###
767
1288
  # Write Node as XHTML to +io+ with +options+
768
1289
  #
769
1290
  # See Node#write_to for a list of +options+
770
- def write_xhtml_to io, options = {}
771
- write_format_to SaveOptions::DEFAULT_XHTML, io, options
1291
+ def write_xhtml_to(io, options = {})
1292
+ write_format_to(SaveOptions::DEFAULT_XHTML, io, options)
772
1293
  end
773
1294
 
774
1295
  ###
@@ -777,110 +1298,105 @@ module Nokogiri
777
1298
  # doc.write_xml_to io, :encoding => 'UTF-8'
778
1299
  #
779
1300
  # See Node#write_to for a list of options
780
- def write_xml_to io, options = {}
1301
+ def write_xml_to(io, options = {})
781
1302
  options[:save_with] ||= SaveOptions::DEFAULT_XML
782
- write_to io, options
1303
+ write_to(io, options)
783
1304
  end
784
1305
 
785
- ###
786
- # Compare two Node objects with respect to their Document. Nodes from
787
- # different documents cannot be compared.
788
- def <=> other
789
- return nil unless other.is_a?(Nokogiri::XML::Node)
790
- return nil unless document == other.document
791
- compare other
1306
+ def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
1307
+ c14n_root = self
1308
+ document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
1309
+ tn = node.is_a?(XML::Node) ? node : parent
1310
+ tn == c14n_root || tn.ancestors.include?(c14n_root)
1311
+ end
792
1312
  end
793
1313
 
794
- ###
795
- # Do xinclude substitution on the subtree below node. If given a block, a
796
- # Nokogiri::XML::ParseOptions object initialized from +options+, will be
797
- # passed to it, allowing more convenient modification of the parser options.
798
- def do_xinclude options = XML::ParseOptions::DEFAULT_XML, &block
799
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
1314
+ # :section:
800
1315
 
801
- # give options to user
802
- yield options if block_given?
1316
+ protected
803
1317
 
804
- # call c extension
805
- process_xincludes(options.to_i)
1318
+ def coerce(data)
1319
+ case data
1320
+ when XML::NodeSet
1321
+ return data
1322
+ when XML::DocumentFragment
1323
+ return data.children
1324
+ when String
1325
+ return fragment(data).children
1326
+ when Document, XML::Attr
1327
+ # unacceptable
1328
+ when XML::Node
1329
+ return data
1330
+ end
1331
+
1332
+ raise ArgumentError, <<~EOERR
1333
+ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1334
+ (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1335
+ EOERR
806
1336
  end
807
1337
 
808
- def canonicalize(mode=XML::XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
809
- c14n_root = self
810
- document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
811
- tn = node.is_a?(XML::Node) ? node : parent
812
- tn == c14n_root || tn.ancestors.include?(c14n_root)
1338
+ private
1339
+
1340
+ def keywordify(keywords)
1341
+ case keywords
1342
+ when Enumerable
1343
+ keywords
1344
+ when String
1345
+ keywords.scan(/\S+/)
1346
+ else
1347
+ raise ArgumentError,
1348
+ "Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}"
813
1349
  end
814
1350
  end
815
1351
 
816
- private
1352
+ def add_sibling(next_or_previous, node_or_tags)
1353
+ raise("Cannot add sibling to a node with no parent") unless parent
817
1354
 
818
- def add_sibling next_or_previous, node_or_tags
819
- impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
820
- iter = (next_or_previous == :next) ? :reverse_each : :each
1355
+ impl = next_or_previous == :next ? :add_next_sibling_node : :add_previous_sibling_node
1356
+ iter = next_or_previous == :next ? :reverse_each : :each
821
1357
 
822
- node_or_tags = coerce node_or_tags
1358
+ node_or_tags = parent.coerce(node_or_tags)
823
1359
  if node_or_tags.is_a?(XML::NodeSet)
824
1360
  if text?
825
- pivot = Nokogiri::XML::Node.new 'dummy', document
826
- send impl, pivot
1361
+ pivot = Nokogiri::XML::Node.new("dummy", document)
1362
+ send(impl, pivot)
827
1363
  else
828
1364
  pivot = self
829
1365
  end
830
- node_or_tags.send(iter) { |n| pivot.send impl, n }
1366
+ node_or_tags.send(iter) { |n| pivot.send(impl, n) }
831
1367
  pivot.unlink if text?
832
1368
  else
833
- send impl, node_or_tags
1369
+ send(impl, node_or_tags)
834
1370
  end
835
1371
  node_or_tags
836
1372
  end
837
1373
 
838
- def to_format save_option, options
839
- # FIXME: this is a hack around broken libxml versions
840
- return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1374
+ USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
1375
+ private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
1376
+
1377
+ def to_format(save_option, options)
1378
+ return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
841
1379
 
842
1380
  options[:save_with] = save_option unless options[:save_with]
843
1381
  serialize(options)
844
1382
  end
845
1383
 
846
- def write_format_to save_option, io, options
847
- # FIXME: this is a hack around broken libxml versions
848
- return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1384
+ def write_format_to(save_option, io, options)
1385
+ return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
849
1386
 
850
1387
  options[:save_with] ||= save_option
851
- write_to io, options
1388
+ write_to(io, options)
852
1389
  end
853
1390
 
854
1391
  def inspect_attributes
855
1392
  [:name, :namespace, :attribute_nodes, :children]
856
1393
  end
857
1394
 
858
- def coerce data # :nodoc:
859
- case data
860
- when XML::NodeSet
861
- return data
862
- when XML::DocumentFragment
863
- return data.children
864
- when String
865
- return fragment(data).children
866
- when Document, XML::Attr
867
- # unacceptable
868
- when XML::Node
869
- return data
870
- end
1395
+ IMPLIED_XPATH_CONTEXTS = [".//"].freeze
871
1396
 
872
- raise ArgumentError, <<-EOERR
873
- Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
874
- (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
875
- EOERR
876
- end
877
-
878
- # @private
879
- IMPLIED_XPATH_CONTEXTS = [ './/'.freeze ].freeze # :nodoc:
880
-
881
- def add_child_node_and_reparent_attrs node # :nodoc:
882
- add_child_node node
883
- node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
1397
+ def add_child_node_and_reparent_attrs(node)
1398
+ add_child_node(node)
1399
+ node.attribute_nodes.find_all { |a| a.name.include?(":") }.each do |attr_node|
884
1400
  attr_node.remove
885
1401
  node[attr_node.name] = attr_node.value
886
1402
  end
@@ -888,3 +1404,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
888
1404
  end
889
1405
  end
890
1406
  end
1407
+
1408
+ require_relative "node/save_options"