nokogiri 1.8.5 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (358) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -18
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +867 -417
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -237
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +136 -62
  33. data/ext/nokogiri/xml_node.c +1387 -678
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +104 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1093 -411
  142. data/lib/nokogiri/xml/node_set.rb +173 -67
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +126 -399
  178. data/.autotest +0 -22
  179. data/.cross_rubies +0 -8
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -63
  183. data/CHANGELOG.md +0 -1368
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/Gemfile-libxml-ruby +0 -3
  187. data/Manifest.txt +0 -370
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -348
  190. data/SECURITY.md +0 -19
  191. data/STANDARD_RESPONSES.md +0 -47
  192. data/Y_U_NO_GEMSPEC.md +0 -155
  193. data/appveyor.yml +0 -29
  194. data/build_all +0 -44
  195. data/ext/nokogiri/html_document.c +0 -170
  196. data/ext/nokogiri/html_document.h +0 -10
  197. data/ext/nokogiri/html_element_description.c +0 -279
  198. data/ext/nokogiri/html_element_description.h +0 -10
  199. data/ext/nokogiri/html_entity_lookup.c +0 -32
  200. data/ext/nokogiri/html_entity_lookup.h +0 -8
  201. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  202. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  203. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  204. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  205. data/ext/nokogiri/xml_attr.h +0 -9
  206. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  207. data/ext/nokogiri/xml_cdata.h +0 -9
  208. data/ext/nokogiri/xml_comment.h +0 -9
  209. data/ext/nokogiri/xml_document.h +0 -23
  210. data/ext/nokogiri/xml_document_fragment.h +0 -10
  211. data/ext/nokogiri/xml_dtd.h +0 -10
  212. data/ext/nokogiri/xml_element_content.h +0 -10
  213. data/ext/nokogiri/xml_element_decl.h +0 -9
  214. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  215. data/ext/nokogiri/xml_entity_decl.h +0 -10
  216. data/ext/nokogiri/xml_entity_reference.h +0 -9
  217. data/ext/nokogiri/xml_io.c +0 -61
  218. data/ext/nokogiri/xml_io.h +0 -11
  219. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  220. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  221. data/ext/nokogiri/xml_namespace.h +0 -15
  222. data/ext/nokogiri/xml_node.h +0 -13
  223. data/ext/nokogiri/xml_node_set.h +0 -12
  224. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  225. data/ext/nokogiri/xml_reader.h +0 -10
  226. data/ext/nokogiri/xml_relax_ng.h +0 -9
  227. data/ext/nokogiri/xml_sax_parser.h +0 -39
  228. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  229. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  230. data/ext/nokogiri/xml_schema.h +0 -9
  231. data/ext/nokogiri/xml_syntax_error.h +0 -13
  232. data/ext/nokogiri/xml_text.h +0 -9
  233. data/ext/nokogiri/xml_xpath_context.h +0 -10
  234. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  235. data/lib/nokogiri/html/document.rb +0 -335
  236. data/lib/nokogiri/html/document_fragment.rb +0 -49
  237. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  238. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  239. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  240. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  241. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  242. data/patches/sort-patches-by-date +0 -25
  243. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  244. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  245. data/suppressions/README.txt +0 -1
  246. data/suppressions/nokogiri_ruby-2.supp +0 -10
  247. data/tasks/test.rb +0 -100
  248. data/test/css/test_nthiness.rb +0 -226
  249. data/test/css/test_parser.rb +0 -386
  250. data/test/css/test_tokenizer.rb +0 -215
  251. data/test/css/test_xpath_visitor.rb +0 -96
  252. data/test/decorators/test_slop.rb +0 -23
  253. data/test/files/2ch.html +0 -108
  254. data/test/files/GH_1042.html +0 -18
  255. data/test/files/address_book.rlx +0 -12
  256. data/test/files/address_book.xml +0 -10
  257. data/test/files/atom.xml +0 -344
  258. data/test/files/bar/bar.xsd +0 -4
  259. data/test/files/bogus.xml +0 -0
  260. data/test/files/dont_hurt_em_why.xml +0 -422
  261. data/test/files/encoding.html +0 -82
  262. data/test/files/encoding.xhtml +0 -84
  263. data/test/files/exslt.xml +0 -8
  264. data/test/files/exslt.xslt +0 -35
  265. data/test/files/foo/foo.xsd +0 -4
  266. data/test/files/metacharset.html +0 -10
  267. data/test/files/namespace_pressure_test.xml +0 -1684
  268. data/test/files/noencoding.html +0 -47
  269. data/test/files/po.xml +0 -32
  270. data/test/files/po.xsd +0 -66
  271. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  272. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  273. data/test/files/saml/xenc_schema.xsd +0 -146
  274. data/test/files/saml/xmldsig_schema.xsd +0 -318
  275. data/test/files/shift_jis.html +0 -10
  276. data/test/files/shift_jis.xml +0 -5
  277. data/test/files/shift_jis_no_charset.html +0 -9
  278. data/test/files/slow-xpath.xml +0 -25509
  279. data/test/files/snuggles.xml +0 -3
  280. data/test/files/staff.dtd +0 -10
  281. data/test/files/staff.xml +0 -59
  282. data/test/files/staff.xslt +0 -32
  283. data/test/files/test_document_url/bar.xml +0 -2
  284. data/test/files/test_document_url/document.dtd +0 -4
  285. data/test/files/test_document_url/document.xml +0 -6
  286. data/test/files/tlm.html +0 -851
  287. data/test/files/to_be_xincluded.xml +0 -2
  288. data/test/files/valid_bar.xml +0 -2
  289. data/test/files/xinclude.xml +0 -4
  290. data/test/helper.rb +0 -271
  291. data/test/html/sax/test_parser.rb +0 -168
  292. data/test/html/sax/test_parser_context.rb +0 -46
  293. data/test/html/sax/test_parser_text.rb +0 -163
  294. data/test/html/sax/test_push_parser.rb +0 -87
  295. data/test/html/test_attributes.rb +0 -85
  296. data/test/html/test_builder.rb +0 -164
  297. data/test/html/test_document.rb +0 -712
  298. data/test/html/test_document_encoding.rb +0 -143
  299. data/test/html/test_document_fragment.rb +0 -310
  300. data/test/html/test_element_description.rb +0 -105
  301. data/test/html/test_named_characters.rb +0 -14
  302. data/test/html/test_node.rb +0 -212
  303. data/test/html/test_node_encoding.rb +0 -91
  304. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  305. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  306. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  308. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  309. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  310. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  311. data/test/test_convert_xpath.rb +0 -135
  312. data/test/test_css_cache.rb +0 -47
  313. data/test/test_encoding_handler.rb +0 -48
  314. data/test/test_memory_leak.rb +0 -156
  315. data/test/test_nokogiri.rb +0 -138
  316. data/test/test_soap4r_sax.rb +0 -52
  317. data/test/test_xslt_transforms.rb +0 -314
  318. data/test/xml/node/test_save_options.rb +0 -28
  319. data/test/xml/node/test_subclass.rb +0 -44
  320. data/test/xml/sax/test_parser.rb +0 -402
  321. data/test/xml/sax/test_parser_context.rb +0 -115
  322. data/test/xml/sax/test_parser_text.rb +0 -202
  323. data/test/xml/sax/test_push_parser.rb +0 -265
  324. data/test/xml/test_attr.rb +0 -74
  325. data/test/xml/test_attribute_decl.rb +0 -86
  326. data/test/xml/test_builder.rb +0 -341
  327. data/test/xml/test_c14n.rb +0 -180
  328. data/test/xml/test_cdata.rb +0 -54
  329. data/test/xml/test_comment.rb +0 -40
  330. data/test/xml/test_document.rb +0 -982
  331. data/test/xml/test_document_encoding.rb +0 -31
  332. data/test/xml/test_document_fragment.rb +0 -298
  333. data/test/xml/test_dtd.rb +0 -187
  334. data/test/xml/test_dtd_encoding.rb +0 -31
  335. data/test/xml/test_element_content.rb +0 -56
  336. data/test/xml/test_element_decl.rb +0 -73
  337. data/test/xml/test_entity_decl.rb +0 -122
  338. data/test/xml/test_entity_reference.rb +0 -262
  339. data/test/xml/test_namespace.rb +0 -96
  340. data/test/xml/test_node.rb +0 -1325
  341. data/test/xml/test_node_attributes.rb +0 -115
  342. data/test/xml/test_node_encoding.rb +0 -75
  343. data/test/xml/test_node_inheritance.rb +0 -32
  344. data/test/xml/test_node_reparenting.rb +0 -592
  345. data/test/xml/test_node_set.rb +0 -809
  346. data/test/xml/test_parse_options.rb +0 -64
  347. data/test/xml/test_processing_instruction.rb +0 -30
  348. data/test/xml/test_reader.rb +0 -620
  349. data/test/xml/test_reader_encoding.rb +0 -134
  350. data/test/xml/test_relax_ng.rb +0 -60
  351. data/test/xml/test_schema.rb +0 -142
  352. data/test/xml/test_syntax_error.rb +0 -36
  353. data/test/xml/test_text.rb +0 -60
  354. data/test/xml/test_unparented_node.rb +0 -483
  355. data/test/xml/test_xinclude.rb +0 -83
  356. data/test/xml/test_xpath.rb +0 -470
  357. data/test/xslt/test_custom_functions.rb +0 -133
  358. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,105 +1,130 @@
1
- # encoding: UTF-8
2
- require 'stringio'
3
- require 'nokogiri/xml/node/save_options'
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "stringio"
4
5
 
5
6
  module Nokogiri
6
7
  module XML
7
- ####
8
- # Nokogiri::XML::Node is your window to the fun filled world of dealing
9
- # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
10
- # to a hash with regard to attributes. For example (from irb):
8
+ # Nokogiri::XML::Node is the primary API you'll use to interact with your Document.
9
+ #
10
+ # == Attributes
11
+ #
12
+ # A Nokogiri::XML::Node may be treated similarly to a hash with regard to attributes. For
13
+ # example:
14
+ #
15
+ # node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
16
+ # node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
17
+ # node['href'] # => "#foo"
18
+ # node.keys # => ["href", "id"]
19
+ # node.values # => ["#foo", "link"]
20
+ # node['class'] = 'green' # => "green"
21
+ # node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
22
+ #
23
+ # See the method group entitled Node@Working+With+Node+Attributes for the full set of methods.
24
+ #
25
+ # == Navigation
26
+ #
27
+ # Nokogiri::XML::Node also has methods that let you move around your tree:
11
28
  #
12
- # irb(main):004:0> node
13
- # => <a href="#foo" id="link">link</a>
14
- # irb(main):005:0> node['href']
15
- # => "#foo"
16
- # irb(main):006:0> node.keys
17
- # => ["href", "id"]
18
- # irb(main):007:0> node.values
19
- # => ["#foo", "link"]
20
- # irb(main):008:0> node['class'] = 'green'
21
- # => "green"
22
- # irb(main):009:0> node
23
- # => <a href="#foo" id="link" class="green">link</a>
24
- # irb(main):010:0>
29
+ # [#parent, #children, #next, #previous]
30
+ # Navigate up, down, or through siblings.
25
31
  #
26
- # See Nokogiri::XML::Node#[] and Nokogiri::XML#[]= for more information.
32
+ # See the method group entitled Node@Traversing+Document+Structure for the full set of methods.
27
33
  #
28
- # Nokogiri::XML::Node also has methods that let you move around your
29
- # tree. For navigating your tree, see:
34
+ # == Serialization
30
35
  #
31
- # * Nokogiri::XML::Node#parent
32
- # * Nokogiri::XML::Node#children
33
- # * Nokogiri::XML::Node#next
34
- # * Nokogiri::XML::Node#previous
36
+ # When printing or otherwise emitting a document or a node (and its subtree), there are a few
37
+ # methods you might want to use:
35
38
  #
39
+ # [#content, #text, #inner_text, #to_str]
40
+ # These methods will all **emit plaintext**,
41
+ # meaning that entities will be replaced (e.g., +&lt;+ will be replaced with +<+), meaning
42
+ # that any sanitizing will likely be un-done in the output.
36
43
  #
37
- # When printing or otherwise emitting a document or a node (and
38
- # its subtree), there are a few methods you might want to use:
44
+ # [#to_s, #to_xml, #to_html, #inner_html]
45
+ # These methods will all **emit properly-escaped markup**, meaning that it's suitable for
46
+ # consumption by browsers, parsers, etc.
39
47
  #
40
- # * content, text, inner_text, to_str: emit plaintext
48
+ # See the method group entitled Node@Serialization+and+Generating+Output for the full set of methods.
41
49
  #
42
- # These methods will all emit the plaintext version of your
43
- # document, meaning that entities will be replaced (e.g., "&lt;"
44
- # will be replaced with "<"), meaning that any sanitizing will
45
- # likely be un-done in the output.
50
+ # == Searching
46
51
  #
47
- # * to_s, to_xml, to_html, inner_html: emit well-formed markup
52
+ # You may search this node's subtree using methods like #xpath and #css.
48
53
  #
49
- # These methods will all emit properly-escaped markup, meaning
50
- # that it's suitable for consumption by browsers, parsers, etc.
54
+ # See the method group entitled Node@Searching+via+XPath+or+CSS+Queries for the full set of methods.
51
55
  #
52
- # You may search this node's subtree using Searchable#xpath and Searchable#css
53
56
  class Node
54
57
  include Nokogiri::XML::PP::Node
55
58
  include Nokogiri::XML::Searchable
59
+ include Nokogiri::ClassResolver
56
60
  include Enumerable
57
61
 
58
62
  # Element node type, see Nokogiri::XML::Node#element?
59
- ELEMENT_NODE = 1
63
+ ELEMENT_NODE = 1
60
64
  # Attribute node type
61
- ATTRIBUTE_NODE = 2
65
+ ATTRIBUTE_NODE = 2
62
66
  # Text node type, see Nokogiri::XML::Node#text?
63
- TEXT_NODE = 3
67
+ TEXT_NODE = 3
64
68
  # CDATA node type, see Nokogiri::XML::Node#cdata?
65
69
  CDATA_SECTION_NODE = 4
66
70
  # Entity reference node type
67
- ENTITY_REF_NODE = 5
71
+ ENTITY_REF_NODE = 5
68
72
  # Entity node type
69
- ENTITY_NODE = 6
73
+ ENTITY_NODE = 6
70
74
  # PI node type
71
- PI_NODE = 7
75
+ PI_NODE = 7
72
76
  # Comment node type, see Nokogiri::XML::Node#comment?
73
- COMMENT_NODE = 8
77
+ COMMENT_NODE = 8
74
78
  # Document node type, see Nokogiri::XML::Node#xml?
75
- DOCUMENT_NODE = 9
79
+ DOCUMENT_NODE = 9
76
80
  # Document type node type
77
81
  DOCUMENT_TYPE_NODE = 10
78
82
  # Document fragment node type
79
83
  DOCUMENT_FRAG_NODE = 11
80
84
  # Notation node type
81
- NOTATION_NODE = 12
85
+ NOTATION_NODE = 12
82
86
  # HTML document node type, see Nokogiri::XML::Node#html?
83
87
  HTML_DOCUMENT_NODE = 13
84
88
  # DTD node type
85
- DTD_NODE = 14
89
+ DTD_NODE = 14
86
90
  # Element declaration type
87
- ELEMENT_DECL = 15
91
+ ELEMENT_DECL = 15
88
92
  # Attribute declaration type
89
- ATTRIBUTE_DECL = 16
93
+ ATTRIBUTE_DECL = 16
90
94
  # Entity declaration type
91
- ENTITY_DECL = 17
95
+ ENTITY_DECL = 17
92
96
  # Namespace declaration type
93
- NAMESPACE_DECL = 18
97
+ NAMESPACE_DECL = 18
94
98
  # XInclude start type
95
- XINCLUDE_START = 19
99
+ XINCLUDE_START = 19
96
100
  # XInclude end type
97
- XINCLUDE_END = 20
101
+ XINCLUDE_END = 20
98
102
  # DOCB document node type
99
103
  DOCB_DOCUMENT_NODE = 21
100
104
 
101
- def initialize name, document # :nodoc:
102
- # ... Ya. This is empty on purpose.
105
+ #
106
+ # :call-seq:
107
+ # new(name, document) -> Nokogiri::XML::Node
108
+ # new(name, document) { |node| ... } -> Nokogiri::XML::Node
109
+ #
110
+ # Create a new node with +name+ that belongs to +document+.
111
+ #
112
+ # If you intend to add a node to a document tree, it's likely that you will prefer one of the
113
+ # Nokogiri::XML::Node methods like #add_child, #add_next_sibling, #replace, etc. which will
114
+ # both create an element (or subtree) and place it in the document tree.
115
+ #
116
+ # Another alternative, if you are concerned about performance, is
117
+ # Nokogiri::XML::Document#create_element which accepts additional arguments for contents or
118
+ # attributes but (like this method) avoids parsing markup.
119
+ #
120
+ # [Parameters]
121
+ # - +name+ (String)
122
+ # - +document+ (Nokogiri::XML::Document) The document to which the returned node will belong.
123
+ # [Yields] Nokogiri::XML::Node
124
+ # [Returns] Nokogiri::XML::Node
125
+ #
126
+ def initialize(name, document)
127
+ # This is intentionally empty, and sets the method signature for subclasses.
103
128
  end
104
129
 
105
130
  ###
@@ -108,226 +133,483 @@ module Nokogiri
108
133
  document.decorate(self)
109
134
  end
110
135
 
111
- ###
112
- # Search this node's immediate children using CSS selector +selector+
113
- def > selector
114
- ns = document.root.namespaces
115
- xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
116
- end
117
-
118
- ###
119
- # Get the attribute value for the attribute +name+
120
- def [] name
121
- get(name.to_s)
122
- end
123
-
124
- ###
125
- # Set the attribute value for the attribute +name+ to +value+
126
- def []= name, value
127
- set name.to_s, value.to_s
128
- end
136
+ # :section: Manipulating Document Structure
129
137
 
130
138
  ###
131
139
  # Add +node_or_tags+ as a child of this Node.
132
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
133
140
  #
134
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
141
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
142
+ # containing markup.
143
+ #
144
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
145
+ # a DocumentFragment, NodeSet, or String).
135
146
  #
136
147
  # Also see related method +<<+.
137
- def add_child node_or_tags
148
+ def add_child(node_or_tags)
138
149
  node_or_tags = coerce(node_or_tags)
139
150
  if node_or_tags.is_a?(XML::NodeSet)
140
- node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
151
+ node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
141
152
  else
142
- add_child_node_and_reparent_attrs node_or_tags
153
+ add_child_node_and_reparent_attrs(node_or_tags)
143
154
  end
144
155
  node_or_tags
145
156
  end
146
157
 
147
158
  ###
148
159
  # Add +node_or_tags+ as the first child of this Node.
149
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
150
160
  #
151
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
161
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
162
+ # containing markup.
163
+ #
164
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
165
+ # a DocumentFragment, NodeSet, or String).
152
166
  #
153
167
  # Also see related method +add_child+.
154
- def prepend_child node_or_tags
155
- if first = children.first
168
+ def prepend_child(node_or_tags)
169
+ if (first = children.first)
156
170
  # Mimic the error add_child would raise.
157
- raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
171
+ raise "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
172
+
158
173
  first.__send__(:add_sibling, :previous, node_or_tags)
159
174
  else
160
175
  add_child(node_or_tags)
161
176
  end
162
177
  end
163
178
 
179
+ # :call-seq:
180
+ # wrap(markup) -> self
181
+ # wrap(node) -> self
182
+ #
183
+ # Wrap this Node with the node parsed from +markup+ or a dup of the +node+.
184
+ #
185
+ # [Parameters]
186
+ # - *markup* (String)
187
+ # Markup that is parsed and used as the wrapper. This node's parent, if it exists, is used
188
+ # as the context node for parsing; otherwise the associated document is used. If the parsed
189
+ # fragment has multiple roots, the first root node is used as the wrapper.
190
+ # - *node* (Nokogiri::XML::Node)
191
+ # An element that is `#dup`ed and used as the wrapper.
192
+ #
193
+ # [Returns] +self+, to support chaining.
194
+ #
195
+ # Also see NodeSet#wrap
196
+ #
197
+ # *Example* with a +String+ argument:
198
+ #
199
+ # doc = Nokogiri::HTML5(<<~HTML)
200
+ # <html><body>
201
+ # <a>asdf</a>
202
+ # </body></html>
203
+ # HTML
204
+ # doc.at_css("a").wrap("<div></div>")
205
+ # doc.to_html
206
+ # # => <html><head></head><body>
207
+ # # <div><a>asdf</a></div>
208
+ # # </body></html>
209
+ #
210
+ # *Example* with a +Node+ argument:
211
+ #
212
+ # doc = Nokogiri::HTML5(<<~HTML)
213
+ # <html><body>
214
+ # <a>asdf</a>
215
+ # </body></html>
216
+ # HTML
217
+ # doc.at_css("a").wrap(doc.create_element("div"))
218
+ # doc.to_html
219
+ # # => <html><head></head><body>
220
+ # # <div><a>asdf</a></div>
221
+ # # </body></html>
222
+ #
223
+ def wrap(node_or_tags)
224
+ case node_or_tags
225
+ when String
226
+ context_node = parent || document
227
+ new_parent = context_node.coerce(node_or_tags).first
228
+ if new_parent.nil?
229
+ raise "Failed to parse '#{node_or_tags}' in the context of a '#{context_node.name}' element"
230
+ end
231
+ when XML::Node
232
+ new_parent = node_or_tags.dup
233
+ else
234
+ raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}"
235
+ end
236
+
237
+ if parent
238
+ add_next_sibling(new_parent)
239
+ else
240
+ new_parent.unlink
241
+ end
242
+ new_parent.add_child(self)
243
+
244
+ self
245
+ end
246
+
164
247
  ###
165
248
  # Add +node_or_tags+ as a child of this Node.
166
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
167
249
  #
168
- # Returns self, to support chaining of calls (e.g., root << child1 << child2)
250
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
251
+ # containing markup.
252
+ #
253
+ # Returns +self+, to support chaining of calls (e.g., root << child1 << child2)
169
254
  #
170
255
  # Also see related method +add_child+.
171
- def << node_or_tags
172
- add_child node_or_tags
256
+ def <<(node_or_tags)
257
+ add_child(node_or_tags)
173
258
  self
174
259
  end
175
260
 
176
261
  ###
177
262
  # Insert +node_or_tags+ before this Node (as a sibling).
178
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
179
263
  #
180
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
264
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
265
+ # containing markup.
266
+ #
267
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
268
+ # a DocumentFragment, NodeSet, or String).
181
269
  #
182
270
  # Also see related method +before+.
183
- def add_previous_sibling node_or_tags
184
- raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
271
+ def add_previous_sibling(node_or_tags)
272
+ raise ArgumentError,
273
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
185
274
 
186
- add_sibling :previous, node_or_tags
275
+ add_sibling(:previous, node_or_tags)
187
276
  end
188
277
 
189
278
  ###
190
279
  # Insert +node_or_tags+ after this Node (as a sibling).
191
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
192
280
  #
193
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
281
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
282
+ # containing markup.
283
+ #
284
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
285
+ # a DocumentFragment, NodeSet, or String).
194
286
  #
195
287
  # Also see related method +after+.
196
- def add_next_sibling node_or_tags
197
- raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
288
+ def add_next_sibling(node_or_tags)
289
+ raise ArgumentError,
290
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
198
291
 
199
- add_sibling :next, node_or_tags
292
+ add_sibling(:next, node_or_tags)
200
293
  end
201
294
 
202
295
  ####
203
296
  # Insert +node_or_tags+ before this node (as a sibling).
204
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
205
297
  #
206
- # Returns self, to support chaining of calls.
298
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
299
+ # containing markup.
300
+ #
301
+ # Returns +self+, to support chaining of calls.
207
302
  #
208
303
  # Also see related method +add_previous_sibling+.
209
- def before node_or_tags
210
- add_previous_sibling node_or_tags
304
+ def before(node_or_tags)
305
+ add_previous_sibling(node_or_tags)
211
306
  self
212
307
  end
213
308
 
214
309
  ####
215
310
  # Insert +node_or_tags+ after this node (as a sibling).
216
- # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
217
311
  #
218
- # Returns self, to support chaining of calls.
312
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
313
+ # containing markup.
314
+ #
315
+ # Returns +self+, to support chaining of calls.
219
316
  #
220
317
  # Also see related method +add_next_sibling+.
221
- def after node_or_tags
222
- add_next_sibling node_or_tags
318
+ def after(node_or_tags)
319
+ add_next_sibling(node_or_tags)
223
320
  self
224
321
  end
225
322
 
226
323
  ####
227
- # Set the inner html for this Node to +node_or_tags+
228
- # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
324
+ # Set the content for this Node to +node_or_tags+.
325
+ #
326
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
327
+ # containing markup.
229
328
  #
230
- # Returns self.
329
+ # Please note that despite the name, this method will *not* always parse a String argument
330
+ # as HTML. A String argument will be parsed with the +DocumentFragment+ parser related to this
331
+ # node's document.
332
+ #
333
+ # For example, if the document is an HTML4::Document then the string will be parsed as HTML4
334
+ # using HTML4::DocumentFragment; but if the document is an XML::Document then it will
335
+ # parse the string as XML using XML::DocumentFragment.
231
336
  #
232
337
  # Also see related method +children=+
233
- def inner_html= node_or_tags
338
+ def inner_html=(node_or_tags)
234
339
  self.children = node_or_tags
235
- self
236
340
  end
237
341
 
238
342
  ####
239
- # Set the inner html for this Node +node_or_tags+
240
- # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
343
+ # Set the content for this Node to +node_or_tags+.
241
344
  #
242
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
345
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
346
+ # containing markup.
243
347
  #
244
348
  # Also see related method +inner_html=+
245
- def children= node_or_tags
349
+ def children=(node_or_tags)
246
350
  node_or_tags = coerce(node_or_tags)
247
351
  children.unlink
248
352
  if node_or_tags.is_a?(XML::NodeSet)
249
- node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
353
+ node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
250
354
  else
251
- add_child_node_and_reparent_attrs node_or_tags
355
+ add_child_node_and_reparent_attrs(node_or_tags)
252
356
  end
253
- node_or_tags
254
357
  end
255
358
 
256
359
  ####
257
360
  # Replace this Node with +node_or_tags+.
258
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
259
361
  #
260
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
362
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
363
+ # containing markup.
364
+ #
365
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
366
+ # a DocumentFragment, NodeSet, or String).
261
367
  #
262
368
  # Also see related method +swap+.
263
- def replace node_or_tags
369
+ def replace(node_or_tags)
370
+ raise("Cannot replace a node with no parent") unless parent
371
+
264
372
  # We cannot replace a text node directly, otherwise libxml will return
265
373
  # an internal error at parser.c:13031, I don't know exactly why
266
374
  # libxml is trying to find a parent node that is an element or document
267
375
  # so I can't tell if this is bug in libxml or not. issue #775.
268
376
  if text?
269
- replacee = Nokogiri::XML::Node.new 'dummy', document
270
- add_previous_sibling_node replacee
377
+ replacee = Nokogiri::XML::Node.new("dummy", document)
378
+ add_previous_sibling_node(replacee)
271
379
  unlink
272
- return replacee.replace node_or_tags
380
+ return replacee.replace(node_or_tags)
273
381
  end
274
382
 
275
- node_or_tags = coerce(node_or_tags)
383
+ node_or_tags = parent.coerce(node_or_tags)
276
384
 
277
385
  if node_or_tags.is_a?(XML::NodeSet)
278
- node_or_tags.each { |n| add_previous_sibling n }
386
+ node_or_tags.each { |n| add_previous_sibling(n) }
279
387
  unlink
280
388
  else
281
- replace_node node_or_tags
389
+ replace_node(node_or_tags)
282
390
  end
283
391
  node_or_tags
284
392
  end
285
393
 
286
394
  ####
287
395
  # Swap this Node for +node_or_tags+
288
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
396
+ #
397
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
398
+ # containing markup.
289
399
  #
290
400
  # Returns self, to support chaining of calls.
291
401
  #
292
402
  # Also see related method +replace+.
293
- def swap node_or_tags
294
- replace node_or_tags
403
+ def swap(node_or_tags)
404
+ replace(node_or_tags)
295
405
  self
296
406
  end
297
407
 
298
- alias :next :next_sibling
299
- alias :previous :previous_sibling
300
-
301
- # :stopdoc:
302
- # HACK: This is to work around an RDoc bug
303
- alias :next= :add_next_sibling
304
- # :startdoc:
305
-
306
- alias :previous= :add_previous_sibling
307
- alias :remove :unlink
308
- alias :get_attribute :[]
309
- alias :attr :[]
310
- alias :set_attribute :[]=
311
- alias :text :content
312
- alias :inner_text :content
313
- alias :has_attribute? :key?
314
- alias :name :node_name
315
- alias :name= :node_name=
316
- alias :type :node_type
317
- alias :to_str :text
318
- alias :clone :dup
319
- alias :elements :element_children
320
-
321
408
  ####
322
- # Returns a hash containing the node's attributes. The key is
323
- # the attribute name without any namespace, the value is a Nokogiri::XML::Attr
324
- # representing the attribute.
325
- # If you need to distinguish attributes with the same name, with different namespaces
326
- # use #attribute_nodes instead.
409
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not
410
+ # interpreted as markup.
411
+ def content=(string)
412
+ self.native_content = encode_special_chars(string.to_s)
413
+ end
414
+
415
+ ###
416
+ # Set the parent Node for this Node
417
+ def parent=(parent_node)
418
+ parent_node.add_child(self)
419
+ end
420
+
421
+ ###
422
+ # Adds a default namespace supplied as a string +url+ href, to self.
423
+ # The consequence is as if an xmlns attribute with the supplied argument were
424
+ # present in parsed XML. A default namespace set with this method will
425
+ # not show up in #attributes, but when this node is serialized to XML an
426
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
427
+ def default_namespace=(url)
428
+ add_namespace_definition(nil, url)
429
+ end
430
+
431
+ ###
432
+ # Set the default namespace on this node (as would be defined with an
433
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
434
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
435
+ # for this node. You probably want #default_namespace= instead, or perhaps
436
+ # #add_namespace_definition with a nil prefix argument.
437
+ def namespace=(ns)
438
+ return set_namespace(ns) unless ns
439
+
440
+ unless Nokogiri::XML::Namespace === ns
441
+ raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
442
+ end
443
+ if ns.document != document
444
+ raise ArgumentError, "namespace must be declared on the same document"
445
+ end
446
+
447
+ set_namespace(ns)
448
+ end
449
+
450
+ ###
451
+ # Do xinclude substitution on the subtree below node. If given a block, a
452
+ # Nokogiri::XML::ParseOptions object initialized from +options+, will be
453
+ # passed to it, allowing more convenient modification of the parser options.
454
+ def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
455
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
456
+ yield options if block_given?
457
+
458
+ # call c extension
459
+ process_xincludes(options.to_i)
460
+ end
461
+
462
+ alias_method :next, :next_sibling
463
+ alias_method :previous, :previous_sibling
464
+ alias_method :next=, :add_next_sibling
465
+ alias_method :previous=, :add_previous_sibling
466
+ alias_method :remove, :unlink
467
+ alias_method :name=, :node_name=
468
+ alias_method :add_namespace, :add_namespace_definition
469
+
470
+ # :section:
471
+
472
+ alias_method :inner_text, :content
473
+ alias_method :text, :content
474
+ alias_method :to_str, :content
475
+ alias_method :name, :node_name
476
+ alias_method :type, :node_type
477
+ alias_method :clone, :dup
478
+ alias_method :elements, :element_children
479
+
480
+ # :section: Working With Node Attributes
481
+
482
+ # :call-seq: [](name) → (String, nil)
483
+ #
484
+ # Fetch an attribute from this node.
485
+ #
486
+ # ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
487
+ # namespaced attributes, use #attribute_with_ns.
488
+ #
489
+ # [Returns] (String, nil) value of the attribute +name+, or +nil+ if no matching attribute exists
490
+ #
491
+ # *Example*
492
+ #
493
+ # doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
494
+ # child = doc.at_css("child")
495
+ # child["size"] # => "large"
496
+ # child["class"] # => "big wide tall"
497
+ #
498
+ # *Example:* Namespaced attributes will not be returned.
499
+ #
500
+ # ⚠ Note namespaced attributes may be accessed with #attribute or #attribute_with_ns
501
+ #
502
+ # doc = Nokogiri::XML(<<~EOF)
503
+ # <root xmlns:width='http://example.com/widths'>
504
+ # <child width:size='broad'/>
505
+ # </root>
506
+ # EOF
507
+ # doc.at_css("child")["size"] # => nil
508
+ # doc.at_css("child").attribute("size").value # => "broad"
509
+ # doc.at_css("child").attribute_with_ns("size", "http://example.com/widths").value
510
+ # # => "broad"
511
+ #
512
+ def [](name)
513
+ get(name.to_s)
514
+ end
515
+
516
+ # :call-seq: []=(name, value) → value
517
+ #
518
+ # Update the attribute +name+ to +value+, or create the attribute if it does not exist.
519
+ #
520
+ # ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
521
+ # namespaced attributes for update, use #attribute_with_ns. To add a namespaced attribute,
522
+ # see the example below.
523
+ #
524
+ # [Returns] +value+
525
+ #
526
+ # *Example*
527
+ #
528
+ # doc = Nokogiri::XML("<root><child/></root>")
529
+ # child = doc.at_css("child")
530
+ # child["size"] = "broad"
531
+ # child.to_html
532
+ # # => "<child size=\"broad\"></child>"
533
+ #
534
+ # *Example:* Add a namespaced attribute.
535
+ #
536
+ # doc = Nokogiri::XML(<<~EOF)
537
+ # <root xmlns:width='http://example.com/widths'>
538
+ # <child/>
539
+ # </root>
540
+ # EOF
541
+ # child = doc.at_css("child")
542
+ # child["size"] = "broad"
543
+ # ns = doc.root.namespace_definitions.find { |ns| ns.prefix == "width" }
544
+ # child.attribute("size").namespace = ns
545
+ # doc.to_html
546
+ # # => "<root xmlns:width=\"http://example.com/widths\">\n" +
547
+ # # " <child width:size=\"broad\"></child>\n" +
548
+ # # "</root>\n"
549
+ #
550
+ def []=(name, value)
551
+ set(name.to_s, value.to_s)
552
+ end
553
+
554
+ #
555
+ # :call-seq: attributes() → Hash<String ⇒ Nokogiri::XML::Attr>
556
+ #
557
+ # Fetch this node's attributes.
558
+ #
559
+ # ⚠ Because the keys do not include any namespace information for the attribute, in case of a
560
+ # simple name collision, not all attributes will be returned. In this case, you will need to
561
+ # use #attribute_nodes.
562
+ #
563
+ # [Returns]
564
+ # Hash containing attributes belonging to +self+. The hash keys are String attribute
565
+ # names (without the namespace), and the hash values are Nokogiri::XML::Attr.
566
+ #
567
+ # *Example* with no namespaces:
568
+ #
569
+ # doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
570
+ # doc.at_css("child").attributes
571
+ # # => {"size"=>#(Attr:0x550 { name = "size", value = "large" }),
572
+ # # "class"=>#(Attr:0x564 { name = "class", value = "big wide tall" })}
573
+ #
574
+ # *Example* with a namespace:
575
+ #
576
+ # doc = Nokogiri::XML("<root xmlns:desc='http://example.com/sizes'><child desc:size='large'/></root>")
577
+ # doc.at_css("child").attributes
578
+ # # => {"size"=>
579
+ # # #(Attr:0x550 {
580
+ # # name = "size",
581
+ # # namespace = #(Namespace:0x564 {
582
+ # # prefix = "desc",
583
+ # # href = "http://example.com/sizes"
584
+ # # }),
585
+ # # value = "large"
586
+ # # })}
587
+ #
588
+ # *Example* with an attribute name collision:
589
+ #
590
+ # ⚠ Note that only one of the attributes is returned in the Hash.
591
+ #
592
+ # doc = Nokogiri::XML(<<~EOF)
593
+ # <root xmlns:width='http://example.com/widths'
594
+ # xmlns:height='http://example.com/heights'>
595
+ # <child width:size='broad' height:size='tall'/>
596
+ # </root>
597
+ # EOF
598
+ # doc.at_css("child").attributes
599
+ # # => {"size"=>
600
+ # # #(Attr:0x550 {
601
+ # # name = "size",
602
+ # # namespace = #(Namespace:0x564 {
603
+ # # prefix = "height",
604
+ # # href = "http://example.com/heights"
605
+ # # }),
606
+ # # value = "tall"
607
+ # # })}
608
+ #
327
609
  def attributes
328
- Hash[attribute_nodes.map { |node|
329
- [node.node_name, node]
330
- }]
610
+ attribute_nodes.each_with_object({}) do |node, hash|
611
+ hash[node.node_name] = node
612
+ end
331
613
  end
332
614
 
333
615
  ###
@@ -336,6 +618,12 @@ module Nokogiri
336
618
  attribute_nodes.map(&:value)
337
619
  end
338
620
 
621
+ ###
622
+ # Do this Node's attribute values include +value+?
623
+ def value?(value)
624
+ values.include?(value)
625
+ end
626
+
339
627
  ###
340
628
  # Get the attribute names for this Node.
341
629
  def keys
@@ -345,97 +633,401 @@ module Nokogiri
345
633
  ###
346
634
  # Iterate over each attribute name and value pair for this Node.
347
635
  def each
348
- attribute_nodes.each { |node|
636
+ attribute_nodes.each do |node|
349
637
  yield [node.node_name, node.value]
350
- }
638
+ end
351
639
  end
352
640
 
353
641
  ###
354
- # Get the list of class names of this Node, without
355
- # deduplication or sorting.
642
+ # Remove the attribute named +name+
643
+ def remove_attribute(name)
644
+ attr = attributes[name].remove if key?(name)
645
+ clear_xpath_context if Nokogiri.jruby?
646
+ attr
647
+ end
648
+
649
+ #
650
+ # :call-seq: classes() → Array<String>
651
+ #
652
+ # Fetch CSS class names of a Node.
653
+ #
654
+ # This is a convenience function and is equivalent to:
655
+ #
656
+ # node.kwattr_values("class")
657
+ #
658
+ # See related: #kwattr_values, #add_class, #append_class, #remove_class
659
+ #
660
+ # [Returns]
661
+ # The CSS classes (Array of String) present in the Node's "class" attribute. If the
662
+ # attribute is empty or non-existent, the return value is an empty array.
663
+ #
664
+ # *Example*
665
+ #
666
+ # node # => <div class="section title header"></div>
667
+ # node.classes # => ["section", "title", "header"]
668
+ #
356
669
  def classes
357
- self['class'].to_s.scan(/\S+/)
670
+ kwattr_values("class")
358
671
  end
359
672
 
360
- ###
361
- # Add +name+ to the "class" attribute value of this Node and
362
- # return self. If the value is already in the current value, it
363
- # is not added. If no "class" attribute exists yet, one is
364
- # created with the given value.
365
673
  #
366
- # More than one class may be added at a time, separated by a
367
- # space.
368
- def add_class name
369
- names = classes
370
- self['class'] = (names + (name.scan(/\S+/) - names)).join(' ')
674
+ # :call-seq: add_class(names) → self
675
+ #
676
+ # Ensure HTML CSS classes are present on +self+. Any CSS classes in +names+ that already exist
677
+ # in the "class" attribute are _not_ added. Note that any existing duplicates in the
678
+ # "class" attribute are not removed. Compare with #append_class.
679
+ #
680
+ # This is a convenience function and is equivalent to:
681
+ #
682
+ # node.kwattr_add("class", names)
683
+ #
684
+ # See related: #kwattr_add, #classes, #append_class, #remove_class
685
+ #
686
+ # [Parameters]
687
+ # - +names+ (String, Array<String>)
688
+ #
689
+ # CSS class names to be added to the Node's "class" attribute. May be a string containing
690
+ # whitespace-delimited names, or an Array of String names. Any class names already present
691
+ # will not be added. Any class names not present will be added. If no "class" attribute
692
+ # exists, one is created.
693
+ #
694
+ # [Returns] +self+ (Node) for ease of chaining method calls.
695
+ #
696
+ # *Example:* Ensure that the node has CSS class "section"
697
+ #
698
+ # node # => <div></div>
699
+ # node.add_class("section") # => <div class="section"></div>
700
+ # node.add_class("section") # => <div class="section"></div> # duplicate not added
701
+ #
702
+ # *Example:* Ensure that the node has CSS classes "section" and "header", via a String argument
703
+ #
704
+ # Note that the CSS class "section" is not added because it is already present.
705
+ # Note also that the pre-existing duplicate CSS class "section" is not removed.
706
+ #
707
+ # node # => <div class="section section"></div>
708
+ # node.add_class("section header") # => <div class="section section header"></div>
709
+ #
710
+ # *Example:* Ensure that the node has CSS classes "section" and "header", via an Array argument
711
+ #
712
+ # node # => <div></div>
713
+ # node.add_class(["section", "header"]) # => <div class="section header"></div>
714
+ #
715
+ def add_class(names)
716
+ kwattr_add("class", names)
717
+ end
718
+
719
+ #
720
+ # :call-seq: append_class(names) → self
721
+ #
722
+ # Add HTML CSS classes to +self+, regardless of duplication. Compare with #add_class.
723
+ #
724
+ # This is a convenience function and is equivalent to:
725
+ #
726
+ # node.kwattr_append("class", names)
727
+ #
728
+ # See related: #kwattr_append, #classes, #add_class, #remove_class
729
+ #
730
+ # [Parameters]
731
+ # - +names+ (String, Array<String>)
732
+ #
733
+ # CSS class names to be appended to the Node's "class" attribute. May be a string containing
734
+ # whitespace-delimited names, or an Array of String names. All class names passed in will be
735
+ # appended to the "class" attribute even if they are already present in the attribute
736
+ # value. If no "class" attribute exists, one is created.
737
+ #
738
+ # [Returns] +self+ (Node) for ease of chaining method calls.
739
+ #
740
+ # *Example:* Append "section" to the node's CSS "class" attribute
741
+ #
742
+ # node # => <div></div>
743
+ # node.append_class("section") # => <div class="section"></div>
744
+ # node.append_class("section") # => <div class="section section"></div> # duplicate added!
745
+ #
746
+ # *Example:* Append "section" and "header" to the node's CSS "class" attribute, via a String argument
747
+ #
748
+ # Note that the CSS class "section" is appended even though it is already present
749
+ #
750
+ # node # => <div class="section section"></div>
751
+ # node.append_class("section header") # => <div class="section section section header"></div>
752
+ #
753
+ # *Example:* Append "section" and "header" to the node's CSS "class" attribute, via an Array argument
754
+ #
755
+ # node # => <div></div>
756
+ # node.append_class(["section", "header"]) # => <div class="section header"></div>
757
+ # node.append_class(["section", "header"]) # => <div class="section header section header"></div>
758
+ #
759
+ def append_class(names)
760
+ kwattr_append("class", names)
761
+ end
762
+
763
+ # :call-seq:
764
+ # remove_class(css_classes) → self
765
+ #
766
+ # Remove HTML CSS classes from this node. Any CSS class names in +css_classes+ that exist in
767
+ # this node's "class" attribute are removed, including any multiple entries.
768
+ #
769
+ # If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
770
+ # attribute is deleted from the node.
771
+ #
772
+ # This is a convenience function and is equivalent to:
773
+ #
774
+ # node.kwattr_remove("class", css_classes)
775
+ #
776
+ # Also see #kwattr_remove, #classes, #add_class, #append_class
777
+ #
778
+ # [Parameters]
779
+ # - +css_classes+ (String, Array<String>)
780
+ #
781
+ # CSS class names to be removed from the Node's
782
+ # "class" attribute. May be a string containing whitespace-delimited names, or an Array of
783
+ # String names. Any class names already present will be removed. If no CSS classes remain,
784
+ # the "class" attribute is deleted.
785
+ #
786
+ # [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
787
+ #
788
+ # *Example*: Deleting a CSS class
789
+ #
790
+ # Note that all instances of the class "section" are removed from the "class" attribute.
791
+ #
792
+ # node # => <div class="section header section"></div>
793
+ # node.remove_class("section") # => <div class="header"></div>
794
+ #
795
+ # *Example*: Deleting the only remaining CSS class
796
+ #
797
+ # Note that the attribute is removed once there are no remaining classes.
798
+ #
799
+ # node # => <div class="section"></div>
800
+ # node.remove_class("section") # => <div></div>
801
+ #
802
+ # *Example*: Deleting multiple CSS classes
803
+ #
804
+ # Note that the "class" attribute is deleted once it's empty.
805
+ #
806
+ # node # => <div class="section header float"></div>
807
+ # node.remove_class(["section", "float"]) # => <div class="header"></div>
808
+ #
809
+ def remove_class(names = nil)
810
+ kwattr_remove("class", names)
811
+ end
812
+
813
+ # :call-seq:
814
+ # kwattr_values(attribute_name) → Array<String>
815
+ #
816
+ # Fetch values from a keyword attribute of a Node.
817
+ #
818
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
819
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
820
+ # contain CSS classes. But other keyword attributes exist, for instance
821
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
822
+ #
823
+ # See also #classes, #kwattr_add, #kwattr_append, #kwattr_remove
824
+ #
825
+ # [Parameters]
826
+ # - +attribute_name+ (String) The name of the keyword attribute to be inspected.
827
+ #
828
+ # [Returns]
829
+ # (Array<String>) The values present in the Node's +attribute_name+ attribute. If the
830
+ # attribute is empty or non-existent, the return value is an empty array.
831
+ #
832
+ # *Example:*
833
+ #
834
+ # node # => <a rel="nofollow noopener external">link</a>
835
+ # node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
836
+ #
837
+ # Since v1.11.0
838
+ def kwattr_values(attribute_name)
839
+ keywordify(get_attribute(attribute_name) || [])
840
+ end
841
+
842
+ # :call-seq:
843
+ # kwattr_add(attribute_name, keywords) → self
844
+ #
845
+ # Ensure that values are present in a keyword attribute.
846
+ #
847
+ # Any values in +keywords+ that already exist in the Node's attribute values are _not_
848
+ # added. Note that any existing duplicates in the attribute values are not removed. Compare
849
+ # with #kwattr_append.
850
+ #
851
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
852
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
853
+ # contain CSS classes. But other keyword attributes exist, for instance
854
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
855
+ #
856
+ # See also #add_class, #kwattr_values, #kwattr_append, #kwattr_remove
857
+ #
858
+ # [Parameters]
859
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
860
+ # - +keywords+ (String, Array<String>)
861
+ # Keywords to be added to the attribute named +attribute_name+. May be a string containing
862
+ # whitespace-delimited values, or an Array of String values. Any values already present will
863
+ # not be added. Any values not present will be added. If the named attribute does not exist,
864
+ # it is created.
865
+ #
866
+ # [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
867
+ #
868
+ # *Example:* Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
869
+ #
870
+ # Note that duplicates are not added.
871
+ #
872
+ # node # => <a></a>
873
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
874
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
875
+ #
876
+ # *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a
877
+ # String argument.
878
+ #
879
+ # Note that "nofollow" is not added because it is already present. Note also that the
880
+ # pre-existing duplicate "nofollow" is not removed.
881
+ #
882
+ # node # => <a rel="nofollow nofollow"></a>
883
+ # node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
884
+ #
885
+ # *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via
886
+ # an Array argument.
887
+ #
888
+ # node # => <a></a>
889
+ # node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
890
+ #
891
+ # Since v1.11.0
892
+ def kwattr_add(attribute_name, keywords)
893
+ keywords = keywordify(keywords)
894
+ current_kws = kwattr_values(attribute_name)
895
+ new_kws = (current_kws + (keywords - current_kws)).join(" ")
896
+ set_attribute(attribute_name, new_kws)
371
897
  self
372
898
  end
373
899
 
374
- ###
375
- # Append +name+ to the "class" attribute value of this Node and
376
- # return self. The value is simply appended without checking if
377
- # it is already in the current value. If no "class" attribute
378
- # exists yet, one is created with the given value.
900
+ # :call-seq:
901
+ # kwattr_append(attribute_name, keywords) → self
902
+ #
903
+ # Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
904
+ # #kwattr_add.
905
+ #
906
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
907
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
908
+ # contain CSS classes. But other keyword attributes exist, for instance
909
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
910
+ #
911
+ # See also #append_class, #kwattr_values, #kwattr_add, #kwattr_remove
912
+ #
913
+ # [Parameters]
914
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
915
+ # - +keywords+ (String, Array<String>)
916
+ # Keywords to be added to the attribute named +attribute_name+. May be a string containing
917
+ # whitespace-delimited values, or an Array of String values. All values passed in will be
918
+ # appended to the named attribute even if they are already present in the attribute. If the
919
+ # named attribute does not exist, it is created.
920
+ #
921
+ # [Returns] +self+ (Node) for ease of chaining method calls.
379
922
  #
380
- # More than one class may be appended at a time, separated by a
381
- # space.
382
- def append_class name
383
- self['class'] = (classes + name.scan(/\S+/)).join(' ')
923
+ # *Example:* Append "nofollow" to the +rel+ attribute.
924
+ #
925
+ # Note that duplicates are added.
926
+ #
927
+ # node # => <a></a>
928
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
929
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a>
930
+ #
931
+ # *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
932
+ #
933
+ # Note that "nofollow" is appended even though it is already present.
934
+ #
935
+ # node # => <a rel="nofollow"></a>
936
+ # node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
937
+ #
938
+ #
939
+ # *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
940
+ #
941
+ # node # => <a></a>
942
+ # node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
943
+ #
944
+ # Since v1.11.0
945
+ def kwattr_append(attribute_name, keywords)
946
+ keywords = keywordify(keywords)
947
+ current_kws = kwattr_values(attribute_name)
948
+ new_kws = (current_kws + keywords).join(" ")
949
+ set_attribute(attribute_name, new_kws)
384
950
  self
385
951
  end
386
952
 
387
- ###
388
- # Remove +name+ from the "class" attribute value of this Node
389
- # and return self. If there are many occurrences of the name,
390
- # they are all removed.
953
+ # :call-seq:
954
+ # kwattr_remove(attribute_name, keywords) → self
391
955
  #
392
- # More than one class may be removed at a time, separated by a
393
- # space.
956
+ # Remove keywords from a keyword attribute. Any matching keywords that exist in the named
957
+ # attribute are removed, including any multiple entries.
394
958
  #
395
- # If no class name is left after removal, or when +name+ is nil,
396
- # the "class" attribute is removed from this Node.
397
- def remove_class name = nil
398
- if name
399
- names = classes - name.scan(/\S+/)
400
- if names.empty?
401
- delete 'class'
402
- else
403
- self['class'] = names.join(' ')
404
- end
959
+ # If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
960
+ # deleted from the node.
961
+ #
962
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
963
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
964
+ # contain CSS classes. But other keyword attributes exist, for instance
965
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
966
+ #
967
+ # See also #remove_class, #kwattr_values, #kwattr_add, #kwattr_append
968
+ #
969
+ # [Parameters]
970
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
971
+ # - +keywords+ (String, Array<String>)
972
+ # Keywords to be removed from the attribute named +attribute_name+. May be a string
973
+ # containing whitespace-delimited values, or an Array of String values. Any keywords present
974
+ # in the named attribute will be removed. If no keywords remain, or if +keywords+ is nil,
975
+ # the attribute is deleted.
976
+ #
977
+ # [Returns] +self+ (Node) for ease of chaining method calls.
978
+ #
979
+ # *Example:*
980
+ #
981
+ # Note that the +rel+ attribute is deleted when empty.
982
+ #
983
+ # node # => <a rel="nofollow noreferrer">link</a>
984
+ # node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
985
+ # node.kwattr_remove("rel", "noreferrer") # => <a>link</a>
986
+ #
987
+ # Since v1.11.0
988
+ def kwattr_remove(attribute_name, keywords)
989
+ if keywords.nil?
990
+ remove_attribute(attribute_name)
991
+ return self
992
+ end
993
+
994
+ keywords = keywordify(keywords)
995
+ current_kws = kwattr_values(attribute_name)
996
+ new_kws = current_kws - keywords
997
+ if new_kws.empty?
998
+ remove_attribute(attribute_name)
405
999
  else
406
- delete "class"
1000
+ set_attribute(attribute_name, new_kws.join(" "))
407
1001
  end
408
1002
  self
409
1003
  end
410
1004
 
411
- ###
412
- # Remove the attribute named +name+
413
- def remove_attribute name
414
- attr = attributes[name].remove if key? name
415
- clear_xpath_context if Nokogiri.jruby?
416
- attr
417
- end
418
- alias :delete :remove_attribute
1005
+ alias_method :delete, :remove_attribute
1006
+ alias_method :get_attribute, :[]
1007
+ alias_method :attr, :[]
1008
+ alias_method :set_attribute, :[]=
1009
+ alias_method :has_attribute?, :key?
1010
+
1011
+ # :section:
419
1012
 
420
1013
  ###
421
1014
  # Returns true if this Node matches +selector+
422
- def matches? selector
1015
+ def matches?(selector)
423
1016
  ancestors.last.search(selector).include?(self)
424
1017
  end
425
1018
 
426
1019
  ###
427
1020
  # Create a DocumentFragment containing +tags+ that is relative to _this_
428
1021
  # context node.
429
- def fragment tags
430
- type = document.html? ? Nokogiri::HTML : Nokogiri::XML
431
- type::DocumentFragment.new(document, tags, self)
1022
+ def fragment(tags)
1023
+ document.related_class("DocumentFragment").new(document, tags, self)
432
1024
  end
433
1025
 
434
1026
  ###
435
1027
  # Parse +string_or_io+ as a document fragment within the context of
436
1028
  # *this* node. Returns a XML::NodeSet containing the nodes parsed from
437
1029
  # +string_or_io+.
438
- def parse string_or_io, options = nil
1030
+ def parse(string_or_io, options = nil)
439
1031
  ##
440
1032
  # When the current node is unparented and not an element node, use the
441
1033
  # document as the parsing context instead. Otherwise, the in-context
@@ -446,61 +1038,87 @@ module Nokogiri
446
1038
  end
447
1039
 
448
1040
  options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
449
- if Integer === options
450
- options = Nokogiri::XML::ParseOptions.new(options)
451
- end
452
- # Give the options to the user
1041
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
453
1042
  yield options if block_given?
454
1043
 
455
- contents = string_or_io.respond_to?(:read) ?
456
- string_or_io.read :
1044
+ contents = if string_or_io.respond_to?(:read)
1045
+ string_or_io.read
1046
+ else
457
1047
  string_or_io
1048
+ end
458
1049
 
459
1050
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
460
1051
 
461
- ##
462
- # This is a horrible hack, but I don't care. See #313 for background.
1052
+ # libxml2 does not obey the +recover+ option after encountering errors during +in_context+
1053
+ # parsing, and so this horrible hack is here to try to emulate recovery behavior.
1054
+ #
1055
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1056
+ # would have been inherited from the context node won't be handled correctly. This hack was
1057
+ # written in 2010, and I regret it, because it's silently degrading functionality in a way
1058
+ # that's not easily prevented (or even detected).
1059
+ #
1060
+ # I think preferable behavior would be to either:
1061
+ #
1062
+ # a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
1063
+ # b. don't recover, but raise a sensible exception
1064
+ #
1065
+ # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
1066
+ # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
463
1067
  error_count = document.errors.length
464
1068
  node_set = in_context(contents, options.to_i)
465
- if node_set.empty? and document.errors.length > error_count and options.recover?
466
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
467
- node_set = fragment.children
1069
+ if node_set.empty? && (document.errors.length > error_count)
1070
+ if options.recover?
1071
+ fragment = document.related_class("DocumentFragment").parse(contents)
1072
+ node_set = fragment.children
1073
+ else
1074
+ raise document.errors[error_count]
1075
+ end
468
1076
  end
469
1077
  node_set
470
1078
  end
471
1079
 
472
- ####
473
- # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
474
- def content= string
475
- self.native_content = encode_special_chars(string.to_s)
476
- end
477
-
478
- ###
479
- # Set the parent Node for this Node
480
- def parent= parent_node
481
- parent_node.add_child(self)
482
- parent_node
483
- end
484
-
485
- ###
486
- # Returns a Hash of {prefix => value} for all namespaces on this
487
- # node and its ancestors.
1080
+ # :call-seq:
1081
+ # namespaces() Hash<String(Namespace#prefix) String(Namespace#href)>
1082
+ #
1083
+ # Fetch all the namespaces on this node and its ancestors.
1084
+ #
1085
+ # Note that the keys in this hash XML attributes that would be used to define this namespace,
1086
+ # such as "xmlns:prefix", not just the prefix.
488
1087
  #
489
- # This method returns the same namespaces as #namespace_scopes.
1088
+ # The default namespace for this node will be included with key "xmlns".
1089
+ #
1090
+ # See also #namespace_scopes
1091
+ #
1092
+ # [Returns]
1093
+ # Hash containing all the namespaces on this node and its ancestors. The hash keys are the
1094
+ # namespace prefix, and the hash value for each key is the namespace URI.
1095
+ #
1096
+ # *Example:*
1097
+ #
1098
+ # doc = Nokogiri::XML(<<~EOF)
1099
+ # <root xmlns="http://example.com/root" xmlns:in_scope="http://example.com/in_scope">
1100
+ # <first/>
1101
+ # <second xmlns="http://example.com/child"/>
1102
+ # <third xmlns:foo="http://example.com/foo"/>
1103
+ # </root>
1104
+ # EOF
1105
+ # doc.at_xpath("//root:first", "root" => "http://example.com/root").namespaces
1106
+ # # => {"xmlns"=>"http://example.com/root",
1107
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
1108
+ # doc.at_xpath("//child:second", "child" => "http://example.com/child").namespaces
1109
+ # # => {"xmlns"=>"http://example.com/child",
1110
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
1111
+ # doc.at_xpath("//root:third", "root" => "http://example.com/root").namespaces
1112
+ # # => {"xmlns:foo"=>"http://example.com/foo",
1113
+ # # "xmlns"=>"http://example.com/root",
1114
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
490
1115
  #
491
- # Returns namespaces in scope for self -- those defined on self
492
- # element directly or any ancestor node -- as a Hash of
493
- # attribute-name/value pairs. Note that the keys in this hash
494
- # XML attributes that would be used to define this namespace,
495
- # such as "xmlns:prefix", not just the prefix. Default namespace
496
- # set on self will be included with key "xmlns". However,
497
- # default namespaces set on ancestor will NOT be, even if self
498
- # has no explicit default namespace.
499
1116
  def namespaces
500
- Hash[namespace_scopes.map { |nd|
501
- key = ['xmlns', nd.prefix].compact.join(':')
502
- [key, nd.href]
503
- }]
1117
+ namespace_scopes.each_with_object({}) do |ns, hash|
1118
+ prefix = ns.prefix
1119
+ key = prefix ? "xmlns:#{prefix}" : "xmlns"
1120
+ hash[key] = ns.href
1121
+ end
504
1122
  end
505
1123
 
506
1124
  # Returns true if this is a Comment
@@ -518,14 +1136,14 @@ module Nokogiri
518
1136
  type == DOCUMENT_NODE
519
1137
  end
520
1138
 
521
- # Returns true if this is an HTML::Document node
1139
+ # Returns true if this is an HTML4::Document or HTML5::Document node
522
1140
  def html?
523
1141
  type == HTML_DOCUMENT_NODE
524
1142
  end
525
1143
 
526
1144
  # Returns true if this is a Document
527
1145
  def document?
528
- is_a? XML::Document
1146
+ is_a?(XML::Document)
529
1147
  end
530
1148
 
531
1149
  # Returns true if this is a ProcessingInstruction node
@@ -544,11 +1162,12 @@ module Nokogiri
544
1162
  end
545
1163
 
546
1164
  ###
547
- # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
1165
+ # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
548
1166
  # nil on XML documents and on unknown tags.
549
1167
  def description
550
1168
  return nil if document.xml?
551
- Nokogiri::HTML::ElementDescription[name]
1169
+
1170
+ Nokogiri::HTML4::ElementDescription[name]
552
1171
  end
553
1172
 
554
1173
  ###
@@ -562,7 +1181,8 @@ module Nokogiri
562
1181
  def element?
563
1182
  type == ELEMENT_NODE
564
1183
  end
565
- alias :elem? :element?
1184
+
1185
+ alias_method :elem?, :element?
566
1186
 
567
1187
  ###
568
1188
  # Turn this node in to a string. If the document is HTML, this method
@@ -572,28 +1192,29 @@ module Nokogiri
572
1192
  end
573
1193
 
574
1194
  # Get the inner_html for this node's Node#children
575
- def inner_html *args
1195
+ def inner_html(*args)
576
1196
  children.map { |x| x.to_html(*args) }.join
577
1197
  end
578
1198
 
579
1199
  # Get the path to this node as a CSS expression
580
1200
  def css_path
581
- path.split(/\//).map { |part|
582
- part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
583
- }.compact.join(' > ')
1201
+ path.split(%r{/}).filter_map do |part|
1202
+ part.empty? ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
1203
+ end.join(" > ")
584
1204
  end
585
1205
 
586
1206
  ###
587
1207
  # Get a list of ancestor Node for this Node. If +selector+ is given,
588
1208
  # the ancestors must match +selector+
589
- def ancestors selector = nil
1209
+ def ancestors(selector = nil)
590
1210
  return NodeSet.new(document) unless respond_to?(:parent)
591
1211
  return NodeSet.new(document) unless parent
592
1212
 
593
1213
  parents = [parent]
594
1214
 
595
1215
  while parents.last.respond_to?(:parent)
596
- break unless ctx_parent = parents.last.parent
1216
+ break unless (ctx_parent = parents.last.parent)
1217
+
597
1218
  parents << ctx_parent
598
1219
  end
599
1220
 
@@ -602,89 +1223,76 @@ module Nokogiri
602
1223
  root = parents.last
603
1224
  search_results = root.search(selector)
604
1225
 
605
- NodeSet.new(document, parents.find_all { |parent|
1226
+ NodeSet.new(document, parents.find_all do |parent|
606
1227
  search_results.include?(parent)
607
- })
608
- end
609
-
610
- ###
611
- # Adds a default namespace supplied as a string +url+ href, to self.
612
- # The consequence is as an xmlns attribute with supplied argument were
613
- # present in parsed XML. A default namespace set with this method will
614
- # now show up in #attributes, but when this node is serialized to XML an
615
- # "xmlns" attribute will appear. See also #namespace and #namespace=
616
- def default_namespace= url
617
- add_namespace_definition(nil, url)
618
- end
619
- alias :add_namespace :add_namespace_definition
620
-
621
- ###
622
- # Set the default namespace on this node (as would be defined with an
623
- # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
624
- # a Namespace added this way will NOT be serialized as an xmlns attribute
625
- # for this node. You probably want #default_namespace= instead, or perhaps
626
- # #add_namespace_definition with a nil prefix argument.
627
- def namespace= ns
628
- return set_namespace(ns) unless ns
629
-
630
- unless Nokogiri::XML::Namespace === ns
631
- raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
632
- end
633
- if ns.document != document
634
- raise ArgumentError, 'namespace must be declared on the same document'
635
- end
636
-
637
- set_namespace ns
1228
+ end)
638
1229
  end
639
1230
 
640
1231
  ####
641
1232
  # Yields self and all children to +block+ recursively.
642
- def traverse &block
643
- children.each{|j| j.traverse(&block) }
644
- block.call(self)
1233
+ def traverse(&block)
1234
+ children.each { |j| j.traverse(&block) }
1235
+ yield(self)
645
1236
  end
646
1237
 
647
1238
  ###
648
1239
  # Accept a visitor. This method calls "visit" on +visitor+ with self.
649
- def accept visitor
1240
+ def accept(visitor)
650
1241
  visitor.visit(self)
651
1242
  end
652
1243
 
653
1244
  ###
654
1245
  # Test to see if this Node is equal to +other+
655
- def == other
1246
+ def ==(other)
656
1247
  return false unless other
657
1248
  return false unless other.respond_to?(:pointer_id)
1249
+
658
1250
  pointer_id == other.pointer_id
659
1251
  end
660
1252
 
661
1253
  ###
662
- # Serialize Node using +options+. Save options can also be set using a
663
- # block. See SaveOptions.
1254
+ # Compare two Node objects with respect to their Document. Nodes from
1255
+ # different documents cannot be compared.
1256
+ def <=>(other)
1257
+ return nil unless other.is_a?(Nokogiri::XML::Node)
1258
+ return nil unless document == other.document
1259
+
1260
+ compare(other)
1261
+ end
1262
+
1263
+ # :section: Serialization and Generating Output
1264
+
1265
+ ###
1266
+ # Serialize Node using +options+. Save options can also be set using a block.
1267
+ #
1268
+ # See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
664
1269
  #
665
1270
  # These two statements are equivalent:
666
1271
  #
667
- # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
1272
+ # node.serialize(encoding: 'UTF-8', save_with: FORMAT | AS_XML)
668
1273
  #
669
1274
  # or
670
1275
  #
671
- # node.serialize(:encoding => 'UTF-8') do |config|
1276
+ # node.serialize(encoding: 'UTF-8') do |config|
672
1277
  # config.format.as_xml
673
1278
  # end
674
1279
  #
675
- def serialize *args, &block
676
- options = args.first.is_a?(Hash) ? args.shift : {
677
- :encoding => args[0],
678
- :save_with => args[1]
679
- }
1280
+ def serialize(*args, &block)
1281
+ options = if args.first.is_a?(Hash)
1282
+ args.shift
1283
+ else
1284
+ {
1285
+ encoding: args[0],
1286
+ save_with: args[1],
1287
+ }
1288
+ end
680
1289
 
681
- encoding = options[:encoding] || document.encoding
682
- options[:encoding] = encoding
1290
+ options[:encoding] ||= document.encoding
1291
+ encoding = Encoding.find(options[:encoding] || "UTF-8")
683
1292
 
684
- outstring = String.new
685
- outstring.force_encoding(Encoding.find(encoding || 'utf-8'))
686
- io = StringIO.new(outstring)
687
- write_to io, options, &block
1293
+ io = StringIO.new(String.new(encoding: encoding))
1294
+
1295
+ write_to(io, options, &block)
688
1296
  io.string
689
1297
  end
690
1298
 
@@ -695,17 +1303,17 @@ module Nokogiri
695
1303
  #
696
1304
  # See Node#write_to for a list of +options+. For formatted output,
697
1305
  # use Node#to_xhtml instead.
698
- def to_html options = {}
699
- to_format SaveOptions::DEFAULT_HTML, options
1306
+ def to_html(options = {})
1307
+ to_format(SaveOptions::DEFAULT_HTML, options)
700
1308
  end
701
1309
 
702
1310
  ###
703
1311
  # Serialize this Node to XML using +options+
704
1312
  #
705
- # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
1313
+ # doc.to_xml(indent: 5, encoding: 'UTF-8')
706
1314
  #
707
1315
  # See Node#write_to for a list of +options+
708
- def to_xml options = {}
1316
+ def to_xml(options = {})
709
1317
  options[:save_with] ||= SaveOptions::DEFAULT_XML
710
1318
  serialize(options)
711
1319
  end
@@ -713,62 +1321,76 @@ module Nokogiri
713
1321
  ###
714
1322
  # Serialize this Node to XHTML using +options+
715
1323
  #
716
- # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
1324
+ # doc.to_xhtml(indent: 5, encoding: 'UTF-8')
717
1325
  #
718
1326
  # See Node#write_to for a list of +options+
719
- def to_xhtml options = {}
720
- to_format SaveOptions::DEFAULT_XHTML, options
1327
+ def to_xhtml(options = {})
1328
+ to_format(SaveOptions::DEFAULT_XHTML, options)
721
1329
  end
722
1330
 
723
1331
  ###
724
- # Write Node to +io+ with +options+. +options+ modify the output of
725
- # this method. Valid options are:
1332
+ # :call-seq:
1333
+ # write_to(io, *options)
1334
+ #
1335
+ # Serialize this node or document to +io+.
726
1336
  #
727
- # * +:encoding+ for changing the encoding
728
- # * +:indent_text+ the indentation text, defaults to one space
729
- # * +:indent+ the number of +:indent_text+ to use, defaults to 2
730
- # * +:save_with+ a combination of SaveOptions constants.
1337
+ # [Parameters]
1338
+ # - +io+ (IO) An IO-like object to which the serialized content will be written.
1339
+ # - +options+ (Hash) See below
1340
+ #
1341
+ # [Options]
1342
+ # * +:encoding+ (String or Encoding) specify the encoding of the output (defaults to document encoding)
1343
+ # * +:indent_text+ (String) the indentation text (defaults to <code>" "</code>)
1344
+ # * +:indent+ (Integer) the number of +:indent_text+ to use (defaults to +2+)
1345
+ # * +:save_with+ (Integer) a combination of SaveOptions constants
731
1346
  #
732
1347
  # To save with UTF-8 indented twice:
733
1348
  #
734
- # node.write_to(io, :encoding => 'UTF-8', :indent => 2)
1349
+ # node.write_to(io, encoding: 'UTF-8', indent: 2)
735
1350
  #
736
1351
  # To save indented with two dashes:
737
1352
  #
738
- # node.write_to(io, :indent_text => '-', :indent => 2)
1353
+ # node.write_to(io, indent_text: '-', indent: 2)
739
1354
  #
740
- def write_to io, *options
741
- options = options.first.is_a?(Hash) ? options.shift : {}
742
- encoding = options[:encoding] || options[0]
1355
+ def write_to(io, *options)
1356
+ options = options.first.is_a?(Hash) ? options.shift : {}
1357
+ encoding = options[:encoding] || options[0] || document.encoding
743
1358
  if Nokogiri.jruby?
744
- save_options = options[:save_with] || options[1]
745
- indent_times = options[:indent] || 0
1359
+ save_options = options[:save_with] || options[1]
1360
+ indent_times = options[:indent] || 0
746
1361
  else
747
- save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
748
- indent_times = options[:indent] || 2
1362
+ save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
1363
+ indent_times = options[:indent] || 2
749
1364
  end
750
- indent_text = options[:indent_text] || ' '
1365
+ indent_text = options[:indent_text] || " "
1366
+
1367
+ # Any string times 0 returns an empty string. Therefore, use the same
1368
+ # string instead of generating a new empty string for every node with
1369
+ # zero indentation.
1370
+ indentation = indent_times.zero? ? "" : (indent_text * indent_times)
751
1371
 
752
1372
  config = SaveOptions.new(save_options.to_i)
753
1373
  yield config if block_given?
754
1374
 
755
- native_write_to(io, encoding, indent_text * indent_times, config.options)
1375
+ encoding = encoding.is_a?(Encoding) ? encoding.name : encoding
1376
+
1377
+ native_write_to(io, encoding, indentation, config.options)
756
1378
  end
757
1379
 
758
1380
  ###
759
1381
  # Write Node as HTML to +io+ with +options+
760
1382
  #
761
1383
  # See Node#write_to for a list of +options+
762
- def write_html_to io, options = {}
763
- write_format_to SaveOptions::DEFAULT_HTML, io, options
1384
+ def write_html_to(io, options = {})
1385
+ write_format_to(SaveOptions::DEFAULT_HTML, io, options)
764
1386
  end
765
1387
 
766
1388
  ###
767
1389
  # Write Node as XHTML to +io+ with +options+
768
1390
  #
769
1391
  # See Node#write_to for a list of +options+
770
- def write_xhtml_to io, options = {}
771
- write_format_to SaveOptions::DEFAULT_XHTML, io, options
1392
+ def write_xhtml_to(io, options = {})
1393
+ write_format_to(SaveOptions::DEFAULT_XHTML, io, options)
772
1394
  end
773
1395
 
774
1396
  ###
@@ -777,110 +1399,168 @@ module Nokogiri
777
1399
  # doc.write_xml_to io, :encoding => 'UTF-8'
778
1400
  #
779
1401
  # See Node#write_to for a list of options
780
- def write_xml_to io, options = {}
1402
+ def write_xml_to(io, options = {})
781
1403
  options[:save_with] ||= SaveOptions::DEFAULT_XML
782
- write_to io, options
1404
+ write_to(io, options)
783
1405
  end
784
1406
 
785
- ###
786
- # Compare two Node objects with respect to their Document. Nodes from
787
- # different documents cannot be compared.
788
- def <=> other
789
- return nil unless other.is_a?(Nokogiri::XML::Node)
790
- return nil unless document == other.document
791
- compare other
1407
+ def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
1408
+ c14n_root = self
1409
+ document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
1410
+ tn = node.is_a?(XML::Node) ? node : parent
1411
+ tn == c14n_root || tn.ancestors.include?(c14n_root)
1412
+ end
792
1413
  end
793
1414
 
794
- ###
795
- # Do xinclude substitution on the subtree below node. If given a block, a
796
- # Nokogiri::XML::ParseOptions object initialized from +options+, will be
797
- # passed to it, allowing more convenient modification of the parser options.
798
- def do_xinclude options = XML::ParseOptions::DEFAULT_XML, &block
799
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
1415
+ DECONSTRUCT_KEYS = [:name, :attributes, :children, :namespace, :content, :elements, :inner_html].freeze # :nodoc:
1416
+ DECONSTRUCT_METHODS = { attributes: :attribute_nodes }.freeze # :nodoc:
800
1417
 
801
- # give options to user
802
- yield options if block_given?
803
-
804
- # call c extension
805
- process_xincludes(options.to_i)
1418
+ #
1419
+ # :call-seq: deconstruct_keys(array_of_names) Hash
1420
+ #
1421
+ # Returns a hash describing the Node, to use in pattern matching.
1422
+ #
1423
+ # Valid keys and their values:
1424
+ # - +name+ → (String) The name of this node, or "text" if it is a Text node.
1425
+ # - +namespace+ → (Namespace, nil) The namespace of this node, or nil if there is no namespace.
1426
+ # - +attributes+ → (Array<Attr>) The attributes of this node.
1427
+ # - +children+ → (Array<Node>) The children of this node. 💡 Note this includes text nodes.
1428
+ # - +elements+ → (Array<Node>) The child elements of this node. 💡 Note this does not include text nodes.
1429
+ # - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
1430
+ # - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
1431
+ #
1432
+ # ⚡ This is an experimental feature, available since v1.14.0
1433
+ #
1434
+ # *Example*
1435
+ #
1436
+ # doc = Nokogiri::XML.parse(<<~XML)
1437
+ # <?xml version="1.0"?>
1438
+ # <parent xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
1439
+ # <child1 foo="abc" noko:bar="def">First</child1>
1440
+ # <noko:child2 foo="qwe" noko:bar="rty">Second</noko:child2>
1441
+ # </parent>
1442
+ # XML
1443
+ #
1444
+ # doc.root.deconstruct_keys([:name, :namespace])
1445
+ # # => {:name=>"parent",
1446
+ # # :namespace=>
1447
+ # # #(Namespace:0x35c { href = "http://nokogiri.org/ns/default" })}
1448
+ #
1449
+ # doc.root.deconstruct_keys([:inner_html, :content])
1450
+ # # => {:content=>"\n" + " First\n" + " Second\n",
1451
+ # # :inner_html=>
1452
+ # # "\n" +
1453
+ # # " <child1 foo=\"abc\" noko:bar=\"def\">First</child1>\n" +
1454
+ # # " <noko:child2 foo=\"qwe\" noko:bar=\"rty\">Second</noko:child2>\n"}
1455
+ #
1456
+ # doc.root.elements.first.deconstruct_keys([:attributes])
1457
+ # # => {:attributes=>
1458
+ # # [#(Attr:0x370 { name = "foo", value = "abc" }),
1459
+ # # #(Attr:0x384 {
1460
+ # # name = "bar",
1461
+ # # namespace = #(Namespace:0x398 {
1462
+ # # prefix = "noko",
1463
+ # # href = "http://nokogiri.org/ns/noko"
1464
+ # # }),
1465
+ # # value = "def"
1466
+ # # })]}
1467
+ #
1468
+ def deconstruct_keys(keys)
1469
+ requested_keys = DECONSTRUCT_KEYS & keys
1470
+ {}.tap do |values|
1471
+ requested_keys.each do |key|
1472
+ method = DECONSTRUCT_METHODS[key] || key
1473
+ values[key] = send(method)
1474
+ end
1475
+ end
806
1476
  end
807
1477
 
808
- def canonicalize(mode=XML::XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
809
- c14n_root = self
810
- document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
811
- tn = node.is_a?(XML::Node) ? node : parent
812
- tn == c14n_root || tn.ancestors.include?(c14n_root)
1478
+ # :section:
1479
+
1480
+ protected
1481
+
1482
+ def coerce(data)
1483
+ case data
1484
+ when XML::NodeSet
1485
+ return data
1486
+ when XML::DocumentFragment
1487
+ return data.children
1488
+ when String
1489
+ return fragment(data).children
1490
+ when Document, XML::Attr
1491
+ # unacceptable
1492
+ when XML::Node
1493
+ return data
813
1494
  end
1495
+
1496
+ raise ArgumentError, <<~EOERR
1497
+ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1498
+ (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1499
+ EOERR
814
1500
  end
815
1501
 
816
1502
  private
817
1503
 
818
- def add_sibling next_or_previous, node_or_tags
819
- impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
820
- iter = (next_or_previous == :next) ? :reverse_each : :each
1504
+ def keywordify(keywords)
1505
+ case keywords
1506
+ when Enumerable
1507
+ keywords
1508
+ when String
1509
+ keywords.scan(/\S+/)
1510
+ else
1511
+ raise ArgumentError,
1512
+ "Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}"
1513
+ end
1514
+ end
1515
+
1516
+ def add_sibling(next_or_previous, node_or_tags)
1517
+ raise("Cannot add sibling to a node with no parent") unless parent
821
1518
 
822
- node_or_tags = coerce node_or_tags
1519
+ impl = next_or_previous == :next ? :add_next_sibling_node : :add_previous_sibling_node
1520
+ iter = next_or_previous == :next ? :reverse_each : :each
1521
+
1522
+ node_or_tags = parent.coerce(node_or_tags)
823
1523
  if node_or_tags.is_a?(XML::NodeSet)
824
1524
  if text?
825
- pivot = Nokogiri::XML::Node.new 'dummy', document
826
- send impl, pivot
1525
+ pivot = Nokogiri::XML::Node.new("dummy", document)
1526
+ send(impl, pivot)
827
1527
  else
828
1528
  pivot = self
829
1529
  end
830
- node_or_tags.send(iter) { |n| pivot.send impl, n }
1530
+ node_or_tags.send(iter) { |n| pivot.send(impl, n) }
831
1531
  pivot.unlink if text?
832
1532
  else
833
- send impl, node_or_tags
1533
+ send(impl, node_or_tags)
834
1534
  end
835
1535
  node_or_tags
836
1536
  end
837
1537
 
838
- def to_format save_option, options
839
- # FIXME: this is a hack around broken libxml versions
840
- return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1538
+ USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
1539
+ private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
1540
+
1541
+ def to_format(save_option, options)
1542
+ return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
841
1543
 
842
1544
  options[:save_with] = save_option unless options[:save_with]
843
1545
  serialize(options)
844
1546
  end
845
1547
 
846
- def write_format_to save_option, io, options
847
- # FIXME: this is a hack around broken libxml versions
848
- return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1548
+ def write_format_to(save_option, io, options)
1549
+ return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
849
1550
 
850
1551
  options[:save_with] ||= save_option
851
- write_to io, options
1552
+ write_to(io, options)
852
1553
  end
853
1554
 
854
1555
  def inspect_attributes
855
1556
  [:name, :namespace, :attribute_nodes, :children]
856
1557
  end
857
1558
 
858
- def coerce data # :nodoc:
859
- case data
860
- when XML::NodeSet
861
- return data
862
- when XML::DocumentFragment
863
- return data.children
864
- when String
865
- return fragment(data).children
866
- when Document, XML::Attr
867
- # unacceptable
868
- when XML::Node
869
- return data
870
- end
871
-
872
- raise ArgumentError, <<-EOERR
873
- Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
874
- (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
875
- EOERR
876
- end
877
-
878
- # @private
879
- IMPLIED_XPATH_CONTEXTS = [ './/'.freeze ].freeze # :nodoc:
1559
+ IMPLIED_XPATH_CONTEXTS = [".//"].freeze
880
1560
 
881
- def add_child_node_and_reparent_attrs node # :nodoc:
882
- add_child_node node
883
- node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
1561
+ def add_child_node_and_reparent_attrs(node)
1562
+ add_child_node(node)
1563
+ node.attribute_nodes.find_all { |a| a.name.include?(":") }.each do |attr_node|
884
1564
  attr_node.remove
885
1565
  node[attr_node.name] = attr_node.value
886
1566
  end
@@ -888,3 +1568,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
888
1568
  end
889
1569
  end
890
1570
  end
1571
+
1572
+ require_relative "node/save_options"