nokogiri 1.5.10 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (328)
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +278 -0
  6. data/bin/nokogiri +50 -10
  7. data/dependencies.yml +74 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +944 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +305 -201
  25. data/ext/nokogiri/xml_document_fragment.c +13 -15
  26. data/ext/nokogiri/xml_dtd.c +54 -48
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +30 -19
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +808 -503
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +198 -186
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +162 -98
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4886 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/css/node.rb +1 -50
  92. data/lib/nokogiri/css/parser.rb +317 -286
  93. data/lib/nokogiri/css/parser.y +57 -43
  94. data/lib/nokogiri/css/parser_extras.rb +39 -36
  95. data/lib/nokogiri/css/syntax_error.rb +2 -1
  96. data/lib/nokogiri/css/tokenizer.rb +105 -103
  97. data/lib/nokogiri/css/tokenizer.rex +5 -5
  98. data/lib/nokogiri/css/xpath_visitor.rb +137 -48
  99. data/lib/nokogiri/css.rb +15 -14
  100. data/lib/nokogiri/decorators/slop.rb +13 -5
  101. data/lib/nokogiri/extension.rb +31 -0
  102. data/lib/nokogiri/gumbo.rb +14 -0
  103. data/lib/nokogiri/html.rb +32 -27
  104. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  105. data/lib/nokogiri/{html → html4}/document.rb +118 -50
  106. data/lib/nokogiri/{html → html4}/document_fragment.rb +20 -11
  107. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  109. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  110. data/lib/nokogiri/{html → html4}/sax/parser.rb +22 -14
  111. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  112. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  113. data/lib/nokogiri/html4.rb +40 -0
  114. data/lib/nokogiri/html5/document.rb +74 -0
  115. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  116. data/lib/nokogiri/html5/node.rb +93 -0
  117. data/lib/nokogiri/html5.rb +473 -0
  118. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  119. data/lib/nokogiri/syntax_error.rb +1 -0
  120. data/lib/nokogiri/version/constant.rb +5 -0
  121. data/lib/nokogiri/version/info.rb +215 -0
  122. data/lib/nokogiri/version.rb +3 -91
  123. data/lib/nokogiri/xml/attr.rb +1 -0
  124. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  125. data/lib/nokogiri/xml/builder.rb +75 -33
  126. data/lib/nokogiri/xml/cdata.rb +1 -0
  127. data/lib/nokogiri/xml/character_data.rb +1 -0
  128. data/lib/nokogiri/xml/document.rb +157 -54
  129. data/lib/nokogiri/xml/document_fragment.rb +55 -8
  130. data/lib/nokogiri/xml/dtd.rb +15 -4
  131. data/lib/nokogiri/xml/element_content.rb +1 -0
  132. data/lib/nokogiri/xml/element_decl.rb +1 -0
  133. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  134. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  135. data/lib/nokogiri/xml/namespace.rb +1 -0
  136. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  137. data/lib/nokogiri/xml/node.rb +712 -431
  138. data/lib/nokogiri/xml/node_set.rb +140 -123
  139. data/lib/nokogiri/xml/notation.rb +1 -0
  140. data/lib/nokogiri/xml/parse_options.rb +31 -0
  141. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  142. data/lib/nokogiri/xml/pp/node.rb +1 -0
  143. data/lib/nokogiri/xml/pp.rb +3 -2
  144. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  145. data/lib/nokogiri/xml/reader.rb +9 -12
  146. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  147. data/lib/nokogiri/xml/sax/document.rb +25 -30
  148. data/lib/nokogiri/xml/sax/parser.rb +8 -8
  149. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  150. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  151. data/lib/nokogiri/xml/sax.rb +5 -4
  152. data/lib/nokogiri/xml/schema.rb +13 -4
  153. data/lib/nokogiri/xml/searchable.rb +239 -0
  154. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  155. data/lib/nokogiri/xml/text.rb +1 -0
  156. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  157. data/lib/nokogiri/xml/xpath.rb +4 -5
  158. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  159. data/lib/nokogiri/xml.rb +37 -35
  160. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  161. data/lib/nokogiri/xslt.rb +17 -16
  162. data/lib/nokogiri.rb +55 -58
  163. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  164. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  165. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  166. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  167. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  171. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  172. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  173. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  174. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  175. metadata +307 -459
  176. data/.autotest +0 -26
  177. data/.gemtest +0 -0
  178. data/CHANGELOG.ja.rdoc +0 -785
  179. data/CHANGELOG.rdoc +0 -783
  180. data/C_CODING_STYLE.rdoc +0 -33
  181. data/Manifest.txt +0 -303
  182. data/README.ja.rdoc +0 -106
  183. data/README.rdoc +0 -175
  184. data/ROADMAP.md +0 -90
  185. data/Rakefile +0 -228
  186. data/STANDARD_RESPONSES.md +0 -47
  187. data/Y_U_NO_GEMSPEC.md +0 -155
  188. data/build_all +0 -105
  189. data/ext/nokogiri/html_document.c +0 -170
  190. data/ext/nokogiri/html_document.h +0 -10
  191. data/ext/nokogiri/html_element_description.c +0 -279
  192. data/ext/nokogiri/html_element_description.h +0 -10
  193. data/ext/nokogiri/html_entity_lookup.c +0 -32
  194. data/ext/nokogiri/html_entity_lookup.h +0 -8
  195. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  196. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  197. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  198. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  199. data/ext/nokogiri/xml_attr.h +0 -9
  200. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  201. data/ext/nokogiri/xml_cdata.h +0 -9
  202. data/ext/nokogiri/xml_comment.h +0 -9
  203. data/ext/nokogiri/xml_document.h +0 -23
  204. data/ext/nokogiri/xml_document_fragment.h +0 -10
  205. data/ext/nokogiri/xml_dtd.h +0 -10
  206. data/ext/nokogiri/xml_element_content.h +0 -10
  207. data/ext/nokogiri/xml_element_decl.h +0 -9
  208. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  209. data/ext/nokogiri/xml_entity_decl.h +0 -10
  210. data/ext/nokogiri/xml_entity_reference.h +0 -9
  211. data/ext/nokogiri/xml_io.c +0 -56
  212. data/ext/nokogiri/xml_io.h +0 -11
  213. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  214. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  215. data/ext/nokogiri/xml_namespace.h +0 -13
  216. data/ext/nokogiri/xml_node.h +0 -13
  217. data/ext/nokogiri/xml_node_set.h +0 -14
  218. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  219. data/ext/nokogiri/xml_reader.h +0 -10
  220. data/ext/nokogiri/xml_relax_ng.h +0 -9
  221. data/ext/nokogiri/xml_sax_parser.h +0 -39
  222. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  223. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  224. data/ext/nokogiri/xml_schema.h +0 -9
  225. data/ext/nokogiri/xml_syntax_error.h +0 -13
  226. data/ext/nokogiri/xml_text.h +0 -9
  227. data/ext/nokogiri/xml_xpath_context.h +0 -10
  228. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  229. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  230. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  231. data/tasks/cross_compile.rb +0 -150
  232. data/tasks/nokogiri.org.rb +0 -24
  233. data/tasks/test.rb +0 -95
  234. data/test/css/test_nthiness.rb +0 -159
  235. data/test/css/test_parser.rb +0 -341
  236. data/test/css/test_tokenizer.rb +0 -198
  237. data/test/css/test_xpath_visitor.rb +0 -91
  238. data/test/decorators/test_slop.rb +0 -16
  239. data/test/files/2ch.html +0 -108
  240. data/test/files/address_book.rlx +0 -12
  241. data/test/files/address_book.xml +0 -10
  242. data/test/files/bar/bar.xsd +0 -4
  243. data/test/files/dont_hurt_em_why.xml +0 -422
  244. data/test/files/encoding.html +0 -82
  245. data/test/files/encoding.xhtml +0 -84
  246. data/test/files/exslt.xml +0 -8
  247. data/test/files/exslt.xslt +0 -35
  248. data/test/files/foo/foo.xsd +0 -4
  249. data/test/files/metacharset.html +0 -10
  250. data/test/files/noencoding.html +0 -47
  251. data/test/files/po.xml +0 -32
  252. data/test/files/po.xsd +0 -66
  253. data/test/files/shift_jis.html +0 -10
  254. data/test/files/shift_jis.xml +0 -5
  255. data/test/files/snuggles.xml +0 -3
  256. data/test/files/staff.dtd +0 -10
  257. data/test/files/staff.xml +0 -59
  258. data/test/files/staff.xslt +0 -32
  259. data/test/files/test_document_url/bar.xml +0 -2
  260. data/test/files/test_document_url/document.dtd +0 -4
  261. data/test/files/test_document_url/document.xml +0 -6
  262. data/test/files/tlm.html +0 -850
  263. data/test/files/to_be_xincluded.xml +0 -2
  264. data/test/files/valid_bar.xml +0 -2
  265. data/test/files/xinclude.xml +0 -4
  266. data/test/helper.rb +0 -154
  267. data/test/html/sax/test_parser.rb +0 -141
  268. data/test/html/sax/test_parser_context.rb +0 -46
  269. data/test/html/test_builder.rb +0 -164
  270. data/test/html/test_document.rb +0 -552
  271. data/test/html/test_document_encoding.rb +0 -138
  272. data/test/html/test_document_fragment.rb +0 -261
  273. data/test/html/test_element_description.rb +0 -105
  274. data/test/html/test_named_characters.rb +0 -14
  275. data/test/html/test_node.rb +0 -196
  276. data/test/html/test_node_encoding.rb +0 -27
  277. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  278. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  279. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  280. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  281. data/test/test_convert_xpath.rb +0 -135
  282. data/test/test_css_cache.rb +0 -45
  283. data/test/test_encoding_handler.rb +0 -46
  284. data/test/test_memory_leak.rb +0 -156
  285. data/test/test_nokogiri.rb +0 -132
  286. data/test/test_reader.rb +0 -555
  287. data/test/test_soap4r_sax.rb +0 -52
  288. data/test/test_xslt_transforms.rb +0 -254
  289. data/test/xml/node/test_save_options.rb +0 -28
  290. data/test/xml/node/test_subclass.rb +0 -44
  291. data/test/xml/sax/test_parser.rb +0 -366
  292. data/test/xml/sax/test_parser_context.rb +0 -106
  293. data/test/xml/sax/test_push_parser.rb +0 -157
  294. data/test/xml/test_attr.rb +0 -64
  295. data/test/xml/test_attribute_decl.rb +0 -86
  296. data/test/xml/test_builder.rb +0 -306
  297. data/test/xml/test_c14n.rb +0 -151
  298. data/test/xml/test_cdata.rb +0 -48
  299. data/test/xml/test_comment.rb +0 -29
  300. data/test/xml/test_document.rb +0 -828
  301. data/test/xml/test_document_encoding.rb +0 -28
  302. data/test/xml/test_document_fragment.rb +0 -223
  303. data/test/xml/test_dtd.rb +0 -103
  304. data/test/xml/test_dtd_encoding.rb +0 -33
  305. data/test/xml/test_element_content.rb +0 -56
  306. data/test/xml/test_element_decl.rb +0 -73
  307. data/test/xml/test_entity_decl.rb +0 -122
  308. data/test/xml/test_entity_reference.rb +0 -245
  309. data/test/xml/test_namespace.rb +0 -95
  310. data/test/xml/test_node.rb +0 -1137
  311. data/test/xml/test_node_attributes.rb +0 -96
  312. data/test/xml/test_node_encoding.rb +0 -107
  313. data/test/xml/test_node_inheritance.rb +0 -32
  314. data/test/xml/test_node_reparenting.rb +0 -374
  315. data/test/xml/test_node_set.rb +0 -755
  316. data/test/xml/test_parse_options.rb +0 -64
  317. data/test/xml/test_processing_instruction.rb +0 -30
  318. data/test/xml/test_reader_encoding.rb +0 -142
  319. data/test/xml/test_relax_ng.rb +0 -60
  320. data/test/xml/test_schema.rb +0 -103
  321. data/test/xml/test_syntax_error.rb +0 -12
  322. data/test/xml/test_text.rb +0 -45
  323. data/test/xml/test_unparented_node.rb +0 -422
  324. data/test/xml/test_xinclude.rb +0 -83
  325. data/test/xml/test_xpath.rb +0 -295
  326. data/test/xslt/test_custom_functions.rb +0 -133
  327. data/test/xslt/test_exception_handling.rb +0 -37
  328. data/test_all +0 -81
@@ -1,87 +1,102 @@
1
- require 'stringio'
2
- require 'nokogiri/xml/node/save_options'
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+ require "stringio"
3
4
 
4
5
  module Nokogiri
5
6
  module XML
6
- ####
7
- # Nokogiri::XML::Node is your window to the fun filled world of dealing
8
- # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
9
- # to a hash with regard to attributes. For example (from irb):
7
+ ##
8
+ # {Nokogiri::XML::Node} is your window to the fun filled world of dealing with XML and HTML
9
+ # tags. A {Nokogiri::XML::Node} may be treated similarly to a hash with regard to attributes. For
10
+ # example:
10
11
  #
11
- # irb(main):004:0> node
12
- # => <a href="#foo" id="link">link</a>
13
- # irb(main):005:0> node['href']
14
- # => "#foo"
15
- # irb(main):006:0> node.keys
16
- # => ["href", "id"]
17
- # irb(main):007:0> node.values
18
- # => ["#foo", "link"]
19
- # irb(main):008:0> node['class'] = 'green'
20
- # => "green"
21
- # irb(main):009:0> node
22
- # => <a href="#foo" id="link" class="green">link</a>
23
- # irb(main):010:0>
12
+ # node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
13
+ # node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
14
+ # node['href'] # => "#foo"
15
+ # node.keys # => ["href", "id"]
16
+ # node.values # => ["#foo", "link"]
17
+ # node['class'] = 'green' # => "green"
18
+ # node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
24
19
  #
25
- # See Nokogiri::XML::Node#[] and Nokogiri::XML#[]= for more information.
20
+ # See the method group entitled "Working With Node Attributes" for the full set of methods.
26
21
  #
27
- # Nokogiri::XML::Node also has methods that let you move around your
22
+ # {Nokogiri::XML::Node} also has methods that let you move around your
28
23
  # tree. For navigating your tree, see:
29
24
  #
30
- # * Nokogiri::XML::Node#parent
31
- # * Nokogiri::XML::Node#children
32
- # * Nokogiri::XML::Node#next
33
- # * Nokogiri::XML::Node#previous
25
+ # * {#parent}
26
+ # * {#children}
27
+ # * {#next}
28
+ # * {#previous}
29
+ #
30
+ # When printing or otherwise emitting a document or a node (and
31
+ # its subtree), there are a few methods you might want to use:
32
+ #
33
+ # * {#content}, {#text}, {#inner_text}, {#to_str}: These methods will all <b>emit plaintext</b>,
34
+ # meaning that entities will be replaced (e.g., "&lt;" will be replaced with "<"), meaning
35
+ # that any sanitizing will likely be un-done in the output.
36
+ #
37
+ # * {#to_s}, {#to_xml}, {#to_html}, {#inner_html}: These methods will all <b>emit
38
+ # properly-escaped markup</b>, meaning that it's suitable for consumption by browsers,
39
+ # parsers, etc.
40
+ #
41
+ # You may search this node's subtree using {#xpath} and {#css}
34
42
  #
35
- # You may search this node's subtree using Node#xpath and Node#css
36
43
  class Node
37
44
  include Nokogiri::XML::PP::Node
45
+ include Nokogiri::XML::Searchable
38
46
  include Enumerable
39
47
 
40
- # Element node type, see Nokogiri::XML::Node#element?
41
- ELEMENT_NODE = 1
48
+ # Element node type, see {Nokogiri::XML::Node#element?}
49
+ ELEMENT_NODE = 1
42
50
  # Attribute node type
43
- ATTRIBUTE_NODE = 2
44
- # Text node type, see Nokogiri::XML::Node#text?
45
- TEXT_NODE = 3
46
- # CDATA node type, see Nokogiri::XML::Node#cdata?
51
+ ATTRIBUTE_NODE = 2
52
+ # Text node type, see {Nokogiri::XML::Node#text?}
53
+ TEXT_NODE = 3
54
+ # CDATA node type, see {Nokogiri::XML::Node#cdata?}
47
55
  CDATA_SECTION_NODE = 4
48
56
  # Entity reference node type
49
- ENTITY_REF_NODE = 5
57
+ ENTITY_REF_NODE = 5
50
58
  # Entity node type
51
- ENTITY_NODE = 6
59
+ ENTITY_NODE = 6
52
60
  # PI node type
53
- PI_NODE = 7
54
- # Comment node type, see Nokogiri::XML::Node#comment?
55
- COMMENT_NODE = 8
56
- # Document node type, see Nokogiri::XML::Node#xml?
57
- DOCUMENT_NODE = 9
61
+ PI_NODE = 7
62
+ # Comment node type, see {Nokogiri::XML::Node#comment?}
63
+ COMMENT_NODE = 8
64
+ # Document node type, see {Nokogiri::XML::Node#xml?}
65
+ DOCUMENT_NODE = 9
58
66
  # Document type node type
59
67
  DOCUMENT_TYPE_NODE = 10
60
68
  # Document fragment node type
61
69
  DOCUMENT_FRAG_NODE = 11
62
70
  # Notation node type
63
- NOTATION_NODE = 12
64
- # HTML document node type, see Nokogiri::XML::Node#html?
71
+ NOTATION_NODE = 12
72
+ # HTML document node type, see {Nokogiri::XML::Node#html?}
65
73
  HTML_DOCUMENT_NODE = 13
66
74
  # DTD node type
67
- DTD_NODE = 14
75
+ DTD_NODE = 14
68
76
  # Element declaration type
69
- ELEMENT_DECL = 15
77
+ ELEMENT_DECL = 15
70
78
  # Attribute declaration type
71
- ATTRIBUTE_DECL = 16
79
+ ATTRIBUTE_DECL = 16
72
80
  # Entity declaration type
73
- ENTITY_DECL = 17
81
+ ENTITY_DECL = 17
74
82
  # Namespace declaration type
75
- NAMESPACE_DECL = 18
83
+ NAMESPACE_DECL = 18
76
84
  # XInclude start type
77
- XINCLUDE_START = 19
85
+ XINCLUDE_START = 19
78
86
  # XInclude end type
79
- XINCLUDE_END = 20
87
+ XINCLUDE_END = 20
80
88
  # DOCB document node type
81
89
  DOCB_DOCUMENT_NODE = 21
82
90
 
83
- def initialize name, document # :nodoc:
84
- # ... Ya. This is empty on purpose.
91
+ ##
92
+ # Create a new node with +name+ sharing GC lifecycle with +document+.
93
+ # @param name [String]
94
+ # @param document [Nokogiri::XML::Document]
95
+ # @yieldparam node [Nokogiri::XML::Node]
96
+ # @return [Nokogiri::XML::Node]
97
+ # @see Nokogiri::XML::Node.new
98
+ def initialize(name, document)
99
+ # This is intentionally empty.
85
100
  end
86
101
 
87
102
  ###
@@ -90,175 +105,18 @@ module Nokogiri
90
105
  document.decorate(self)
91
106
  end
92
107
 
93
- ###
94
- # Search this node for +paths+. +paths+ can be XPath or CSS, and an
95
- # optional hash of namespaces may be appended.
96
- # See Node#xpath and Node#css.
97
- def search *paths
98
- # TODO use paths, handler, ns, binds = extract_params(paths)
99
- ns = paths.last.is_a?(Hash) ? paths.pop :
100
- (document.root ? document.root.namespaces : {})
101
-
102
- prefix = "#{implied_xpath_context}/"
103
-
104
- xpath(*(paths.map { |path|
105
- path = path.to_s
106
- path =~ /^(\.\/|\/|\.\.|\.$)/ ? path : CSS.xpath_for(
107
- path,
108
- :prefix => prefix,
109
- :ns => ns
110
- )
111
- }.flatten.uniq) + [ns])
112
- end
113
- alias :/ :search
114
-
115
- ###
116
- # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
117
- #
118
- # Search this node for XPath +paths+. +paths+ must be one or more XPath
119
- # queries.
120
- #
121
- # node.xpath('.//title')
122
- #
123
- # A hash of namespace bindings may be appended. For example:
124
- #
125
- # node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
126
- # node.xpath('.//xmlns:name', node.root.namespaces)
127
- #
128
- # A hash of variable bindings may also be appended to the namespace bindings. For example:
129
- #
130
- # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
131
- #
132
- # Custom XPath functions may also be defined. To define custom
133
- # functions create a class and implement the function you want
134
- # to define. The first argument to the method will be the
135
- # current matching NodeSet. Any other arguments are ones that
136
- # you pass in. Note that this class may appear anywhere in the
137
- # argument list. For example:
138
- #
139
- # node.xpath('.//title[regex(., "\w+")]', Class.new {
140
- # def regex node_set, regex
141
- # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
142
- # end
143
- # }.new)
144
- #
145
- def xpath *paths
146
- return NodeSet.new(document) unless document
147
-
148
- paths, handler, ns, binds = extract_params(paths)
149
-
150
- sets = paths.map { |path|
151
- ctx = XPathContext.new(self)
152
- ctx.register_namespaces(ns)
153
- path = path.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
154
-
155
- binds.each do |key,value|
156
- ctx.register_variable key.to_s, value
157
- end if binds
158
-
159
- ctx.evaluate(path, handler)
160
- }
161
- return sets.first if sets.length == 1
162
-
163
- NodeSet.new(document) do |combined|
164
- sets.each do |set|
165
- set.each do |node|
166
- combined << node
167
- end
168
- end
169
- end
170
- end
171
-
172
- ###
173
- # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
174
- #
175
- # Search this node for CSS +rules+. +rules+ must be one or more CSS
176
- # selectors. For example:
177
- #
178
- # node.css('title')
179
- # node.css('body h1.bold')
180
- # node.css('div + p.green', 'div#one')
181
- #
182
- # A hash of namespace bindings may be appended. For example:
183
- #
184
- # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
185
- #
186
- # Custom CSS pseudo classes may also be defined. To define
187
- # custom pseudo classes, create a class and implement the custom
188
- # pseudo class you want defined. The first argument to the
189
- # method will be the current matching NodeSet. Any other
190
- # arguments are ones that you pass in. For example:
191
- #
192
- # node.css('title:regex("\w+")', Class.new {
193
- # def regex node_set, regex
194
- # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
195
- # end
196
- # }.new)
197
- #
198
- # Note that the CSS query string is case-sensitive with regards
199
- # to your document type. That is, if you're looking for "H1" in
200
- # an HTML document, you'll never find anything, since HTML tags
201
- # will match only lowercase CSS queries. However, "H1" might be
202
- # found in an XML document, where tags names are case-sensitive
203
- # (e.g., "H1" is distinct from "h1").
204
- #
205
- def css *rules
206
- rules, handler, ns, binds = extract_params(rules)
207
-
208
- prefix = "#{implied_xpath_context}/"
209
-
210
- rules = rules.map { |rule|
211
- CSS.xpath_for(rule, :prefix => prefix, :ns => ns)
212
- }.flatten.uniq + [ns, handler, binds].compact
213
-
214
- xpath(*rules)
215
- end
108
+ # @!group Searching via XPath or CSS Queries
216
109
 
217
110
  ###
218
111
  # Search this node's immediate children using CSS selector +selector+
219
- def > selector
112
+ def >(selector)
220
113
  ns = document.root.namespaces
221
114
  xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
222
115
  end
223
116
 
224
- ###
225
- # Search for the first occurrence of +path+.
226
- #
227
- # Returns nil if nothing is found, otherwise a Node.
228
- def at path, ns = document.root ? document.root.namespaces : {}
229
- search(path, ns).first
230
- end
231
- alias :% :at
117
+ # @!endgroup
232
118
 
233
- ##
234
- # Search this node for the first occurrence of XPath +paths+.
235
- # Equivalent to <tt>xpath(paths).first</tt>
236
- # See Node#xpath for more information.
237
- #
238
- def at_xpath *paths
239
- xpath(*paths).first
240
- end
241
-
242
- ##
243
- # Search this node for the first occurrence of CSS +rules+.
244
- # Equivalent to <tt>css(rules).first</tt>
245
- # See Node#css for more information.
246
- #
247
- def at_css *rules
248
- css(*rules).first
249
- end
250
-
251
- ###
252
- # Get the attribute value for the attribute +name+
253
- def [] name
254
- get(name.to_s)
255
- end
256
-
257
- ###
258
- # Set the attribute value for the attribute +name+ to +value+
259
- def []= name, value
260
- set name.to_s, value.to_s
261
- end
119
+ # @!group Manipulating Document Structure
262
120
 
263
121
  ###
264
122
  # Add +node_or_tags+ as a child of this Node.
@@ -267,7 +125,7 @@ module Nokogiri
267
125
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
268
126
  #
269
127
  # Also see related method +<<+.
270
- def add_child node_or_tags
128
+ def add_child(node_or_tags)
271
129
  node_or_tags = coerce(node_or_tags)
272
130
  if node_or_tags.is_a?(XML::NodeSet)
273
131
  node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
@@ -277,6 +135,34 @@ module Nokogiri
277
135
  node_or_tags
278
136
  end
279
137
 
138
+ ###
139
+ # Add +node_or_tags+ as the first child of this Node.
140
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
141
+ #
142
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
143
+ #
144
+ # Also see related method +add_child+.
145
+ def prepend_child(node_or_tags)
146
+ if first = children.first
147
+ # Mimic the error add_child would raise.
148
+ raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
149
+ first.__send__(:add_sibling, :previous, node_or_tags)
150
+ else
151
+ add_child(node_or_tags)
152
+ end
153
+ end
154
+
155
+ ###
156
+ # Add html around this node
157
+ #
158
+ # Returns self
159
+ def wrap(html)
160
+ new_parent = document.parse(html).first
161
+ add_next_sibling(new_parent)
162
+ new_parent.add_child(self)
163
+ self
164
+ end
165
+
280
166
  ###
281
167
  # Add +node_or_tags+ as a child of this Node.
282
168
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
@@ -284,10 +170,11 @@ module Nokogiri
284
170
  # Returns self, to support chaining of calls (e.g., root << child1 << child2)
285
171
  #
286
172
  # Also see related method +add_child+.
287
- def << node_or_tags
173
+ def <<(node_or_tags)
288
174
  add_child node_or_tags
289
175
  self
290
176
  end
177
+
291
178
  ###
292
179
  # Insert +node_or_tags+ before this Node (as a sibling).
293
180
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
@@ -295,8 +182,8 @@ module Nokogiri
295
182
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
296
183
  #
297
184
  # Also see related method +before+.
298
- def add_previous_sibling node_or_tags
299
- raise ArgumentError.new("A document may not have multiple root nodes.") if parent.is_a?(XML::Document) && !node_or_tags.is_a?(XML::ProcessingInstruction)
185
+ def add_previous_sibling(node_or_tags)
186
+ raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
300
187
 
301
188
  add_sibling :previous, node_or_tags
302
189
  end
@@ -308,9 +195,9 @@ module Nokogiri
308
195
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
309
196
  #
310
197
  # Also see related method +after+.
311
- def add_next_sibling node_or_tags
312
- raise ArgumentError.new("A document may not have multiple root nodes.") if parent.is_a?(XML::Document)
313
-
198
+ def add_next_sibling(node_or_tags)
199
+ raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
200
+
314
201
  add_sibling :next, node_or_tags
315
202
  end
316
203
 
@@ -321,7 +208,7 @@ module Nokogiri
321
208
  # Returns self, to support chaining of calls.
322
209
  #
323
210
  # Also see related method +add_previous_sibling+.
324
- def before node_or_tags
211
+ def before(node_or_tags)
325
212
  add_previous_sibling node_or_tags
326
213
  self
327
214
  end
@@ -333,7 +220,7 @@ module Nokogiri
333
220
  # Returns self, to support chaining of calls.
334
221
  #
335
222
  # Also see related method +add_next_sibling+.
336
- def after node_or_tags
223
+ def after(node_or_tags)
337
224
  add_next_sibling node_or_tags
338
225
  self
339
226
  end
@@ -345,7 +232,7 @@ module Nokogiri
345
232
  # Returns self.
346
233
  #
347
234
  # Also see related method +children=+
348
- def inner_html= node_or_tags
235
+ def inner_html=(node_or_tags)
349
236
  self.children = node_or_tags
350
237
  self
351
238
  end
@@ -357,7 +244,7 @@ module Nokogiri
357
244
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
358
245
  #
359
246
  # Also see related method +inner_html=+
360
- def children= node_or_tags
247
+ def children=(node_or_tags)
361
248
  node_or_tags = coerce(node_or_tags)
362
249
  children.unlink
363
250
  if node_or_tags.is_a?(XML::NodeSet)
@@ -375,19 +262,21 @@ module Nokogiri
375
262
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
376
263
  #
377
264
  # Also see related method +swap+.
378
- def replace node_or_tags
265
+ def replace(node_or_tags)
266
+ raise("Cannot replace a node with no parent") unless parent
267
+
379
268
  # We cannot replace a text node directly, otherwise libxml will return
380
269
  # an internal error at parser.c:13031, I don't know exactly why
381
270
  # libxml is trying to find a parent node that is an element or document
382
271
  # so I can't tell if this is bug in libxml or not. issue #775.
383
272
  if text?
384
- replacee = Nokogiri::XML::Node.new 'dummy', document
273
+ replacee = Nokogiri::XML::Node.new "dummy", document
385
274
  add_previous_sibling_node replacee
386
275
  unlink
387
276
  return replacee.replace node_or_tags
388
277
  end
389
278
 
390
- node_or_tags = coerce(node_or_tags)
279
+ node_or_tags = parent.coerce(node_or_tags)
391
280
 
392
281
  if node_or_tags.is_a?(XML::NodeSet)
393
282
  node_or_tags.each { |n| add_previous_sibling n }
@@ -405,33 +294,98 @@ module Nokogiri
405
294
  # Returns self, to support chaining of calls.
406
295
  #
407
296
  # Also see related method +replace+.
408
- def swap node_or_tags
297
+ def swap(node_or_tags)
409
298
  replace node_or_tags
410
299
  self
411
300
  end
412
301
 
413
- alias :next :next_sibling
414
- alias :previous :previous_sibling
302
+ ####
303
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
304
+ def content=(string)
305
+ self.native_content = encode_special_chars(string.to_s)
306
+ end
415
307
 
416
- # :stopdoc:
417
- # HACK: This is to work around an RDoc bug
418
- alias :next= :add_next_sibling
419
- # :startdoc:
308
+ ###
309
+ # Set the parent Node for this Node
310
+ def parent=(parent_node)
311
+ parent_node.add_child(self)
312
+ parent_node
313
+ end
420
314
 
421
- alias :previous= :add_previous_sibling
422
- alias :remove :unlink
423
- alias :get_attribute :[]
424
- alias :attr :[]
425
- alias :set_attribute :[]=
426
- alias :text :content
427
- alias :inner_text :content
428
- alias :has_attribute? :key?
429
- alias :name :node_name
430
- alias :name= :node_name=
431
- alias :type :node_type
432
- alias :to_str :text
433
- alias :clone :dup
434
- alias :elements :element_children
315
+ ###
316
+ # Adds a default namespace supplied as a string +url+ href, to self.
317
+ # The consequence is as if an xmlns attribute with supplied argument were
318
+ # present in parsed XML. A default namespace set with this method will
319
+ # not show up in #attributes, but when this node is serialized to XML an
320
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
321
+ def default_namespace=(url)
322
+ add_namespace_definition(nil, url)
323
+ end
324
+
325
+ ###
326
+ # Set the default namespace on this node (as would be defined with an
327
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
328
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
329
+ # for this node. You probably want #default_namespace= instead, or perhaps
330
+ # #add_namespace_definition with a nil prefix argument.
331
+ def namespace=(ns)
332
+ return set_namespace(ns) unless ns
333
+
334
+ unless Nokogiri::XML::Namespace === ns
335
+ raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
336
+ end
337
+ if ns.document != document
338
+ raise ArgumentError, "namespace must be declared on the same document"
339
+ end
340
+
341
+ set_namespace ns
342
+ end
343
+
344
+ ###
345
+ # Do xinclude substitution on the subtree below node. If given a block, a
346
+ # Nokogiri::XML::ParseOptions object initialized from +options+, will be
347
+ # passed to it, allowing more convenient modification of the parser options.
348
+ def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
349
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
350
+
351
+ # give options to user
352
+ yield options if block_given?
353
+
354
+ # call c extension
355
+ process_xincludes(options.to_i)
356
+ end
357
+
358
+ alias :next :next_sibling
359
+ alias :previous :previous_sibling
360
+ alias :next= :add_next_sibling
361
+ alias :previous= :add_previous_sibling
362
+ alias :remove :unlink
363
+ alias :name= :node_name=
364
+ alias :add_namespace :add_namespace_definition
365
+
366
+ # @!endgroup
367
+
368
+ alias :text :content
369
+ alias :inner_text :content
370
+ alias :name :node_name
371
+ alias :type :node_type
372
+ alias :to_str :text
373
+ alias :clone :dup
374
+ alias :elements :element_children
375
+
376
+ # @!group Working With Node Attributes
377
+
378
+ ###
379
+ # Get the attribute value for the attribute +name+
380
+ def [](name)
381
+ get(name.to_s)
382
+ end
383
+
384
+ ###
385
+ # Set the attribute value for the attribute +name+ to +value+
386
+ def []=(name, value)
387
+ set name.to_s, value.to_s
388
+ end
435
389
 
436
390
  ####
437
391
  # Returns a hash containing the node's attributes. The key is
@@ -440,21 +394,27 @@ module Nokogiri
440
394
  # If you need to distinguish attributes with the same name, with different namespaces
441
395
  # use #attribute_nodes instead.
442
396
  def attributes
443
- Hash[attribute_nodes.map { |node|
444
- [node.node_name, node]
445
- }]
397
+ attribute_nodes.each_with_object({}) do |node, hash|
398
+ hash[node.node_name] = node
399
+ end
446
400
  end
447
401
 
448
402
  ###
449
403
  # Get the attribute values for this Node.
450
404
  def values
451
- attribute_nodes.map { |node| node.value }
405
+ attribute_nodes.map(&:value)
406
+ end
407
+
408
+ ###
409
+ # Do this Node's attribute values include +value+?
410
+ def value?(value)
411
+ values.include? value
452
412
  end
453
413
 
454
414
  ###
455
415
  # Get the attribute names for this Node.
456
416
  def keys
457
- attribute_nodes.map { |node| node.node_name }
417
+ attribute_nodes.map(&:node_name)
458
418
  end
459
419
 
460
420
  ###
@@ -467,21 +427,365 @@ module Nokogiri
467
427
 
468
428
  ###
469
429
  # Remove the attribute named +name+
470
- def remove_attribute name
471
- attributes[name].remove if key? name
430
+ def remove_attribute(name)
431
+ attr = attributes[name].remove if key? name
432
+ clear_xpath_context if Nokogiri.jruby?
433
+ attr
434
+ end
435
+
436
+ # Get the CSS class names of a Node.
437
+ #
438
+ # This is a convenience function and is equivalent to:
439
+ # node.kwattr_values("class")
440
+ #
441
+ # @see #kwattr_values
442
+ # @see #add_class
443
+ # @see #append_class
444
+ # @see #remove_class
445
+ #
446
+ # @return [Array<String>]
447
+ #
448
+ # The CSS classes present in the Node's +class+ attribute. If
449
+ # the attribute is empty or non-existent, the return value is
450
+ # an empty array.
451
+ #
452
+ # @example
453
+ # node # => <div class="section title header"></div>
454
+ # node.classes # => ["section", "title", "header"]
455
+ #
456
+ def classes
457
+ kwattr_values("class")
458
+ end
459
+
460
+ # Ensure HTML CSS classes are present on a +Node+. Any CSS
461
+ # classes in +names+ that already exist in the +Node+'s +class+
462
+ # attribute are _not_ added. Note that any existing duplicates
463
+ # in the +class+ attribute are not removed. Compare with
464
+ # {#append_class}.
465
+ #
466
+ # This is a convenience function and is equivalent to:
467
+ # node.kwattr_add("class", names)
468
+ #
469
+ # @see #kwattr_add
470
+ # @see #classes
471
+ # @see #append_class
472
+ # @see #remove_class
473
+ #
474
+ # @param names [String, Array<String>]
475
+ #
476
+ # CSS class names to be added to the Node's +class+
477
+ # attribute. May be a string containing whitespace-delimited
478
+ # names, or an Array of String names. Any class names already
479
+ # present will not be added. Any class names not present will
480
+ # be added. If no +class+ attribute exists, one is created.
481
+ #
482
+ # @return [Node] Returns +self+ for ease of chaining method calls.
483
+ #
484
+ # @example Ensure that a +Node+ has CSS class "section"
485
+ # node # => <div></div>
486
+ # node.add_class("section") # => <div class="section"></div>
487
+ # node.add_class("section") # => <div class="section"></div> # duplicate not added
488
+ #
489
+ # @example Ensure that a +Node+ has CSS classes "section" and "header", via a String argument.
490
+ # node # => <div class="section section"></div>
491
+ # node.add_class("section header") # => <div class="section section header"></div>
492
+ # # Note that the CSS class "section" is not added because it is already present.
493
+ # # Note also that the pre-existing duplicate CSS class "section" is not removed.
494
+ #
495
+ # @example Ensure that a +Node+ has CSS classes "section" and "header", via an Array argument.
496
+ # node # => <div></div>
497
+ # node.add_class(["section", "header"]) # => <div class="section header"></div>
498
+ #
499
+ def add_class(names)
500
+ kwattr_add("class", names)
501
+ end
502
+
503
+ # Add HTML CSS classes to a +Node+, regardless of
504
+ # duplication. Compare with {#add_class}.
505
+ #
506
+ # This is a convenience function and is equivalent to:
507
+ # node.kwattr_append("class", names)
508
+ #
509
+ # @see #kwattr_append
510
+ # @see #classes
511
+ # @see #add_class
512
+ # @see #remove_class
513
+ #
514
+ # @param names [String, Array<String>]
515
+ #
516
+ # CSS class names to be appended to the Node's +class+
517
+ # attribute. May be a string containing whitespace-delimited
518
+ # names, or an Array of String names. All class names passed
519
+ # in will be appended to the +class+ attribute even if they
520
+ # are already present in the attribute value. If no +class+
521
+ # attribute exists, one is created.
522
+ #
523
+ # @return [Node] Returns +self+ for ease of chaining method calls.
524
+ #
525
+ # @example Append "section" to a +Node+'s CSS +class+ attribute
526
+ # node # => <div></div>
527
+ # node.append_class("section") # => <div class="section"></div>
528
+ # node.append_class("section") # => <div class="section section"></div> # duplicate added!
529
+ #
530
+ # @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via a String argument.
531
+ # node # => <div class="section section"></div>
532
+ # node.append_class("section header") # => <div class="section section section header"></div>
533
+ # # Note that the CSS class "section" is appended even though it is already present.
534
+ #
535
+ # @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via an Array argument.
536
+ # node # => <div></div>
537
+ # node.append_class(["section", "header"]) # => <div class="section header"></div>
538
+ # node.append_class(["section", "header"]) # => <div class="section header section header"></div>
539
+ #
540
+ def append_class(names)
541
+ kwattr_append("class", names)
542
+ end
543
+
544
+ # Remove HTML CSS classes from a +Node+. Any CSS classes in +names+ that
545
+ # exist in the +Node+'s +class+ attribute are removed, including any
546
+ # multiple entries.
547
+ #
548
+ # If no CSS classes remain after this operation, or if +names+ is
549
+ # +nil+, the +class+ attribute is deleted from the node.
550
+ #
551
+ # This is a convenience function and is equivalent to:
552
+ # node.kwattr_remove("class", names)
553
+ #
554
+ # @see #kwattr_remove
555
+ # @see #classes
556
+ # @see #add_class
557
+ # @see #append_class
558
+ #
559
+ # @param names [String, Array<String>]
560
+ #
561
+ # CSS class names to be removed from the Node's +class+ attribute. May
562
+ # be a string containing whitespace-delimited names, or an Array of
563
+ # String names. Any class names already present will be removed. If no
564
+ # CSS classes remain, the +class+ attribute is deleted.
565
+ #
566
+ # @return [Node] Returns +self+ for ease of chaining method calls.
567
+ #
568
+ # @example
569
+ # node # => <div class="section header"></div>
570
+ # node.remove_class("section") # => <div class="header"></div>
571
+ # node.remove_class("header") # => <div></div> # attribute is deleted when empty
572
+ #
573
+ def remove_class(names = nil)
574
+ kwattr_remove("class", names)
575
+ end
576
+
577
+ # Retrieve values from a keyword attribute of a Node.
578
+ #
579
+ # A "keyword attribute" is a node attribute that contains a set
580
+ # of space-delimited values. Perhaps the most familiar example
581
+ # of this is the HTML +class+ attribute used to contain CSS
582
+ # classes. But other keyword attributes exist, for instance
583
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
584
+ #
585
+ # @see #classes
586
+ # @see #kwattr_add
587
+ # @see #kwattr_append
588
+ # @see #kwattr_remove
589
+ #
590
+ # @param attribute_name [String] The name of the keyword attribute to be inspected.
591
+ #
592
+ # @return [Array<String>]
593
+ #
594
+ # The values present in the Node's +attribute_name+
595
+ # attribute. If the attribute is empty or non-existent, the
596
+ # return value is an empty array.
597
+ #
598
+ # @example
599
+ # node # => <a rel="nofollow noopener external">link</a>
600
+ # node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
601
+ #
602
+ # @since v1.11.0
603
+ #
604
+ def kwattr_values(attribute_name)
605
+ keywordify(get_attribute(attribute_name) || [])
472
606
  end
607
+
608
+ # Ensure that values are present in a keyword attribute.
609
+ #
610
+ # Any values in +keywords+ that already exist in the +Node+'s
611
+ # attribute values are _not_ added. Note that any existing
612
+ # duplicates in the attribute values are not removed. Compare
613
+ # with {#kwattr_append}.
614
+ #
615
+ # A "keyword attribute" is a node attribute that contains a set
616
+ # of space-delimited values. Perhaps the most familiar example
617
+ # of this is the HTML +class+ attribute used to contain CSS
618
+ # classes. But other keyword attributes exist, for instance
619
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
620
+ #
621
+ # @see #add_class
622
+ # @see #kwattr_values
623
+ # @see #kwattr_append
624
+ # @see #kwattr_remove
625
+ #
626
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
627
+ #
628
+ # @param keywords [String, Array<String>]
629
+ #
630
+ # Keywords to be added to the attribute named
631
+ # +attribute_name+. May be a string containing
632
+ # whitespace-delimited values, or an Array of String
633
+ # values. Any values already present will not be added. Any
634
+ # values not present will be added. If the named attribute
635
+ # does not exist, it is created.
636
+ #
637
+ # @return [Node] Returns +self+ for ease of chaining method calls.
638
+ #
639
+ # @example Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
640
+ # node # => <a></a>
641
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
642
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a> # duplicate not added
643
+ #
644
+ # @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a String argument.
645
+ # node # => <a rel="nofollow nofollow"></a>
646
+ # node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
647
+ # # Note that "nofollow" is not added because it is already present.
648
+ # # Note also that the pre-existing duplicate "nofollow" is not removed.
649
+ #
650
+ # @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via an Array argument.
651
+ # node # => <a></a>
652
+ # node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
653
+ #
654
+ # @since v1.11.0
655
+ #
656
+ def kwattr_add(attribute_name, keywords)
657
+ keywords = keywordify(keywords)
658
+ current_kws = kwattr_values(attribute_name)
659
+ new_kws = (current_kws + (keywords - current_kws)).join(" ")
660
+ set_attribute(attribute_name, new_kws)
661
+ self
662
+ end
663
+
664
+ # Add keywords to a Node's keyword attribute, regardless of
665
+ # duplication. Compare with {#kwattr_add}.
666
+ #
667
+ # A "keyword attribute" is a node attribute that contains a set
668
+ # of space-delimited values. Perhaps the most familiar example
669
+ # of this is the HTML +class+ attribute used to contain CSS
670
+ # classes. But other keyword attributes exist, for instance
671
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
672
+ #
673
+ # @see #append_class
674
+ # @see #kwattr_values
675
+ # @see #kwattr_add
676
+ # @see #kwattr_remove
677
+ #
678
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
679
+ #
680
+ # @param keywords [String, Array<String>]
681
+ #
682
+ # Keywords to be added to the attribute named
683
+ # +attribute_name+. May be a string containing
684
+ # whitespace-delimited values, or an Array of String
685
+ # values. All values passed in will be appended to the named
686
+ # attribute even if they are already present in the
687
+ # attribute. If the named attribute does not exist, it is
688
+ # created.
689
+ #
690
+ # @return [Node] Returns +self+ for ease of chaining method calls.
691
+ #
692
+ # @example Append "nofollow" to the +rel+ attribute.
693
+ # node # => <a></a>
694
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
695
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a> # duplicate added!
696
+ #
697
+ # @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
698
+ # node # => <a rel="nofollow"></a>
699
+ # node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
700
+ # # Note that "nofollow" is appended even though it is already present.
701
+ #
702
+ # @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
703
+ # node # => <a></a>
704
+ # node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
705
+ #
706
+ # @since v1.11.0
707
+ #
708
+ def kwattr_append(attribute_name, keywords)
709
+ keywords = keywordify(keywords)
710
+ current_kws = kwattr_values(attribute_name)
711
+ new_kws = (current_kws + keywords).join(" ")
712
+ set_attribute(attribute_name, new_kws)
713
+ self
714
+ end
715
+
716
+ # Remove keywords from a keyword attribute. Any matching
717
+ # keywords that exist in the named attribute are removed,
718
+ # including any multiple entries.
719
+ #
720
+ # If no keywords remain after this operation, or if +keywords+
721
+ # is +nil+, the attribute is deleted from the node.
722
+ #
723
+ # A "keyword attribute" is a node attribute that contains a set
724
+ # of space-delimited values. Perhaps the most familiar example
725
+ # of this is the HTML +class+ attribute used to contain CSS
726
+ # classes. But other keyword attributes exist, for instance
727
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
728
+ #
729
+ # @see #remove_class
730
+ # @see #kwattr_values
731
+ # @see #kwattr_add
732
+ # @see #kwattr_append
733
+ #
734
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
735
+ #
736
+ # @param keywords [String, Array<String>]
737
+ #
738
+ # Keywords to be removed from the attribute named
739
+ # +attribute_name+. May be a string containing
740
+ # whitespace-delimited values, or an Array of String
741
+ # values. Any keywords present in the named attribute will be
742
+ # removed. If no keywords remain, or if +keywords+ is nil, the
743
+ # attribute is deleted.
744
+ #
745
+ # @return [Node] Returns +self+ for ease of chaining method calls.
746
+ #
747
+ # @example
748
+ # node # => <a rel="nofollow noreferrer">link</a>
749
+ # node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
750
+ # node.kwattr_remove("rel", "noreferrer") # => <a>link</a> # attribute is deleted when empty
751
+ #
752
+ # @since v1.11.0
753
+ #
754
+ def kwattr_remove(attribute_name, keywords)
755
+ if keywords.nil?
756
+ remove_attribute(attribute_name)
757
+ return self
758
+ end
759
+
760
+ keywords = keywordify(keywords)
761
+ current_kws = kwattr_values(attribute_name)
762
+ new_kws = current_kws - keywords
763
+ if new_kws.empty?
764
+ remove_attribute(attribute_name)
765
+ else
766
+ set_attribute(attribute_name, new_kws.join(" "))
767
+ end
768
+ self
769
+ end
770
+
473
771
  alias :delete :remove_attribute
772
+ alias :get_attribute :[]
773
+ alias :attr :[]
774
+ alias :set_attribute :[]=
775
+ alias :has_attribute? :key?
776
+
777
+ # @!endgroup
474
778
 
475
779
  ###
476
780
  # Returns true if this Node matches +selector+
477
- def matches? selector
781
+ def matches?(selector)
478
782
  ancestors.last.search(selector).include?(self)
479
783
  end
480
784
 
481
785
  ###
482
786
  # Create a DocumentFragment containing +tags+ that is relative to _this_
483
787
  # context node.
484
- def fragment tags
788
+ def fragment(tags)
485
789
  type = document.html? ? Nokogiri::HTML : Nokogiri::XML
486
790
  type::DocumentFragment.new(document, tags, self)
487
791
  end
@@ -490,9 +794,18 @@ module Nokogiri
490
794
  # Parse +string_or_io+ as a document fragment within the context of
491
795
  # *this* node. Returns a XML::NodeSet containing the nodes parsed from
492
796
  # +string_or_io+.
493
- def parse string_or_io, options = nil
797
+ def parse(string_or_io, options = nil)
798
+ ##
799
+ # When the current node is unparented and not an element node, use the
800
+ # document as the parsing context instead. Otherwise, the in-context
801
+ # parser cannot find an element or a document node.
802
+ # Document Fragments are also not usable by the in-context parser.
803
+ if !element? && !document? && (!parent || parent.fragment?)
804
+ return document.parse(string_or_io, options)
805
+ end
806
+
494
807
  options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
495
- if Fixnum === options
808
+ if Integer === options
496
809
  options = Nokogiri::XML::ParseOptions.new(options)
497
810
  end
498
811
  # Give the options to the user
@@ -504,32 +817,36 @@ module Nokogiri
504
817
 
505
818
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
506
819
 
507
- ##
508
- # This is a horrible hack, but I don't care. See #313 for background.
820
+ # libxml2 does not obey the `recover` option after encountering errors during `in_context`
821
+ # parsing, and so this horrible hack is here to try to emulate recovery behavior.
822
+ #
823
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
824
+ # would have been inherited from the context node won't be handled correctly. This hack was
825
+ # written in 2010, and I regret it, because it's silently degrading functionality in a way
826
+ # that's not easily prevented (or even detected).
827
+ #
828
+ # I think preferable behavior would be to either:
829
+ #
830
+ # a. add an error noting that we "fell back" and pointing the user to turning off the `recover` option
831
+ # b. don't recover, but raise a sensible exception
832
+ #
833
+ # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
834
+ # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
509
835
  error_count = document.errors.length
510
836
  node_set = in_context(contents, options.to_i)
511
- if node_set.empty? and document.errors.length > error_count and options.recover?
512
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
513
- node_set = fragment.children
837
+ if (node_set.empty? && (document.errors.length > error_count))
838
+ if options.recover?
839
+ fragment = Nokogiri::HTML4::DocumentFragment.parse contents
840
+ node_set = fragment.children
841
+ else
842
+ raise document.errors[error_count]
843
+ end
514
844
  end
515
845
  node_set
516
846
  end
517
847
 
518
- ####
519
- # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
520
- def content= string
521
- self.native_content = encode_special_chars(string.to_s)
522
- end
523
-
524
- ###
525
- # Set the parent Node for this Node
526
- def parent= parent_node
527
- parent_node.add_child(self)
528
- parent_node
529
- end
530
-
531
848
  ###
532
- # Returns a Hash of {prefix => value} for all namespaces on this
849
+ # Returns a Hash of +{prefix => value}+ for all namespaces on this
533
850
  # node and its ancestors.
534
851
  #
535
852
  # This method returns the same namespaces as #namespace_scopes.
@@ -543,16 +860,11 @@ module Nokogiri
543
860
  # default namespaces set on ancestor will NOT be, even if self
544
861
  # has no explicit default namespace.
545
862
  def namespaces
546
- Hash[namespace_scopes.map { |nd|
547
- key = ['xmlns', nd.prefix].compact.join(':')
548
- if RUBY_VERSION >= '1.9' && document.encoding
549
- begin
550
- key.force_encoding document.encoding
551
- rescue ArgumentError
552
- end
553
- end
554
- [key, nd.href]
555
- }]
863
+ namespace_scopes.each_with_object({}) do |ns, hash|
864
+ prefix = ns.prefix
865
+ key = prefix ? "xmlns:#{prefix}" : "xmlns"
866
+ hash[key] = ns.href
867
+ end
556
868
  end
557
869
 
558
870
  # Returns true if this is a Comment
@@ -570,11 +882,21 @@ module Nokogiri
570
882
  type == DOCUMENT_NODE
571
883
  end
572
884
 
573
- # Returns true if this is an HTML::Document node
885
+ # Returns true if this is an HTML4::Document node
574
886
  def html?
575
887
  type == HTML_DOCUMENT_NODE
576
888
  end
577
889
 
890
+ # Returns true if this is a Document
891
+ def document?
892
+ is_a? XML::Document
893
+ end
894
+
895
+ # Returns true if this is a ProcessingInstruction node
896
+ def processing_instruction?
897
+ type == PI_NODE
898
+ end
899
+
578
900
  # Returns true if this is a Text node
579
901
  def text?
580
902
  type == TEXT_NODE
@@ -586,11 +908,11 @@ module Nokogiri
586
908
  end
587
909
 
588
910
  ###
589
- # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
911
+ # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
590
912
  # nil on XML documents and on unknown tags.
591
913
  def description
592
914
  return nil if document.xml?
593
- Nokogiri::HTML::ElementDescription[name]
915
+ Nokogiri::HTML4::ElementDescription[name]
594
916
  end
595
917
 
596
918
  ###
@@ -604,6 +926,7 @@ module Nokogiri
604
926
  def element?
605
927
  type == ELEMENT_NODE
606
928
  end
929
+
607
930
  alias :elem? :element?
608
931
 
609
932
  ###
@@ -614,7 +937,7 @@ module Nokogiri
614
937
  end
615
938
 
616
939
  # Get the inner_html for this node's Node#children
617
- def inner_html *args
940
+ def inner_html(*args)
618
941
  children.map { |x| x.to_html(*args) }.join
619
942
  end
620
943
 
@@ -622,13 +945,13 @@ module Nokogiri
622
945
  def css_path
623
946
  path.split(/\//).map { |part|
624
947
  part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
625
- }.compact.join(' > ')
948
+ }.compact.join(" > ")
626
949
  end
627
950
 
628
951
  ###
629
952
  # Get a list of ancestor Node for this Node. If +selector+ is given,
630
953
  # the ancestors must match +selector+
631
- def ancestors selector = nil
954
+ def ancestors(selector = nil)
632
955
  return NodeSet.new(document) unless respond_to?(:parent)
633
956
  return NodeSet.new(document) unless parent
634
957
 
@@ -642,63 +965,45 @@ module Nokogiri
642
965
  return NodeSet.new(document, parents) unless selector
643
966
 
644
967
  root = parents.last
968
+ search_results = root.search(selector)
645
969
 
646
970
  NodeSet.new(document, parents.find_all { |parent|
647
- root.search(selector).include?(parent)
971
+ search_results.include?(parent)
648
972
  })
649
973
  end
650
974
 
651
- ###
652
- # Adds a default namespace supplied as a string +url+ href, to self.
653
- # The consequence is as an xmlns attribute with supplied argument were
654
- # present in parsed XML. A default namespace set with this method will
655
- # now show up in #attributes, but when this node is serialized to XML an
656
- # "xmlns" attribute will appear. See also #namespace and #namespace=
657
- def default_namespace= url
658
- add_namespace_definition(nil, url)
659
- end
660
- alias :add_namespace :add_namespace_definition
661
-
662
- ###
663
- # Set the default namespace on this node (as would be defined with an
664
- # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
665
- # a Namespace added this way will NOT be serialized as an xmlns attribute
666
- # for this node. You probably want #default_namespace= instead, or perhaps
667
- # #add_namespace_definition with a nil prefix argument.
668
- def namespace= ns
669
- return set_namespace(ns) unless ns
670
-
671
- unless Nokogiri::XML::Namespace === ns
672
- raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
673
- end
674
- if ns.document != document
675
- raise ArgumentError, 'namespace must be declared on the same document'
676
- end
677
-
678
- set_namespace ns
679
- end
680
-
681
975
  ####
682
976
  # Yields self and all children to +block+ recursively.
683
- def traverse &block
684
- children.each{|j| j.traverse(&block) }
977
+ def traverse(&block)
978
+ children.each { |j| j.traverse(&block) }
685
979
  block.call(self)
686
980
  end
687
981
 
688
982
  ###
689
983
  # Accept a visitor. This method calls "visit" on +visitor+ with self.
690
- def accept visitor
984
+ def accept(visitor)
691
985
  visitor.visit(self)
692
986
  end
693
987
 
694
988
  ###
695
989
  # Test to see if this Node is equal to +other+
696
- def == other
990
+ def ==(other)
697
991
  return false unless other
698
992
  return false unless other.respond_to?(:pointer_id)
699
993
  pointer_id == other.pointer_id
700
994
  end
701
995
 
996
+ ###
997
+ # Compare two Node objects with respect to their Document. Nodes from
998
+ # different documents cannot be compared.
999
+ def <=>(other)
1000
+ return nil unless other.is_a?(Nokogiri::XML::Node)
1001
+ return nil unless document == other.document
1002
+ compare other
1003
+ end
1004
+
1005
+ # @!group Serialization and Generating Output
1006
+
702
1007
  ###
703
1008
  # Serialize Node using +options+. Save options can also be set using a
704
1009
  # block. See SaveOptions.
@@ -713,19 +1018,17 @@ module Nokogiri
713
1018
  # config.format.as_xml
714
1019
  # end
715
1020
  #
716
- def serialize *args, &block
1021
+ def serialize(*args, &block)
717
1022
  options = args.first.is_a?(Hash) ? args.shift : {
718
- :encoding => args[0],
719
- :save_with => args[1]
1023
+ :encoding => args[0],
1024
+ :save_with => args[1],
720
1025
  }
721
1026
 
722
1027
  encoding = options[:encoding] || document.encoding
723
1028
  options[:encoding] = encoding
724
1029
 
725
- outstring = ""
726
- if encoding && outstring.respond_to?(:force_encoding)
727
- outstring.force_encoding(Encoding.find(encoding))
728
- end
1030
+ outstring = String.new
1031
+ outstring.force_encoding(Encoding.find(encoding || "utf-8"))
729
1032
  io = StringIO.new(outstring)
730
1033
  write_to io, options, &block
731
1034
  io.string
@@ -738,7 +1041,7 @@ module Nokogiri
738
1041
  #
739
1042
  # See Node#write_to for a list of +options+. For formatted output,
740
1043
  # use Node#to_xhtml instead.
741
- def to_html options = {}
1044
+ def to_html(options = {})
742
1045
  to_format SaveOptions::DEFAULT_HTML, options
743
1046
  end
744
1047
 
@@ -748,7 +1051,7 @@ module Nokogiri
748
1051
  # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
749
1052
  #
750
1053
  # See Node#write_to for a list of +options+
751
- def to_xml options = {}
1054
+ def to_xml(options = {})
752
1055
  options[:save_with] ||= SaveOptions::DEFAULT_XML
753
1056
  serialize(options)
754
1057
  end
@@ -759,7 +1062,7 @@ module Nokogiri
759
1062
  # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
760
1063
  #
761
1064
  # See Node#write_to for a list of +options+
762
- def to_xhtml options = {}
1065
+ def to_xhtml(options = {})
763
1066
  to_format SaveOptions::DEFAULT_XHTML, options
764
1067
  end
765
1068
 
@@ -778,31 +1081,36 @@ module Nokogiri
778
1081
  #
779
1082
  # To save indented with two dashes:
780
1083
  #
781
- # node.write_to(io, :indent_text => '-', :indent => 2
1084
+ # node.write_to(io, :indent_text => '-', :indent => 2)
782
1085
  #
783
- def write_to io, *options
784
- options = options.first.is_a?(Hash) ? options.shift : {}
785
- encoding = options[:encoding] || options[0]
1086
+ def write_to(io, *options)
1087
+ options = options.first.is_a?(Hash) ? options.shift : {}
1088
+ encoding = options[:encoding] || options[0]
786
1089
  if Nokogiri.jruby?
787
- save_options = options[:save_with] || options[1]
788
- indent_times = options[:indent] || 0
1090
+ save_options = options[:save_with] || options[1]
1091
+ indent_times = options[:indent] || 0
789
1092
  else
790
- save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
791
- indent_times = options[:indent] || 2
1093
+ save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
1094
+ indent_times = options[:indent] || 2
792
1095
  end
793
- indent_text = options[:indent_text] || ' '
1096
+ indent_text = options[:indent_text] || " "
1097
+
1098
+ # Any string times 0 returns an empty string. Therefore, use the same
1099
+ # string instead of generating a new empty string for every node with
1100
+ # zero indentation.
1101
+ indentation = indent_times.zero? ? "" : (indent_text * indent_times)
794
1102
 
795
1103
  config = SaveOptions.new(save_options.to_i)
796
1104
  yield config if block_given?
797
1105
 
798
- native_write_to(io, encoding, indent_text * indent_times, config.options)
1106
+ native_write_to(io, encoding, indentation, config.options)
799
1107
  end
800
1108
 
801
1109
  ###
802
1110
  # Write Node as HTML to +io+ with +options+
803
1111
  #
804
1112
  # See Node#write_to for a list of +options+
805
- def write_html_to io, options = {}
1113
+ def write_html_to(io, options = {})
806
1114
  write_format_to SaveOptions::DEFAULT_HTML, io, options
807
1115
  end
808
1116
 
@@ -810,7 +1118,7 @@ module Nokogiri
810
1118
  # Write Node as XHTML to +io+ with +options+
811
1119
  #
812
1120
  # See Node#write_to for a list of +options+
813
- def write_xhtml_to io, options = {}
1121
+ def write_xhtml_to(io, options = {})
814
1122
  write_format_to SaveOptions::DEFAULT_XHTML, io, options
815
1123
  end
816
1124
 
@@ -820,52 +1128,66 @@ module Nokogiri
820
1128
  # doc.write_xml_to io, :encoding => 'UTF-8'
821
1129
  #
822
1130
  # See Node#write_to for a list of options
823
- def write_xml_to io, options = {}
1131
+ def write_xml_to(io, options = {})
824
1132
  options[:save_with] ||= SaveOptions::DEFAULT_XML
825
1133
  write_to io, options
826
1134
  end
827
1135
 
828
- ###
829
- # Compare two Node objects with respect to their Document. Nodes from
830
- # different documents cannot be compared.
831
- def <=> other
832
- return nil unless other.is_a?(Nokogiri::XML::Node)
833
- return nil unless document == other.document
834
- compare other
1136
+ def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
1137
+ c14n_root = self
1138
+ document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
1139
+ tn = node.is_a?(XML::Node) ? node : parent
1140
+ tn == c14n_root || tn.ancestors.include?(c14n_root)
1141
+ end
835
1142
  end
836
1143
 
837
- ###
838
- # Do xinclude substitution on the subtree below node. If given a block, a
839
- # Nokogiri::XML::ParseOptions object initialized from +options+, will be
840
- # passed to it, allowing more convenient modification of the parser options.
841
- def do_xinclude options = XML::ParseOptions::DEFAULT_XML, &block
842
- options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
1144
+ # @!endgroup
843
1145
 
844
- # give options to user
845
- yield options if block_given?
1146
+ protected
846
1147
 
847
- # call c extension
848
- process_xincludes(options.to_i)
1148
+ def coerce(data)
1149
+ case data
1150
+ when XML::NodeSet
1151
+ return data
1152
+ when XML::DocumentFragment
1153
+ return data.children
1154
+ when String
1155
+ return fragment(data).children
1156
+ when Document, XML::Attr
1157
+ # unacceptable
1158
+ when XML::Node
1159
+ return data
1160
+ end
1161
+
1162
+ raise ArgumentError, <<-EOERR
1163
+ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1164
+ (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1165
+ EOERR
849
1166
  end
850
1167
 
851
- def canonicalize(mode=XML::XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
852
- c14n_root = self
853
- document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
854
- tn = node.is_a?(XML::Node) ? node : parent
855
- tn == c14n_root || tn.ancestors.include?(c14n_root)
1168
+ private
1169
+
1170
+ def keywordify(keywords)
1171
+ case keywords
1172
+ when Enumerable
1173
+ return keywords
1174
+ when String
1175
+ return keywords.scan(/\S+/)
1176
+ else
1177
+ raise ArgumentError.new("Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}")
856
1178
  end
857
1179
  end
858
1180
 
859
- private
1181
+ def add_sibling(next_or_previous, node_or_tags)
1182
+ raise("Cannot add sibling to a node with no parent") unless parent
860
1183
 
861
- def add_sibling next_or_previous, node_or_tags
862
1184
  impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
863
- iter = (next_or_previous == :next) ? :reverse_each : :each
1185
+ iter = (next_or_previous == :next) ? :reverse_each : :each
864
1186
 
865
- node_or_tags = coerce node_or_tags
1187
+ node_or_tags = parent.coerce(node_or_tags)
866
1188
  if node_or_tags.is_a?(XML::NodeSet)
867
1189
  if text?
868
- pivot = Nokogiri::XML::Node.new 'dummy', document
1190
+ pivot = Nokogiri::XML::Node.new "dummy", document
869
1191
  send impl, pivot
870
1192
  else
871
1193
  pivot = self
@@ -878,80 +1200,39 @@ module Nokogiri
878
1200
  node_or_tags
879
1201
  end
880
1202
 
881
- def to_format save_option, options
882
- # FIXME: this is a hack around broken libxml versions
883
- return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1203
+ USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
1204
+ private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
1205
+
1206
+ def to_format(save_option, options)
1207
+ return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
884
1208
 
885
- options[:save_with] |= save_option if options[:save_with]
886
1209
  options[:save_with] = save_option unless options[:save_with]
887
1210
  serialize(options)
888
1211
  end
889
1212
 
890
- def write_format_to save_option, io, options
891
- # FIXME: this is a hack around broken libxml versions
892
- return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1213
+ def write_format_to(save_option, io, options)
1214
+ return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
893
1215
 
894
1216
  options[:save_with] ||= save_option
895
1217
  write_to io, options
896
1218
  end
897
1219
 
898
- def extract_params params # :nodoc:
899
- # Pop off our custom function handler if it exists
900
- handler = params.find { |param|
901
- ![Hash, String, Symbol].include?(param.class)
902
- }
903
-
904
- params -= [handler] if handler
905
-
906
- hashes = []
907
- while Hash === params.last || params.last.nil?
908
- hashes << params.pop
909
- break if params.empty?
910
- end
911
-
912
- ns, binds = hashes.reverse
913
-
914
- ns ||= document.root ? document.root.namespaces : {}
915
-
916
- [params, handler, ns, binds]
917
- end
918
-
919
- def coerce data # :nodoc:
920
- case data
921
- when XML::NodeSet
922
- return data
923
- when XML::DocumentFragment
924
- return data.children
925
- when String
926
- return fragment(data).children
927
- when Document, XML::Attr
928
- # unacceptable
929
- when XML::Node
930
- return data
931
- end
932
-
933
- raise ArgumentError, <<-EOERR
934
- Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
935
- (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
936
- EOERR
937
- end
938
-
939
- def implied_xpath_context
940
- "./"
941
- end
942
-
943
1220
  def inspect_attributes
944
1221
  [:name, :namespace, :attribute_nodes, :children]
945
1222
  end
946
1223
 
947
- def add_child_node_and_reparent_attrs node
1224
+ # @private
1225
+ IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
1226
+
1227
+ def add_child_node_and_reparent_attrs(node)
948
1228
  add_child_node node
949
1229
  node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
950
1230
  attr_node.remove
951
1231
  node[attr_node.name] = attr_node.value
952
1232
  end
953
1233
  end
954
-
955
1234
  end
956
1235
  end
957
1236
  end
1237
+
1238
+ require_relative "node/save_options"