nokogiri 1.10.3 → 1.13.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (223) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +178 -96
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +11 -60
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +752 -423
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +191 -89
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +15 -15
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +13 -18
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +291 -219
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +60 -51
  33. data/ext/nokogiri/xml_node.c +1001 -610
  34. data/ext/nokogiri/xml_node_set.c +174 -162
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +226 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +13 -17
  44. data/ext/nokogiri/xml_xpath_context.c +223 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +99 -103
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +14 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +95 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +219 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +46 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +876 -376
  139. data/lib/nokogiri/xml/node_set.rb +47 -54
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +21 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +37 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +37 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  166. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  174. data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
  175. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  176. metadata +207 -137
  177. data/ext/nokogiri/html_document.c +0 -170
  178. data/ext/nokogiri/html_document.h +0 -10
  179. data/ext/nokogiri/html_element_description.c +0 -279
  180. data/ext/nokogiri/html_element_description.h +0 -10
  181. data/ext/nokogiri/html_entity_lookup.c +0 -32
  182. data/ext/nokogiri/html_entity_lookup.h +0 -8
  183. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  184. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  185. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  186. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  187. data/ext/nokogiri/xml_attr.h +0 -9
  188. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  189. data/ext/nokogiri/xml_cdata.h +0 -9
  190. data/ext/nokogiri/xml_comment.h +0 -9
  191. data/ext/nokogiri/xml_document.h +0 -23
  192. data/ext/nokogiri/xml_document_fragment.h +0 -10
  193. data/ext/nokogiri/xml_dtd.h +0 -10
  194. data/ext/nokogiri/xml_element_content.h +0 -10
  195. data/ext/nokogiri/xml_element_decl.h +0 -9
  196. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  197. data/ext/nokogiri/xml_entity_decl.h +0 -10
  198. data/ext/nokogiri/xml_entity_reference.h +0 -9
  199. data/ext/nokogiri/xml_io.c +0 -61
  200. data/ext/nokogiri/xml_io.h +0 -11
  201. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  202. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  203. data/ext/nokogiri/xml_namespace.h +0 -14
  204. data/ext/nokogiri/xml_node.h +0 -13
  205. data/ext/nokogiri/xml_node_set.h +0 -12
  206. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  207. data/ext/nokogiri/xml_reader.h +0 -10
  208. data/ext/nokogiri/xml_relax_ng.h +0 -9
  209. data/ext/nokogiri/xml_sax_parser.h +0 -39
  210. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  211. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  212. data/ext/nokogiri/xml_schema.h +0 -9
  213. data/ext/nokogiri/xml_syntax_error.h +0 -13
  214. data/ext/nokogiri/xml_text.h +0 -9
  215. data/ext/nokogiri/xml_xpath_context.h +0 -10
  216. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  217. data/lib/nokogiri/html/document_fragment.rb +0 -49
  218. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  219. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  220. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  221. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
  222. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  223. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,105 +1,130 @@
1
- # encoding: UTF-8
2
- require 'stringio'
3
- require 'nokogiri/xml/node/save_options'
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "stringio"
4
5
 
5
6
  module Nokogiri
6
7
  module XML
7
- ####
8
- # Nokogiri::XML::Node is your window to the fun filled world of dealing
9
- # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
10
- # to a hash with regard to attributes. For example (from irb):
8
+ # Nokogiri::XML::Node is the primary API you'll use to interact with your Document.
9
+ #
10
+ # == Attributes
11
+ #
12
+ # A Nokogiri::XML::Node may be treated similarly to a hash with regard to attributes. For
13
+ # example:
14
+ #
15
+ # node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
16
+ # node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
17
+ # node['href'] # => "#foo"
18
+ # node.keys # => ["href", "id"]
19
+ # node.values # => ["#foo", "link"]
20
+ # node['class'] = 'green' # => "green"
21
+ # node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
22
+ #
23
+ # See the method group entitled Node@Working+With+Node+Attributes for the full set of methods.
24
+ #
25
+ # == Navigation
11
26
  #
12
- # irb(main):004:0> node
13
- # => <a href="#foo" id="link">link</a>
14
- # irb(main):005:0> node['href']
15
- # => "#foo"
16
- # irb(main):006:0> node.keys
17
- # => ["href", "id"]
18
- # irb(main):007:0> node.values
19
- # => ["#foo", "link"]
20
- # irb(main):008:0> node['class'] = 'green'
21
- # => "green"
22
- # irb(main):009:0> node
23
- # => <a href="#foo" id="link" class="green">link</a>
24
- # irb(main):010:0>
27
+ # Nokogiri::XML::Node also has methods that let you move around your tree:
25
28
  #
26
- # See Nokogiri::XML::Node#[] and Nokogiri::XML#[]= for more information.
29
+ # [#parent, #children, #next, #previous]
30
+ # Navigate up, down, or through siblings.
27
31
  #
28
- # Nokogiri::XML::Node also has methods that let you move around your
29
- # tree. For navigating your tree, see:
32
+ # See the method group entitled Node@Traversing+Document+Structure for the full set of methods.
30
33
  #
31
- # * Nokogiri::XML::Node#parent
32
- # * Nokogiri::XML::Node#children
33
- # * Nokogiri::XML::Node#next
34
- # * Nokogiri::XML::Node#previous
34
+ # == Serialization
35
35
  #
36
+ # When printing or otherwise emitting a document or a node (and its subtree), there are a few
37
+ # methods you might want to use:
36
38
  #
37
- # When printing or otherwise emitting a document or a node (and
38
- # its subtree), there are a few methods you might want to use:
39
+ # [#content, #text, #inner_text, #to_str]
40
+ # These methods will all **emit plaintext**,
41
+ # meaning that entities will be replaced (e.g., +&lt;+ will be replaced with +<+), meaning
42
+ # that any sanitizing will likely be un-done in the output.
39
43
  #
40
- # * content, text, inner_text, to_str: emit plaintext
44
+ # [#to_s, #to_xml, #to_html, #inner_html]
45
+ # These methods will all **emit properly-escaped markup**, meaning that it's suitable for
46
+ # consumption by browsers, parsers, etc.
41
47
  #
42
- # These methods will all emit the plaintext version of your
43
- # document, meaning that entities will be replaced (e.g., "&lt;"
44
- # will be replaced with "<"), meaning that any sanitizing will
45
- # likely be un-done in the output.
48
+ # See the method group entitled Node@Serialization+and+Generating+Output for the full set of methods.
46
49
  #
47
- # * to_s, to_xml, to_html, inner_html: emit well-formed markup
50
+ # == Searching
48
51
  #
49
- # These methods will all emit properly-escaped markup, meaning
50
- # that it's suitable for consumption by browsers, parsers, etc.
52
+ # You may search this node's subtree using methods like #xpath and #css.
53
+ #
54
+ # See the method group entitled Node@Searching+via+XPath+or+CSS+Queries for the full set of methods.
51
55
  #
52
- # You may search this node's subtree using Searchable#xpath and Searchable#css
53
56
  class Node
54
57
  include Nokogiri::XML::PP::Node
55
58
  include Nokogiri::XML::Searchable
59
+ include Nokogiri::ClassResolver
56
60
  include Enumerable
57
61
 
58
62
  # Element node type, see Nokogiri::XML::Node#element?
59
- ELEMENT_NODE = 1
63
+ ELEMENT_NODE = 1
60
64
  # Attribute node type
61
- ATTRIBUTE_NODE = 2
65
+ ATTRIBUTE_NODE = 2
62
66
  # Text node type, see Nokogiri::XML::Node#text?
63
- TEXT_NODE = 3
67
+ TEXT_NODE = 3
64
68
  # CDATA node type, see Nokogiri::XML::Node#cdata?
65
69
  CDATA_SECTION_NODE = 4
66
70
  # Entity reference node type
67
- ENTITY_REF_NODE = 5
71
+ ENTITY_REF_NODE = 5
68
72
  # Entity node type
69
- ENTITY_NODE = 6
73
+ ENTITY_NODE = 6
70
74
  # PI node type
71
- PI_NODE = 7
75
+ PI_NODE = 7
72
76
  # Comment node type, see Nokogiri::XML::Node#comment?
73
- COMMENT_NODE = 8
77
+ COMMENT_NODE = 8
74
78
  # Document node type, see Nokogiri::XML::Node#xml?
75
- DOCUMENT_NODE = 9
79
+ DOCUMENT_NODE = 9
76
80
  # Document type node type
77
81
  DOCUMENT_TYPE_NODE = 10
78
82
  # Document fragment node type
79
83
  DOCUMENT_FRAG_NODE = 11
80
84
  # Notation node type
81
- NOTATION_NODE = 12
85
+ NOTATION_NODE = 12
82
86
  # HTML document node type, see Nokogiri::XML::Node#html?
83
87
  HTML_DOCUMENT_NODE = 13
84
88
  # DTD node type
85
- DTD_NODE = 14
89
+ DTD_NODE = 14
86
90
  # Element declaration type
87
- ELEMENT_DECL = 15
91
+ ELEMENT_DECL = 15
88
92
  # Attribute declaration type
89
- ATTRIBUTE_DECL = 16
93
+ ATTRIBUTE_DECL = 16
90
94
  # Entity declaration type
91
- ENTITY_DECL = 17
95
+ ENTITY_DECL = 17
92
96
  # Namespace declaration type
93
- NAMESPACE_DECL = 18
97
+ NAMESPACE_DECL = 18
94
98
  # XInclude start type
95
- XINCLUDE_START = 19
99
+ XINCLUDE_START = 19
96
100
  # XInclude end type
97
- XINCLUDE_END = 20
101
+ XINCLUDE_END = 20
98
102
  # DOCB document node type
99
103
  DOCB_DOCUMENT_NODE = 21
100
104
 
101
- def initialize name, document # :nodoc:
102
- # ... Ya. This is empty on purpose.
105
+ #
106
+ # :call-seq:
107
+ # new(name, document) -> Nokogiri::XML::Node
108
+ # new(name, document) { |node| ... } -> Nokogiri::XML::Node
109
+ #
110
+ # Create a new node with +name+ that belongs to +document+.
111
+ #
112
+ # If you intend to add a node to a document tree, it's likely that you will prefer one of the
113
+ # Nokogiri::XML::Node methods like #add_child, #add_next_sibling, #replace, etc. which will
114
+ # both create an element (or subtree) and place it in the document tree.
115
+ #
116
+ # Another alternative, if you are concerned about performance, is
117
+ # Nokogiri::XML::Document#create_element which accepts additional arguments for contents or
118
+ # attributes but (like this method) avoids parsing markup.
119
+ #
120
+ # [Parameters]
121
+ # - +name+ (String)
122
+ # - +document+ (Nokogiri::XML::Document) The document to which the returned node will belong.
123
+ # [Yields] Nokogiri::XML::Node
124
+ # [Returns] Nokogiri::XML::Node
125
+ #
126
+ def initialize(name, document)
127
+ # This is intentionally empty.
103
128
  end
104
129
 
105
130
  ###
@@ -108,24 +133,7 @@ module Nokogiri
108
133
  document.decorate(self)
109
134
  end
110
135
 
111
- ###
112
- # Search this node's immediate children using CSS selector +selector+
113
- def > selector
114
- ns = document.root.namespaces
115
- xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
116
- end
117
-
118
- ###
119
- # Get the attribute value for the attribute +name+
120
- def [] name
121
- get(name.to_s)
122
- end
123
-
124
- ###
125
- # Set the attribute value for the attribute +name+ to +value+
126
- def []= name, value
127
- set name.to_s, value.to_s
128
- end
136
+ # :section: Manipulating Document Structure
129
137
 
130
138
  ###
131
139
  # Add +node_or_tags+ as a child of this Node.
@@ -134,12 +142,12 @@ module Nokogiri
134
142
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
135
143
  #
136
144
  # Also see related method +<<+.
137
- def add_child node_or_tags
145
+ def add_child(node_or_tags)
138
146
  node_or_tags = coerce(node_or_tags)
139
147
  if node_or_tags.is_a?(XML::NodeSet)
140
- node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
148
+ node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
141
149
  else
142
- add_child_node_and_reparent_attrs node_or_tags
150
+ add_child_node_and_reparent_attrs(node_or_tags)
143
151
  end
144
152
  node_or_tags
145
153
  end
@@ -151,17 +159,16 @@ module Nokogiri
151
159
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
152
160
  #
153
161
  # Also see related method +add_child+.
154
- def prepend_child node_or_tags
155
- if first = children.first
162
+ def prepend_child(node_or_tags)
163
+ if (first = children.first)
156
164
  # Mimic the error add_child would raise.
157
- raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
165
+ raise "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
158
166
  first.__send__(:add_sibling, :previous, node_or_tags)
159
167
  else
160
168
  add_child(node_or_tags)
161
169
  end
162
170
  end
163
171
 
164
-
165
172
  ###
166
173
  # Add html around this node
167
174
  #
@@ -180,8 +187,8 @@ module Nokogiri
180
187
  # Returns self, to support chaining of calls (e.g., root << child1 << child2)
181
188
  #
182
189
  # Also see related method +add_child+.
183
- def << node_or_tags
184
- add_child node_or_tags
190
+ def <<(node_or_tags)
191
+ add_child(node_or_tags)
185
192
  self
186
193
  end
187
194
 
@@ -192,10 +199,11 @@ module Nokogiri
192
199
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
193
200
  #
194
201
  # Also see related method +before+.
195
- def add_previous_sibling node_or_tags
196
- raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
202
+ def add_previous_sibling(node_or_tags)
203
+ raise ArgumentError,
204
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
197
205
 
198
- add_sibling :previous, node_or_tags
206
+ add_sibling(:previous, node_or_tags)
199
207
  end
200
208
 
201
209
  ###
@@ -205,10 +213,11 @@ module Nokogiri
205
213
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
206
214
  #
207
215
  # Also see related method +after+.
208
- def add_next_sibling node_or_tags
209
- raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
216
+ def add_next_sibling(node_or_tags)
217
+ raise ArgumentError,
218
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
210
219
 
211
- add_sibling :next, node_or_tags
220
+ add_sibling(:next, node_or_tags)
212
221
  end
213
222
 
214
223
  ####
@@ -218,8 +227,8 @@ module Nokogiri
218
227
  # Returns self, to support chaining of calls.
219
228
  #
220
229
  # Also see related method +add_previous_sibling+.
221
- def before node_or_tags
222
- add_previous_sibling node_or_tags
230
+ def before(node_or_tags)
231
+ add_previous_sibling(node_or_tags)
223
232
  self
224
233
  end
225
234
 
@@ -230,8 +239,8 @@ module Nokogiri
230
239
  # Returns self, to support chaining of calls.
231
240
  #
232
241
  # Also see related method +add_next_sibling+.
233
- def after node_or_tags
234
- add_next_sibling node_or_tags
242
+ def after(node_or_tags)
243
+ add_next_sibling(node_or_tags)
235
244
  self
236
245
  end
237
246
 
@@ -239,30 +248,24 @@ module Nokogiri
239
248
  # Set the inner html for this Node to +node_or_tags+
240
249
  # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
241
250
  #
242
- # Returns self.
243
- #
244
251
  # Also see related method +children=+
245
- def inner_html= node_or_tags
252
+ def inner_html=(node_or_tags)
246
253
  self.children = node_or_tags
247
- self
248
254
  end
249
255
 
250
256
  ####
251
257
  # Set the inner html for this Node to +node_or_tags+
252
258
  # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
253
259
  #
254
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
255
- #
256
260
  # Also see related method +inner_html=+
257
- def children= node_or_tags
261
+ def children=(node_or_tags)
258
262
  node_or_tags = coerce(node_or_tags)
259
263
  children.unlink
260
264
  if node_or_tags.is_a?(XML::NodeSet)
261
- node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
265
+ node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
262
266
  else
263
- add_child_node_and_reparent_attrs node_or_tags
267
+ add_child_node_and_reparent_attrs(node_or_tags)
264
268
  end
265
- node_or_tags
266
269
  end
267
270
 
268
271
  ####
@@ -272,25 +275,27 @@ module Nokogiri
272
275
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
273
276
  #
274
277
  # Also see related method +swap+.
275
- def replace node_or_tags
278
+ def replace(node_or_tags)
279
+ raise("Cannot replace a node with no parent") unless parent
280
+
276
281
  # We cannot replace a text node directly, otherwise libxml will return
277
282
  # an internal error at parser.c:13031, I don't know exactly why
278
283
  # libxml is trying to find a parent node that is an element or document
279
284
  # so I can't tell if this is bug in libxml or not. issue #775.
280
285
  if text?
281
- replacee = Nokogiri::XML::Node.new 'dummy', document
282
- add_previous_sibling_node replacee
286
+ replacee = Nokogiri::XML::Node.new("dummy", document)
287
+ add_previous_sibling_node(replacee)
283
288
  unlink
284
- return replacee.replace node_or_tags
289
+ return replacee.replace(node_or_tags)
285
290
  end
286
291
 
287
- node_or_tags = coerce(node_or_tags)
292
+ node_or_tags = parent.coerce(node_or_tags)
288
293
 
289
294
  if node_or_tags.is_a?(XML::NodeSet)
290
- node_or_tags.each { |n| add_previous_sibling n }
295
+ node_or_tags.each { |n| add_previous_sibling(n) }
291
296
  unlink
292
297
  else
293
- replace_node node_or_tags
298
+ replace_node(node_or_tags)
294
299
  end
295
300
  node_or_tags
296
301
  end
@@ -302,44 +307,215 @@ module Nokogiri
302
307
  # Returns self, to support chaining of calls.
303
308
  #
304
309
  # Also see related method +replace+.
305
- def swap node_or_tags
306
- replace node_or_tags
310
+ def swap(node_or_tags)
311
+ replace(node_or_tags)
307
312
  self
308
313
  end
309
314
 
310
- alias :next :next_sibling
311
- alias :previous :previous_sibling
312
-
313
- # :stopdoc:
314
- # HACK: This is to work around an RDoc bug
315
- alias :next= :add_next_sibling
316
- # :startdoc:
317
-
318
- alias :previous= :add_previous_sibling
319
- alias :remove :unlink
320
- alias :get_attribute :[]
321
- alias :attr :[]
322
- alias :set_attribute :[]=
323
- alias :text :content
324
- alias :inner_text :content
325
- alias :has_attribute? :key?
326
- alias :name :node_name
327
- alias :name= :node_name=
328
- alias :type :node_type
329
- alias :to_str :text
330
- alias :clone :dup
331
- alias :elements :element_children
332
-
333
315
  ####
334
- # Returns a hash containing the node's attributes. The key is
335
- # the attribute name without any namespace, the value is a Nokogiri::XML::Attr
336
- # representing the attribute.
337
- # If you need to distinguish attributes with the same name, with different namespaces
338
- # use #attribute_nodes instead.
316
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
317
+ def content=(string)
318
+ self.native_content = encode_special_chars(string.to_s)
319
+ end
320
+
321
+ ###
322
+ # Set the parent Node for this Node
323
+ def parent=(parent_node)
324
+ parent_node.add_child(self)
325
+ end
326
+
327
+ ###
328
+ # Adds a default namespace supplied as a string +url+ href, to self.
329
+ # The consequence is as if an xmlns attribute with supplied argument were
330
+ # present in parsed XML. A default namespace set with this method will
331
+ # not show up in #attributes, but when this node is serialized to XML an
332
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
333
+ def default_namespace=(url)
334
+ add_namespace_definition(nil, url)
335
+ end
336
+
337
+ ###
338
+ # Set the default namespace on this node (as would be defined with an
339
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
340
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
341
+ # for this node. You probably want #default_namespace= instead, or perhaps
342
+ # #add_namespace_definition with a nil prefix argument.
343
+ def namespace=(ns)
344
+ return set_namespace(ns) unless ns
345
+
346
+ unless Nokogiri::XML::Namespace === ns
347
+ raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
348
+ end
349
+ if ns.document != document
350
+ raise ArgumentError, "namespace must be declared on the same document"
351
+ end
352
+
353
+ set_namespace(ns)
354
+ end
355
+
356
+ ###
357
+ # Do xinclude substitution on the subtree below node. If given a block, a
358
+ # Nokogiri::XML::ParseOptions object initialized from +options+, will be
359
+ # passed to it, allowing more convenient modification of the parser options.
360
+ def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
361
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
362
+ yield options if block_given?
363
+
364
+ # call c extension
365
+ process_xincludes(options.to_i)
366
+ end
367
+
368
+ alias_method :next, :next_sibling
369
+ alias_method :previous, :previous_sibling
370
+ alias_method :next=, :add_next_sibling
371
+ alias_method :previous=, :add_previous_sibling
372
+ alias_method :remove, :unlink
373
+ alias_method :name=, :node_name=
374
+ alias_method :add_namespace, :add_namespace_definition
375
+
376
+ # :section:
377
+
378
+ alias_method :inner_text, :content
379
+ alias_method :text, :content
380
+ alias_method :to_str, :content
381
+ alias_method :name, :node_name
382
+ alias_method :type, :node_type
383
+ alias_method :clone, :dup
384
+ alias_method :elements, :element_children
385
+
386
+ # :section: Working With Node Attributes
387
+
388
+ # :call-seq: [](name) → (String, nil)
389
+ #
390
+ # Fetch an attribute from this node.
391
+ #
392
+ # ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
393
+ # namespaced attributes, use #attribute_with_ns.
394
+ #
395
+ # [Returns] (String, nil) value of the attribute +name+, or +nil+ if no matching attribute exists
396
+ #
397
+ # *Example*
398
+ #
399
+ # doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
400
+ # child = doc.at_css("child")
401
+ # child["size"] # => "large"
402
+ # child["class"] # => "big wide tall"
403
+ #
404
+ # *Example:* Namespaced attributes will not be returned.
405
+ #
406
+ # ⚠ Note namespaced attributes may be accessed with #attribute or #attribute_with_ns
407
+ #
408
+ # doc = Nokogiri::XML(<<~EOF)
409
+ # <root xmlns:width='http://example.com/widths'>
410
+ # <child width:size='broad'/>
411
+ # </root>
412
+ # EOF
413
+ # doc.at_css("child")["size"] # => nil
414
+ # doc.at_css("child").attribute("size").value # => "broad"
415
+ # doc.at_css("child").attribute_with_ns("size", "http://example.com/widths").value
416
+ # # => "broad"
417
+ #
418
+ def [](name)
419
+ get(name.to_s)
420
+ end
421
+
422
+ # :call-seq: []=(name, value) → value
423
+ #
424
+ # Update the attribute +name+ to +value+, or create the attribute if it does not exist.
425
+ #
426
+ # ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
427
+ # namespaced attributes for update, use #attribute_with_ns. To add a namespaced attribute,
428
+ # see the example below.
429
+ #
430
+ # [Returns] +value+
431
+ #
432
+ # *Example*
433
+ #
434
+ # doc = Nokogiri::XML("<root><child/></root>")
435
+ # child = doc.at_css("child")
436
+ # child["size"] = "broad"
437
+ # child.to_html
438
+ # # => "<child size=\"broad\"></child>"
439
+ #
440
+ # *Example:* Add a namespaced attribute.
441
+ #
442
+ # doc = Nokogiri::XML(<<~EOF)
443
+ # <root xmlns:width='http://example.com/widths'>
444
+ # <child/>
445
+ # </root>
446
+ # EOF
447
+ # child = doc.at_css("child")
448
+ # child["size"] = "broad"
449
+ # ns = doc.root.namespace_definitions.find { |ns| ns.prefix == "width" }
450
+ # child.attribute("size").namespace = ns
451
+ # doc.to_html
452
+ # # => "<root xmlns:width=\"http://example.com/widths\">\n" +
453
+ # # " <child width:size=\"broad\"></child>\n" +
454
+ # # "</root>\n"
455
+ #
456
+ def []=(name, value)
457
+ set(name.to_s, value.to_s)
458
+ end
459
+
460
+ #
461
+ # :call-seq: attributes() → Hash<String ⇒ Nokogiri::XML::Attr>
462
+ #
463
+ # Fetch this node's attributes.
464
+ #
465
+ # ⚠ Because the keys do not include any namespace information for the attribute, in case of a
466
+ # simple name collision, not all attributes will be returned. In this case, you will need to
467
+ # use #attribute_nodes.
468
+ #
469
+ # [Returns]
470
+ # Hash containing attributes belonging to +self+. The hash keys are String attribute
471
+ # names (without the namespace), and the hash values are Nokogiri::XML::Attr.
472
+ #
473
+ # *Example* with no namespaces:
474
+ #
475
+ # doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
476
+ # doc.at_css("child").attributes
477
+ # # => {"size"=>#(Attr:0x550 { name = "size", value = "large" }),
478
+ # # "class"=>#(Attr:0x564 { name = "class", value = "big wide tall" })}
479
+ #
480
+ # *Example* with a namespace:
481
+ #
482
+ # doc = Nokogiri::XML("<root xmlns:desc='http://example.com/sizes'><child desc:size='large'/></root>")
483
+ # doc.at_css("child").attributes
484
+ # # => {"size"=>
485
+ # # #(Attr:0x550 {
486
+ # # name = "size",
487
+ # # namespace = #(Namespace:0x564 {
488
+ # # prefix = "desc",
489
+ # # href = "http://example.com/sizes"
490
+ # # }),
491
+ # # value = "large"
492
+ # # })}
493
+ #
494
+ # *Example* with an attribute name collision:
495
+ #
496
+ # ⚠ Note that only one of the attributes is returned in the Hash.
497
+ #
498
+ # doc = Nokogiri::XML(<<~EOF)
499
+ # <root xmlns:width='http://example.com/widths'
500
+ # xmlns:height='http://example.com/heights'>
501
+ # <child width:size='broad' height:size='tall'/>
502
+ # </root>
503
+ # EOF
504
+ # doc.at_css("child").attributes
505
+ # # => {"size"=>
506
+ # # #(Attr:0x550 {
507
+ # # name = "size",
508
+ # # namespace = #(Namespace:0x564 {
509
+ # # prefix = "height",
510
+ # # href = "http://example.com/heights"
511
+ # # }),
512
+ # # value = "tall"
513
+ # # })}
514
+ #
339
515
  def attributes
340
- Hash[attribute_nodes.map { |node|
341
- [node.node_name, node]
342
- }]
516
+ attribute_nodes.each_with_object({}) do |node, hash|
517
+ hash[node.node_name] = node
518
+ end
343
519
  end
344
520
 
345
521
  ###
@@ -348,6 +524,12 @@ module Nokogiri
348
524
  attribute_nodes.map(&:value)
349
525
  end
350
526
 
527
+ ###
528
+ # Does this Node's attributes include <value>
529
+ def value?(value)
530
+ values.include?(value)
531
+ end
532
+
351
533
  ###
352
534
  # Get the attribute names for this Node.
353
535
  def keys
@@ -357,97 +539,401 @@ module Nokogiri
357
539
  ###
358
540
  # Iterate over each attribute name and value pair for this Node.
359
541
  def each
360
- attribute_nodes.each { |node|
542
+ attribute_nodes.each do |node|
361
543
  yield [node.node_name, node.value]
362
- }
544
+ end
363
545
  end
364
546
 
365
547
  ###
366
- # Get the list of class names of this Node, without
367
- # deduplication or sorting.
548
+ # Remove the attribute named +name+
549
+ def remove_attribute(name)
550
+ attr = attributes[name].remove if key?(name)
551
+ clear_xpath_context if Nokogiri.jruby?
552
+ attr
553
+ end
554
+
555
+ #
556
+ # :call-seq: classes() → Array<String>
557
+ #
558
+ # Fetch CSS class names of a Node.
559
+ #
560
+ # This is a convenience function and is equivalent to:
561
+ #
562
+ # node.kwattr_values("class")
563
+ #
564
+ # See related: #kwattr_values, #add_class, #append_class, #remove_class
565
+ #
566
+ # [Returns]
567
+ # The CSS classes (Array of String) present in the Node's "class" attribute. If the
568
+ # attribute is empty or non-existent, the return value is an empty array.
569
+ #
570
+ # *Example*
571
+ #
572
+ # node # => <div class="section title header"></div>
573
+ # node.classes # => ["section", "title", "header"]
574
+ #
368
575
  def classes
369
- self['class'].to_s.scan(/\S+/)
576
+ kwattr_values("class")
370
577
  end
371
578
 
372
- ###
373
- # Add +name+ to the "class" attribute value of this Node and
374
- # return self. If the value is already in the current value, it
375
- # is not added. If no "class" attribute exists yet, one is
376
- # created with the given value.
377
579
  #
378
- # More than one class may be added at a time, separated by a
379
- # space.
380
- def add_class name
381
- names = classes
382
- self['class'] = (names + (name.scan(/\S+/) - names)).join(' ')
580
+ # :call-seq: add_class(names) → self
581
+ #
582
+ # Ensure HTML CSS classes are present on +self+. Any CSS classes in +names+ that already exist
583
+ # in the "class" attribute are _not_ added. Note that any existing duplicates in the
584
+ # "class" attribute are not removed. Compare with #append_class.
585
+ #
586
+ # This is a convenience function and is equivalent to:
587
+ #
588
+ # node.kwattr_add("class", names)
589
+ #
590
+ # See related: #kwattr_add, #classes, #append_class, #remove_class
591
+ #
592
+ # [Parameters]
593
+ # - +names+ (String, Array<String>)
594
+ #
595
+ # CSS class names to be added to the Node's "class" attribute. May be a string containing
596
+ # whitespace-delimited names, or an Array of String names. Any class names already present
597
+ # will not be added. Any class names not present will be added. If no "class" attribute
598
+ # exists, one is created.
599
+ #
600
+ # [Returns] +self+ (Node) for ease of chaining method calls.
601
+ #
602
+ # *Example:* Ensure that the node has CSS class "section"
603
+ #
604
+ # node # => <div></div>
605
+ # node.add_class("section") # => <div class="section"></div>
606
+ # node.add_class("section") # => <div class="section"></div> # duplicate not added
607
+ #
608
+ # *Example:* Ensure that the node has CSS classes "section" and "header", via a String argument
609
+ #
610
+ # Note that the CSS class "section" is not added because it is already present.
611
+ # Note also that the pre-existing duplicate CSS class "section" is not removed.
612
+ #
613
+ # node # => <div class="section section"></div>
614
+ # node.add_class("section header") # => <div class="section section header"></div>
615
+ #
616
+ # *Example:* Ensure that the node has CSS classes "section" and "header", via an Array argument
617
+ #
618
+ # node # => <div></div>
619
+ # node.add_class(["section", "header"]) # => <div class="section header"></div>
620
+ #
621
+ def add_class(names)
622
+ kwattr_add("class", names)
623
+ end
624
+
625
+ #
626
+ # :call-seq: append_class(names) → self
627
+ #
628
+ # Add HTML CSS classes to +self+, regardless of duplication. Compare with #add_class.
629
+ #
630
+ # This is a convenience function and is equivalent to:
631
+ #
632
+ # node.kwattr_append("class", names)
633
+ #
634
+ # See related: #kwattr_append, #classes, #add_class, #remove_class
635
+ #
636
+ # [Parameters]
637
+ # - +names+ (String, Array<String>)
638
+ #
639
+ # CSS class names to be appended to the Node's "class" attribute. May be a string containing
640
+ # whitespace-delimited names, or an Array of String names. All class names passed in will be
641
+ # appended to the "class" attribute even if they are already present in the attribute
642
+ # value. If no "class" attribute exists, one is created.
643
+ #
644
+ # [Returns] +self+ (Node) for ease of chaining method calls.
645
+ #
646
+ # *Example:* Append "section" to the node's CSS "class" attribute
647
+ #
648
+ # node # => <div></div>
649
+ # node.append_class("section") # => <div class="section"></div>
650
+ # node.append_class("section") # => <div class="section section"></div> # duplicate added!
651
+ #
652
+ # *Example:* Append "section" and "header" to the node's CSS "class" attribute, via a String argument
653
+ #
654
+ # Note that the CSS class "section" is appended even though it is already present
655
+ #
656
+ # node # => <div class="section section"></div>
657
+ # node.append_class("section header") # => <div class="section section section header"></div>
658
+ #
659
+ # *Example:* Append "section" and "header" to the node's CSS "class" attribute, via an Array argument
660
+ #
661
+ # node # => <div></div>
662
+ # node.append_class(["section", "header"]) # => <div class="section header"></div>
663
+ # node.append_class(["section", "header"]) # => <div class="section header section header"></div>
664
+ #
665
+ def append_class(names)
666
+ kwattr_append("class", names)
667
+ end
668
+
669
+ # :call-seq:
670
+ # remove_class(css_classes) → self
671
+ #
672
+ # Remove HTML CSS classes from this node. Any CSS class names in +css_classes+ that exist in
673
+ # this node's "class" attribute are removed, including any multiple entries.
674
+ #
675
+ # If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
676
+ # attribute is deleted from the node.
677
+ #
678
+ # This is a convenience function and is equivalent to:
679
+ #
680
+ # node.kwattr_remove("class", css_classes)
681
+ #
682
+ # Also see #kwattr_remove, #classes, #add_class, #append_class
683
+ #
684
+ # [Parameters]
685
+ # - +css_classes+ (String, Array<String>)
686
+ #
687
+ # CSS class names to be removed from the Node's
688
+ # "class" attribute. May be a string containing whitespace-delimited names, or an Array of
689
+ # String names. Any class names already present will be removed. If no CSS classes remain,
690
+ # the "class" attribute is deleted.
691
+ #
692
+ # [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
693
+ #
694
+ # *Example*: Deleting a CSS class
695
+ #
696
+ # Note that all instances of the class "section" are removed from the "class" attribute.
697
+ #
698
+ # node # => <div class="section header section"></div>
699
+ # node.remove_class("section") # => <div class="header"></div>
700
+ #
701
+ # *Example*: Deleting the only remaining CSS class
702
+ #
703
+ # Note that the attribute is removed once there are no remaining classes.
704
+ #
705
+ # node # => <div class="section"></div>
706
+ # node.remove_class("section") # => <div></div>
707
+ #
708
+ # *Example*: Deleting multiple CSS classes
709
+ #
710
+ # Note that the "class" attribute is deleted once it's empty.
711
+ #
712
+ # node # => <div class="section header float"></div>
713
+ # node.remove_class(["section", "float"]) # => <div class="header"></div>
714
+ #
715
+ def remove_class(names = nil)
716
+ kwattr_remove("class", names)
717
+ end
718
+
719
+ # :call-seq:
720
+ # kwattr_values(attribute_name) → Array<String>
721
+ #
722
+ # Fetch values from a keyword attribute of a Node.
723
+ #
724
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
725
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
726
+ # contain CSS classes. But other keyword attributes exist, for instance
727
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
728
+ #
729
+ # See also #classes, #kwattr_add, #kwattr_append, #kwattr_remove
730
+ #
731
+ # [Parameters]
732
+ # - +attribute_name+ (String) The name of the keyword attribute to be inspected.
733
+ #
734
+ # [Returns]
735
+ # (Array<String>) The values present in the Node's +attribute_name+ attribute. If the
736
+ # attribute is empty or non-existent, the return value is an empty array.
737
+ #
738
+ # *Example:*
739
+ #
740
+ # node # => <a rel="nofollow noopener external">link</a>
741
+ # node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
742
+ #
743
+ # Since v1.11.0
744
+ def kwattr_values(attribute_name)
745
+ keywordify(get_attribute(attribute_name) || [])
746
+ end
747
+
748
+ # :call-seq:
749
+ # kwattr_add(attribute_name, keywords) → self
750
+ #
751
+ # Ensure that values are present in a keyword attribute.
752
+ #
753
+ # Any values in +keywords+ that already exist in the Node's attribute values are _not_
754
+ # added. Note that any existing duplicates in the attribute values are not removed. Compare
755
+ # with #kwattr_append.
756
+ #
757
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
758
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
759
+ # contain CSS classes. But other keyword attributes exist, for instance
760
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
761
+ #
762
+ # See also #add_class, #kwattr_values, #kwattr_append, #kwattr_remove
763
+ #
764
+ # [Parameters]
765
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
766
+ # - +keywords+ (String, Array<String>)
767
+ # Keywords to be added to the attribute named +attribute_name+. May be a string containing
768
+ # whitespace-delimited values, or an Array of String values. Any values already present will
769
+ # not be added. Any values not present will be added. If the named attribute does not exist,
770
+ # it is created.
771
+ #
772
+ # [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
773
+ #
774
+ # *Example:* Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
775
+ #
776
+ # Note that duplicates are not added.
777
+ #
778
+ # node # => <a></a>
779
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
780
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
781
+ #
782
+ # *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a
783
+ # String argument.
784
+ #
785
+ # Note that "nofollow" is not added because it is already present. Note also that the
786
+ # pre-existing duplicate "nofollow" is not removed.
787
+ #
788
+ # node # => <a rel="nofollow nofollow"></a>
789
+ # node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
790
+ #
791
+ # *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via
792
+ # an Array argument.
793
+ #
794
+ # node # => <a></a>
795
+ # node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
796
+ #
797
+ # Since v1.11.0
798
+ def kwattr_add(attribute_name, keywords)
799
+ keywords = keywordify(keywords)
800
+ current_kws = kwattr_values(attribute_name)
801
+ new_kws = (current_kws + (keywords - current_kws)).join(" ")
802
+ set_attribute(attribute_name, new_kws)
383
803
  self
384
804
  end
385
805
 
386
- ###
387
- # Append +name+ to the "class" attribute value of this Node and
388
- # return self. The value is simply appended without checking if
389
- # it is already in the current value. If no "class" attribute
390
- # exists yet, one is created with the given value.
806
+ # :call-seq:
807
+ # kwattr_append(attribute_name, keywords) → self
808
+ #
809
+ # Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
810
+ # #kwattr_add.
811
+ #
812
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
813
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
814
+ # contain CSS classes. But other keyword attributes exist, for instance
815
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
816
+ #
817
+ # See also #append_class, #kwattr_values, #kwattr_add, #kwattr_remove
818
+ #
819
+ # [Parameters]
820
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
821
+ # - +keywords+ (String, Array<String>)
822
+ # Keywords to be added to the attribute named +attribute_name+. May be a string containing
823
+ # whitespace-delimited values, or an Array of String values. All values passed in will be
824
+ # appended to the named attribute even if they are already present in the attribute. If the
825
+ # named attribute does not exist, it is created.
826
+ #
827
+ # [Returns] +self+ (Node) for ease of chaining method calls.
391
828
  #
392
- # More than one class may be appended at a time, separated by a
393
- # space.
394
- def append_class name
395
- self['class'] = (classes + name.scan(/\S+/)).join(' ')
829
+ # *Example:* Append "nofollow" to the +rel+ attribute.
830
+ #
831
+ # Note that duplicates are added.
832
+ #
833
+ # node # => <a></a>
834
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
835
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a>
836
+ #
837
+ # *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
838
+ #
839
+ # Note that "nofollow" is appended even though it is already present.
840
+ #
841
+ # node # => <a rel="nofollow"></a>
842
+ # node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
843
+ #
844
+ #
845
+ # *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
846
+ #
847
+ # node # => <a></a>
848
+ # node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
849
+ #
850
+ # Since v1.11.0
851
+ def kwattr_append(attribute_name, keywords)
852
+ keywords = keywordify(keywords)
853
+ current_kws = kwattr_values(attribute_name)
854
+ new_kws = (current_kws + keywords).join(" ")
855
+ set_attribute(attribute_name, new_kws)
396
856
  self
397
857
  end
398
858
 
399
- ###
400
- # Remove +name+ from the "class" attribute value of this Node
401
- # and return self. If there are many occurrences of the name,
402
- # they are all removed.
859
+ # :call-seq:
860
+ # kwattr_remove(attribute_name, keywords) → self
403
861
  #
404
- # More than one class may be removed at a time, separated by a
405
- # space.
862
+ # Remove keywords from a keyword attribute. Any matching keywords that exist in the named
863
+ # attribute are removed, including any multiple entries.
406
864
  #
407
- # If no class name is left after removal, or when +name+ is nil,
408
- # the "class" attribute is removed from this Node.
409
- def remove_class name = nil
410
- if name
411
- names = classes - name.scan(/\S+/)
412
- if names.empty?
413
- delete 'class'
414
- else
415
- self['class'] = names.join(' ')
416
- end
865
+ # If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
866
+ # deleted from the node.
867
+ #
868
+ # A "keyword attribute" is a node attribute that contains a set of space-delimited
869
+ # values. Perhaps the most familiar example of this is the HTML "class" attribute used to
870
+ # contain CSS classes. But other keyword attributes exist, for instance
871
+ # {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
872
+ #
873
+ # See also #remove_class, #kwattr_values, #kwattr_add, #kwattr_append
874
+ #
875
+ # [Parameters]
876
+ # - +attribute_name+ (String) The name of the keyword attribute to be modified.
877
+ # - +keywords+ (String, Array<String>)
878
+ # Keywords to be removed from the attribute named +attribute_name+. May be a string
879
+ # containing whitespace-delimited values, or an Array of String values. Any keywords present
880
+ # in the named attribute will be removed. If no keywords remain, or if +keywords+ is nil,
881
+ # the attribute is deleted.
882
+ #
883
+ # [Returns] +self+ (Node) for ease of chaining method calls.
884
+ #
885
+ # *Example:*
886
+ #
887
+ # Note that the +rel+ attribute is deleted when empty.
888
+ #
889
+ # node # => <a rel="nofollow noreferrer">link</a>
890
+ # node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
891
+ # node.kwattr_remove("rel", "noreferrer") # => <a>link</a>
892
+ #
893
+ # Since v1.11.0
894
+ def kwattr_remove(attribute_name, keywords)
895
+ if keywords.nil?
896
+ remove_attribute(attribute_name)
897
+ return self
898
+ end
899
+
900
+ keywords = keywordify(keywords)
901
+ current_kws = kwattr_values(attribute_name)
902
+ new_kws = current_kws - keywords
903
+ if new_kws.empty?
904
+ remove_attribute(attribute_name)
417
905
  else
418
- delete "class"
906
+ set_attribute(attribute_name, new_kws.join(" "))
419
907
  end
420
908
  self
421
909
  end
422
910
 
423
- ###
424
- # Remove the attribute named +name+
425
- def remove_attribute name
426
- attr = attributes[name].remove if key? name
427
- clear_xpath_context if Nokogiri.jruby?
428
- attr
429
- end
430
- alias :delete :remove_attribute
911
+ alias_method :delete, :remove_attribute
912
+ alias_method :get_attribute, :[]
913
+ alias_method :attr, :[]
914
+ alias_method :set_attribute, :[]=
915
+ alias_method :has_attribute?, :key?
916
+
917
+ # :section:
431
918
 
432
919
  ###
433
920
  # Returns true if this Node matches +selector+
434
- def matches? selector
921
+ def matches?(selector)
435
922
  ancestors.last.search(selector).include?(self)
436
923
  end
437
924
 
438
925
  ###
439
926
  # Create a DocumentFragment containing +tags+ that is relative to _this_
440
927
  # context node.
441
- def fragment tags
442
- type = document.html? ? Nokogiri::HTML : Nokogiri::XML
443
- type::DocumentFragment.new(document, tags, self)
928
+ def fragment(tags)
929
+ document.related_class("DocumentFragment").new(document, tags, self)
444
930
  end
445
931
 
446
932
  ###
447
933
  # Parse +string_or_io+ as a document fragment within the context of
448
934
  # *this* node. Returns a XML::NodeSet containing the nodes parsed from
449
935
  # +string_or_io+.
450
- def parse string_or_io, options = nil
936
+ def parse(string_or_io, options = nil)
451
937
  ##
452
938
  # When the current node is unparented and not an element node, use the
453
939
  # document as the parsing context instead. Otherwise, the in-context
@@ -458,61 +944,87 @@ module Nokogiri
458
944
  end
459
945
 
460
946
  options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
461
- if Integer === options
462
- options = Nokogiri::XML::ParseOptions.new(options)
463
- end
464
- # Give the options to the user
947
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
465
948
  yield options if block_given?
466
949
 
467
- contents = string_or_io.respond_to?(:read) ?
468
- string_or_io.read :
950
+ contents = if string_or_io.respond_to?(:read)
951
+ string_or_io.read
952
+ else
469
953
  string_or_io
954
+ end
470
955
 
471
956
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
472
957
 
473
- ##
474
- # This is a horrible hack, but I don't care. See #313 for background.
958
+ # libxml2 does not obey the +recover+ option after encountering errors during +in_context+
959
+ # parsing, and so this horrible hack is here to try to emulate recovery behavior.
960
+ #
961
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
962
+ # would have been inherited from the context node won't be handled correctly. This hack was
963
+ # written in 2010, and I regret it, because it's silently degrading functionality in a way
964
+ # that's not easily prevented (or even detected).
965
+ #
966
+ # I think preferable behavior would be to either:
967
+ #
968
+ # a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
969
+ # b. don't recover, but raise a sensible exception
970
+ #
971
+ # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
972
+ # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
475
973
  error_count = document.errors.length
476
974
  node_set = in_context(contents, options.to_i)
477
- if node_set.empty? and document.errors.length > error_count and options.recover?
478
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
479
- node_set = fragment.children
975
+ if node_set.empty? && (document.errors.length > error_count)
976
+ if options.recover?
977
+ fragment = document.related_class("DocumentFragment").parse(contents)
978
+ node_set = fragment.children
979
+ else
980
+ raise document.errors[error_count]
981
+ end
480
982
  end
481
983
  node_set
482
984
  end
483
985
 
484
- ####
485
- # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
486
- def content= string
487
- self.native_content = encode_special_chars(string.to_s)
488
- end
489
-
490
- ###
491
- # Set the parent Node for this Node
492
- def parent= parent_node
493
- parent_node.add_child(self)
494
- parent_node
495
- end
496
-
497
- ###
498
- # Returns a Hash of +{prefix => value}+ for all namespaces on this
499
- # node and its ancestors.
986
+ # :call-seq:
987
+ # namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
500
988
  #
501
- # This method returns the same namespaces as #namespace_scopes.
989
+ # Fetch all the namespaces on this node and its ancestors.
990
+ #
991
+ # Note that the keys in this hash are the XML attributes that would be used to define this namespace,
992
+ # such as "xmlns:prefix", not just the prefix.
993
+ #
994
+ # The default namespace for this node will be included with key "xmlns".
995
+ #
996
+ # See also #namespace_scopes
997
+ #
998
+ # [Returns]
999
+ # Hash containing all the namespaces on this node and its ancestors. The hash keys are the
1000
+ # namespace declaration attribute names ("xmlns" or "xmlns:prefix"), and the hash value for each key is the namespace URI.
1001
+ #
1002
+ # *Example:*
1003
+ #
1004
+ # doc = Nokogiri::XML(<<~EOF)
1005
+ # <root xmlns="http://example.com/root" xmlns:in_scope="http://example.com/in_scope">
1006
+ # <first/>
1007
+ # <second xmlns="http://example.com/child"/>
1008
+ # <third xmlns:foo="http://example.com/foo"/>
1009
+ # </root>
1010
+ # EOF
1011
+ # doc.at_xpath("//root:first", "root" => "http://example.com/root").namespaces
1012
+ # # => {"xmlns"=>"http://example.com/root",
1013
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
1014
+ # doc.at_xpath("//child:second", "child" => "http://example.com/child").namespaces
1015
+ # # => {"xmlns"=>"http://example.com/child",
1016
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
1017
+ # doc.at_xpath("//root:third", "root" => "http://example.com/root").namespaces
1018
+ # # => {"xmlns:foo"=>"http://example.com/foo",
1019
+ # # "xmlns"=>"http://example.com/root",
1020
+ # # "xmlns:in_scope"=>"http://example.com/in_scope"}
502
1021
  #
503
- # Returns namespaces in scope for self -- those defined on self
504
- # element directly or any ancestor node -- as a Hash of
505
- # attribute-name/value pairs. Note that the keys in this hash
506
- # XML attributes that would be used to define this namespace,
507
- # such as "xmlns:prefix", not just the prefix. Default namespace
508
- # set on self will be included with key "xmlns". However,
509
- # default namespaces set on ancestor will NOT be, even if self
510
- # has no explicit default namespace.
511
1022
  def namespaces
512
- Hash[namespace_scopes.map { |nd|
513
- key = ['xmlns', nd.prefix].compact.join(':')
514
- [key, nd.href]
515
- }]
1023
+ namespace_scopes.each_with_object({}) do |ns, hash|
1024
+ prefix = ns.prefix
1025
+ key = prefix ? "xmlns:#{prefix}" : "xmlns"
1026
+ hash[key] = ns.href
1027
+ end
516
1028
  end
517
1029
 
518
1030
  # Returns true if this is a Comment
@@ -530,14 +1042,14 @@ module Nokogiri
530
1042
  type == DOCUMENT_NODE
531
1043
  end
532
1044
 
533
- # Returns true if this is an HTML::Document node
1045
+ # Returns true if this is an HTML4::Document or HTML5::Document node
534
1046
  def html?
535
1047
  type == HTML_DOCUMENT_NODE
536
1048
  end
537
1049
 
538
1050
  # Returns true if this is a Document
539
1051
  def document?
540
- is_a? XML::Document
1052
+ is_a?(XML::Document)
541
1053
  end
542
1054
 
543
1055
  # Returns true if this is a ProcessingInstruction node
@@ -556,11 +1068,11 @@ module Nokogiri
556
1068
  end
557
1069
 
558
1070
  ###
559
- # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
1071
+ # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
560
1072
  # nil on XML documents and on unknown tags.
561
1073
  def description
562
1074
  return nil if document.xml?
563
- Nokogiri::HTML::ElementDescription[name]
1075
+ Nokogiri::HTML4::ElementDescription[name]
564
1076
  end
565
1077
 
566
1078
  ###
@@ -574,7 +1086,8 @@ module Nokogiri
574
1086
  def element?
575
1087
  type == ELEMENT_NODE
576
1088
  end
577
- alias :elem? :element?
1089
+
1090
+ alias_method :elem?, :element?
578
1091
 
579
1092
  ###
580
1093
  # Turn this node in to a string. If the document is HTML, this method
@@ -584,28 +1097,28 @@ module Nokogiri
584
1097
  end
585
1098
 
586
1099
  # Get the inner_html for this node's Node#children
587
- def inner_html *args
1100
+ def inner_html(*args)
588
1101
  children.map { |x| x.to_html(*args) }.join
589
1102
  end
590
1103
 
591
1104
  # Get the path to this node as a CSS expression
592
1105
  def css_path
593
- path.split(/\//).map { |part|
594
- part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
595
- }.compact.join(' > ')
1106
+ path.split(%r{/}).map do |part|
1107
+ part.empty? ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
1108
+ end.compact.join(" > ")
596
1109
  end
597
1110
 
598
1111
  ###
599
1112
  # Get a list of ancestor Node for this Node. If +selector+ is given,
600
1113
  # the ancestors must match +selector+
601
- def ancestors selector = nil
1114
+ def ancestors(selector = nil)
602
1115
  return NodeSet.new(document) unless respond_to?(:parent)
603
1116
  return NodeSet.new(document) unless parent
604
1117
 
605
1118
  parents = [parent]
606
1119
 
607
1120
  while parents.last.respond_to?(:parent)
608
- break unless ctx_parent = parents.last.parent
1121
+ break unless (ctx_parent = parents.last.parent)
609
1122
  parents << ctx_parent
610
1123
  end
611
1124
 
@@ -614,62 +1127,43 @@ module Nokogiri
614
1127
  root = parents.last
615
1128
  search_results = root.search(selector)
616
1129
 
617
- NodeSet.new(document, parents.find_all { |parent|
1130
+ NodeSet.new(document, parents.find_all do |parent|
618
1131
  search_results.include?(parent)
619
- })
620
- end
621
-
622
- ###
623
- # Adds a default namespace supplied as a string +url+ href, to self.
624
- # The consequence is as an xmlns attribute with supplied argument were
625
- # present in parsed XML. A default namespace set with this method will
626
- # now show up in #attributes, but when this node is serialized to XML an
627
- # "xmlns" attribute will appear. See also #namespace and #namespace=
628
- def default_namespace= url
629
- add_namespace_definition(nil, url)
630
- end
631
- alias :add_namespace :add_namespace_definition
632
-
633
- ###
634
- # Set the default namespace on this node (as would be defined with an
635
- # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
636
- # a Namespace added this way will NOT be serialized as an xmlns attribute
637
- # for this node. You probably want #default_namespace= instead, or perhaps
638
- # #add_namespace_definition with a nil prefix argument.
639
- def namespace= ns
640
- return set_namespace(ns) unless ns
641
-
642
- unless Nokogiri::XML::Namespace === ns
643
- raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
644
- end
645
- if ns.document != document
646
- raise ArgumentError, 'namespace must be declared on the same document'
647
- end
648
-
649
- set_namespace ns
1132
+ end)
650
1133
  end
651
1134
 
652
1135
  ####
653
1136
  # Yields self and all children to +block+ recursively.
654
- def traverse &block
655
- children.each{|j| j.traverse(&block) }
656
- block.call(self)
1137
+ def traverse(&block)
1138
+ children.each { |j| j.traverse(&block) }
1139
+ yield(self)
657
1140
  end
658
1141
 
659
1142
  ###
660
1143
  # Accept a visitor. This method calls "visit" on +visitor+ with self.
661
- def accept visitor
1144
+ def accept(visitor)
662
1145
  visitor.visit(self)
663
1146
  end
664
1147
 
665
1148
  ###
666
1149
  # Test to see if this Node is equal to +other+
667
- def == other
1150
+ def ==(other)
668
1151
  return false unless other
669
1152
  return false unless other.respond_to?(:pointer_id)
670
1153
  pointer_id == other.pointer_id
671
1154
  end
672
1155
 
1156
+ ###
1157
+ # Compare two Node objects with respect to their Document. Nodes from
1158
+ # different documents cannot be compared.
1159
+ def <=>(other)
1160
+ return nil unless other.is_a?(Nokogiri::XML::Node)
1161
+ return nil unless document == other.document
1162
+ compare(other)
1163
+ end
1164
+
1165
+ # :section: Serialization and Generating Output
1166
+
673
1167
  ###
674
1168
  # Serialize Node using +options+. Save options can also be set using a
675
1169
  # block. See SaveOptions.
@@ -684,19 +1178,23 @@ module Nokogiri
684
1178
  # config.format.as_xml
685
1179
  # end
686
1180
  #
687
- def serialize *args, &block
688
- options = args.first.is_a?(Hash) ? args.shift : {
689
- :encoding => args[0],
690
- :save_with => args[1]
691
- }
1181
+ def serialize(*args, &block)
1182
+ options = if args.first.is_a?(Hash)
1183
+ args.shift
1184
+ else
1185
+ {
1186
+ encoding: args[0],
1187
+ save_with: args[1],
1188
+ }
1189
+ end
692
1190
 
693
1191
  encoding = options[:encoding] || document.encoding
694
1192
  options[:encoding] = encoding
695
1193
 
696
- outstring = String.new
697
- outstring.force_encoding(Encoding.find(encoding || 'utf-8'))
1194
+ outstring = +""
1195
+ outstring.force_encoding(Encoding.find(encoding || "utf-8"))
698
1196
  io = StringIO.new(outstring)
699
- write_to io, options, &block
1197
+ write_to(io, options, &block)
700
1198
  io.string
701
1199
  end
702
1200
 
@@ -707,8 +1205,8 @@ module Nokogiri
707
1205
  #
708
1206
  # See Node#write_to for a list of +options+. For formatted output,
709
1207
  # use Node#to_xhtml instead.
710
- def to_html options = {}
711
- to_format SaveOptions::DEFAULT_HTML, options
1208
+ def to_html(options = {})
1209
+ to_format(SaveOptions::DEFAULT_HTML, options)
712
1210
  end
713
1211
 
714
1212
  ###
@@ -717,7 +1215,7 @@ module Nokogiri
717
1215
  # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
718
1216
  #
719
1217
  # See Node#write_to for a list of +options+
720
- def to_xml options = {}
1218
+ def to_xml(options = {})
721
1219
  options[:save_with] ||= SaveOptions::DEFAULT_XML
722
1220
  serialize(options)
723
1221
  end
@@ -728,8 +1226,8 @@ module Nokogiri
728
1226
  # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
729
1227
  #
730
1228
  # See Node#write_to for a list of +options+
731
- def to_xhtml options = {}
732
- to_format SaveOptions::DEFAULT_XHTML, options
1229
+ def to_xhtml(options = {})
1230
+ to_format(SaveOptions::DEFAULT_XHTML, options)
733
1231
  end
734
1232
 
735
1233
  ###
@@ -749,38 +1247,43 @@ module Nokogiri
749
1247
  #
750
1248
  # node.write_to(io, :indent_text => '-', :indent => 2)
751
1249
  #
752
- def write_to io, *options
753
- options = options.first.is_a?(Hash) ? options.shift : {}
754
- encoding = options[:encoding] || options[0]
1250
+ def write_to(io, *options)
1251
+ options = options.first.is_a?(Hash) ? options.shift : {}
1252
+ encoding = options[:encoding] || options[0]
755
1253
  if Nokogiri.jruby?
756
- save_options = options[:save_with] || options[1]
757
- indent_times = options[:indent] || 0
1254
+ save_options = options[:save_with] || options[1]
1255
+ indent_times = options[:indent] || 0
758
1256
  else
759
- save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
760
- indent_times = options[:indent] || 2
1257
+ save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
1258
+ indent_times = options[:indent] || 2
761
1259
  end
762
- indent_text = options[:indent_text] || ' '
1260
+ indent_text = options[:indent_text] || " "
1261
+
1262
+ # Any string times 0 returns an empty string. Therefore, use the same
1263
+ # string instead of generating a new empty string for every node with
1264
+ # zero indentation.
1265
+ indentation = indent_times.zero? ? "" : (indent_text * indent_times)
763
1266
 
764
1267
  config = SaveOptions.new(save_options.to_i)
765
1268
  yield config if block_given?
766
1269
 
767
- native_write_to(io, encoding, indent_text * indent_times, config.options)
1270
+ native_write_to(io, encoding, indentation, config.options)
768
1271
  end
769
1272
 
770
1273
  ###
771
1274
  # Write Node as HTML to +io+ with +options+
772
1275
  #
773
1276
  # See Node#write_to for a list of +options+
774
- def write_html_to io, options = {}
775
- write_format_to SaveOptions::DEFAULT_HTML, io, options
1277
+ def write_html_to(io, options = {})
1278
+ write_format_to(SaveOptions::DEFAULT_HTML, io, options)
776
1279
  end
777
1280
 
778
1281
  ###
779
1282
  # Write Node as XHTML to +io+ with +options+
780
1283
  #
781
1284
  # See Node#write_to for a list of +options+
782
- def write_xhtml_to io, options = {}
783
- write_format_to SaveOptions::DEFAULT_XHTML, io, options
1285
+ def write_xhtml_to(io, options = {})
1286
+ write_format_to(SaveOptions::DEFAULT_XHTML, io, options)
784
1287
  end
785
1288
 
786
1289
  ###
@@ -789,110 +1292,105 @@ module Nokogiri
789
1292
  # doc.write_xml_to io, :encoding => 'UTF-8'
790
1293
  #
791
1294
  # See Node#write_to for a list of options
792
- def write_xml_to io, options = {}
1295
+ def write_xml_to(io, options = {})
793
1296
  options[:save_with] ||= SaveOptions::DEFAULT_XML
794
- write_to io, options
1297
+ write_to(io, options)
795
1298
  end
796
1299
 
797
- ###
798
- # Compare two Node objects with respect to their Document. Nodes from
799
- # different documents cannot be compared.
800
- def <=> other
801
- return nil unless other.is_a?(Nokogiri::XML::Node)
802
- return nil unless document == other.document
803
- compare other
1300
+ def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
1301
+ c14n_root = self
1302
+ document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
1303
+ tn = node.is_a?(XML::Node) ? node : parent
1304
+ tn == c14n_root || tn.ancestors.include?(c14n_root)
1305
+ end
804
1306
  end
805
1307
 
806
- ###
807
- # Do xinclude substitution on the subtree below node. If given a block, a
808
- # Nokogiri::XML::ParseOptions object initialized from +options+, will be
809
- # passed to it, allowing more convenient modification of the parser options.
810
- def do_xinclude options = XML::ParseOptions::DEFAULT_XML
811
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
1308
+ # :section:
812
1309
 
813
- # give options to user
814
- yield options if block_given?
1310
+ protected
815
1311
 
816
- # call c extension
817
- process_xincludes(options.to_i)
1312
+ def coerce(data)
1313
+ case data
1314
+ when XML::NodeSet
1315
+ return data
1316
+ when XML::DocumentFragment
1317
+ return data.children
1318
+ when String
1319
+ return fragment(data).children
1320
+ when Document, XML::Attr
1321
+ # unacceptable
1322
+ when XML::Node
1323
+ return data
1324
+ end
1325
+
1326
+ raise ArgumentError, <<~EOERR
1327
+ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1328
+ (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1329
+ EOERR
818
1330
  end
819
1331
 
820
- def canonicalize(mode=XML::XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
821
- c14n_root = self
822
- document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
823
- tn = node.is_a?(XML::Node) ? node : parent
824
- tn == c14n_root || tn.ancestors.include?(c14n_root)
1332
+ private
1333
+
1334
+ def keywordify(keywords)
1335
+ case keywords
1336
+ when Enumerable
1337
+ keywords
1338
+ when String
1339
+ keywords.scan(/\S+/)
1340
+ else
1341
+ raise ArgumentError,
1342
+ "Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}"
825
1343
  end
826
1344
  end
827
1345
 
828
- private
1346
+ def add_sibling(next_or_previous, node_or_tags)
1347
+ raise("Cannot add sibling to a node with no parent") unless parent
829
1348
 
830
- def add_sibling next_or_previous, node_or_tags
831
- impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
832
- iter = (next_or_previous == :next) ? :reverse_each : :each
1349
+ impl = next_or_previous == :next ? :add_next_sibling_node : :add_previous_sibling_node
1350
+ iter = next_or_previous == :next ? :reverse_each : :each
833
1351
 
834
- node_or_tags = coerce node_or_tags
1352
+ node_or_tags = parent.coerce(node_or_tags)
835
1353
  if node_or_tags.is_a?(XML::NodeSet)
836
1354
  if text?
837
- pivot = Nokogiri::XML::Node.new 'dummy', document
838
- send impl, pivot
1355
+ pivot = Nokogiri::XML::Node.new("dummy", document)
1356
+ send(impl, pivot)
839
1357
  else
840
1358
  pivot = self
841
1359
  end
842
- node_or_tags.send(iter) { |n| pivot.send impl, n }
1360
+ node_or_tags.send(iter) { |n| pivot.send(impl, n) }
843
1361
  pivot.unlink if text?
844
1362
  else
845
- send impl, node_or_tags
1363
+ send(impl, node_or_tags)
846
1364
  end
847
1365
  node_or_tags
848
1366
  end
849
1367
 
850
- def to_format save_option, options
851
- # FIXME: this is a hack around broken libxml versions
852
- return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1368
+ USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
1369
+ private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
1370
+
1371
+ def to_format(save_option, options)
1372
+ return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
853
1373
 
854
1374
  options[:save_with] = save_option unless options[:save_with]
855
1375
  serialize(options)
856
1376
  end
857
1377
 
858
- def write_format_to save_option, io, options
859
- # FIXME: this is a hack around broken libxml versions
860
- return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1378
+ def write_format_to(save_option, io, options)
1379
+ return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
861
1380
 
862
1381
  options[:save_with] ||= save_option
863
- write_to io, options
1382
+ write_to(io, options)
864
1383
  end
865
1384
 
866
1385
  def inspect_attributes
867
1386
  [:name, :namespace, :attribute_nodes, :children]
868
1387
  end
869
1388
 
870
- def coerce data # :nodoc:
871
- case data
872
- when XML::NodeSet
873
- return data
874
- when XML::DocumentFragment
875
- return data.children
876
- when String
877
- return fragment(data).children
878
- when Document, XML::Attr
879
- # unacceptable
880
- when XML::Node
881
- return data
882
- end
1389
+ IMPLIED_XPATH_CONTEXTS = [".//"].freeze
883
1390
 
884
- raise ArgumentError, <<-EOERR
885
- Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
886
- (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
887
- EOERR
888
- end
889
-
890
- # @private
891
- IMPLIED_XPATH_CONTEXTS = [ './/'.freeze ].freeze # :nodoc:
892
-
893
- def add_child_node_and_reparent_attrs node # :nodoc:
894
- add_child_node node
895
- node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
1391
+ def add_child_node_and_reparent_attrs(node)
1392
+ add_child_node(node)
1393
+ node.attribute_nodes.find_all { |a| a.name.include?(":") }.each do |attr_node|
896
1394
  attr_node.remove
897
1395
  node[attr_node.name] = attr_node.value
898
1396
  end
@@ -900,3 +1398,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
900
1398
  end
901
1399
  end
902
1400
  end
1401
+
1402
+ require_relative "node/save_options"