nokogiri 1.10.3 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (218) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +176 -96
  6. data/dependencies.yml +28 -26
  7. data/ext/nokogiri/depend +38 -358
  8. data/ext/nokogiri/extconf.rb +716 -414
  9. data/ext/nokogiri/gumbo.c +584 -0
  10. data/ext/nokogiri/html4_document.c +166 -0
  11. data/ext/nokogiri/html4_element_description.c +294 -0
  12. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  13. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  14. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  15. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  16. data/ext/nokogiri/nokogiri.c +228 -91
  17. data/ext/nokogiri/nokogiri.h +191 -89
  18. data/ext/nokogiri/test_global_handlers.c +40 -0
  19. data/ext/nokogiri/xml_attr.c +15 -15
  20. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  21. data/ext/nokogiri/xml_cdata.c +13 -18
  22. data/ext/nokogiri/xml_comment.c +19 -26
  23. data/ext/nokogiri/xml_document.c +267 -195
  24. data/ext/nokogiri/xml_document_fragment.c +13 -15
  25. data/ext/nokogiri/xml_dtd.c +54 -48
  26. data/ext/nokogiri/xml_element_content.c +31 -26
  27. data/ext/nokogiri/xml_element_decl.c +22 -22
  28. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  29. data/ext/nokogiri/xml_entity_decl.c +32 -30
  30. data/ext/nokogiri/xml_entity_reference.c +16 -18
  31. data/ext/nokogiri/xml_namespace.c +60 -51
  32. data/ext/nokogiri/xml_node.c +493 -407
  33. data/ext/nokogiri/xml_node_set.c +174 -162
  34. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  35. data/ext/nokogiri/xml_reader.c +197 -172
  36. data/ext/nokogiri/xml_relax_ng.c +52 -28
  37. data/ext/nokogiri/xml_sax_parser.c +112 -112
  38. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  39. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  40. data/ext/nokogiri/xml_schema.c +112 -33
  41. data/ext/nokogiri/xml_syntax_error.c +42 -21
  42. data/ext/nokogiri/xml_text.c +13 -17
  43. data/ext/nokogiri/xml_xpath_context.c +158 -73
  44. data/ext/nokogiri/xslt_stylesheet.c +158 -164
  45. data/gumbo-parser/CHANGES.md +63 -0
  46. data/gumbo-parser/Makefile +101 -0
  47. data/gumbo-parser/THANKS +27 -0
  48. data/gumbo-parser/src/Makefile +34 -0
  49. data/gumbo-parser/src/README.md +41 -0
  50. data/gumbo-parser/src/ascii.c +75 -0
  51. data/gumbo-parser/src/ascii.h +115 -0
  52. data/gumbo-parser/src/attribute.c +42 -0
  53. data/gumbo-parser/src/attribute.h +17 -0
  54. data/gumbo-parser/src/char_ref.c +22225 -0
  55. data/gumbo-parser/src/char_ref.h +29 -0
  56. data/gumbo-parser/src/char_ref.rl +2154 -0
  57. data/gumbo-parser/src/error.c +626 -0
  58. data/gumbo-parser/src/error.h +148 -0
  59. data/gumbo-parser/src/foreign_attrs.c +104 -0
  60. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  61. data/gumbo-parser/src/gumbo.h +943 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/parser.c +4886 -0
  65. data/gumbo-parser/src/parser.h +41 -0
  66. data/gumbo-parser/src/replacement.h +33 -0
  67. data/gumbo-parser/src/string_buffer.c +103 -0
  68. data/gumbo-parser/src/string_buffer.h +68 -0
  69. data/gumbo-parser/src/string_piece.c +48 -0
  70. data/gumbo-parser/src/svg_attrs.c +174 -0
  71. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  72. data/gumbo-parser/src/svg_tags.c +137 -0
  73. data/gumbo-parser/src/svg_tags.gperf +55 -0
  74. data/gumbo-parser/src/tag.c +222 -0
  75. data/gumbo-parser/src/tag_lookup.c +382 -0
  76. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  77. data/gumbo-parser/src/tag_lookup.h +13 -0
  78. data/gumbo-parser/src/token_buffer.c +79 -0
  79. data/gumbo-parser/src/token_buffer.h +71 -0
  80. data/gumbo-parser/src/token_type.h +17 -0
  81. data/gumbo-parser/src/tokenizer.c +3463 -0
  82. data/gumbo-parser/src/tokenizer.h +112 -0
  83. data/gumbo-parser/src/tokenizer_states.h +339 -0
  84. data/gumbo-parser/src/utf8.c +245 -0
  85. data/gumbo-parser/src/utf8.h +164 -0
  86. data/gumbo-parser/src/util.c +68 -0
  87. data/gumbo-parser/src/util.h +30 -0
  88. data/gumbo-parser/src/vector.c +111 -0
  89. data/gumbo-parser/src/vector.h +45 -0
  90. data/lib/nokogiri/css/node.rb +1 -0
  91. data/lib/nokogiri/css/parser.rb +64 -63
  92. data/lib/nokogiri/css/parser.y +3 -3
  93. data/lib/nokogiri/css/parser_extras.rb +39 -36
  94. data/lib/nokogiri/css/syntax_error.rb +2 -1
  95. data/lib/nokogiri/css/tokenizer.rb +105 -103
  96. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  97. data/lib/nokogiri/css.rb +15 -14
  98. data/lib/nokogiri/decorators/slop.rb +1 -0
  99. data/lib/nokogiri/extension.rb +31 -0
  100. data/lib/nokogiri/gumbo.rb +14 -0
  101. data/lib/nokogiri/html.rb +32 -27
  102. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  103. data/lib/nokogiri/{html → html4}/document.rb +17 -30
  104. data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
  105. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  106. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  107. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
  109. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  110. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
  111. data/lib/nokogiri/html4.rb +40 -0
  112. data/lib/nokogiri/html5/document.rb +74 -0
  113. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  114. data/lib/nokogiri/html5/node.rb +93 -0
  115. data/lib/nokogiri/html5.rb +473 -0
  116. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  117. data/lib/nokogiri/syntax_error.rb +1 -0
  118. data/lib/nokogiri/version/constant.rb +5 -0
  119. data/lib/nokogiri/version/info.rb +215 -0
  120. data/lib/nokogiri/version.rb +3 -109
  121. data/lib/nokogiri/xml/attr.rb +1 -0
  122. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  123. data/lib/nokogiri/xml/builder.rb +74 -32
  124. data/lib/nokogiri/xml/cdata.rb +1 -0
  125. data/lib/nokogiri/xml/character_data.rb +1 -0
  126. data/lib/nokogiri/xml/document.rb +138 -41
  127. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  128. data/lib/nokogiri/xml/dtd.rb +1 -0
  129. data/lib/nokogiri/xml/element_content.rb +1 -0
  130. data/lib/nokogiri/xml/element_decl.rb +1 -0
  131. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  132. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  133. data/lib/nokogiri/xml/namespace.rb +1 -0
  134. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  135. data/lib/nokogiri/xml/node.rb +629 -293
  136. data/lib/nokogiri/xml/node_set.rb +1 -0
  137. data/lib/nokogiri/xml/notation.rb +1 -0
  138. data/lib/nokogiri/xml/parse_options.rb +12 -3
  139. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  140. data/lib/nokogiri/xml/pp/node.rb +1 -0
  141. data/lib/nokogiri/xml/pp.rb +3 -2
  142. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  143. data/lib/nokogiri/xml/reader.rb +9 -12
  144. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  145. data/lib/nokogiri/xml/sax/document.rb +25 -30
  146. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  147. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  148. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  149. data/lib/nokogiri/xml/sax.rb +5 -4
  150. data/lib/nokogiri/xml/schema.rb +13 -4
  151. data/lib/nokogiri/xml/searchable.rb +25 -16
  152. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  153. data/lib/nokogiri/xml/text.rb +1 -0
  154. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  155. data/lib/nokogiri/xml/xpath.rb +4 -5
  156. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  157. data/lib/nokogiri/xml.rb +36 -36
  158. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  159. data/lib/nokogiri/xslt.rb +17 -16
  160. data/lib/nokogiri.rb +32 -51
  161. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  162. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  163. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  164. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  165. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  166. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  167. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  168. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  169. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  171. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  172. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  173. metadata +151 -153
  174. data/ext/nokogiri/html_document.c +0 -170
  175. data/ext/nokogiri/html_document.h +0 -10
  176. data/ext/nokogiri/html_element_description.c +0 -279
  177. data/ext/nokogiri/html_element_description.h +0 -10
  178. data/ext/nokogiri/html_entity_lookup.c +0 -32
  179. data/ext/nokogiri/html_entity_lookup.h +0 -8
  180. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  181. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  182. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  183. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_attr.h +0 -9
  185. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  186. data/ext/nokogiri/xml_cdata.h +0 -9
  187. data/ext/nokogiri/xml_comment.h +0 -9
  188. data/ext/nokogiri/xml_document.h +0 -23
  189. data/ext/nokogiri/xml_document_fragment.h +0 -10
  190. data/ext/nokogiri/xml_dtd.h +0 -10
  191. data/ext/nokogiri/xml_element_content.h +0 -10
  192. data/ext/nokogiri/xml_element_decl.h +0 -9
  193. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  194. data/ext/nokogiri/xml_entity_decl.h +0 -10
  195. data/ext/nokogiri/xml_entity_reference.h +0 -9
  196. data/ext/nokogiri/xml_io.c +0 -61
  197. data/ext/nokogiri/xml_io.h +0 -11
  198. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  199. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  200. data/ext/nokogiri/xml_namespace.h +0 -14
  201. data/ext/nokogiri/xml_node.h +0 -13
  202. data/ext/nokogiri/xml_node_set.h +0 -12
  203. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  204. data/ext/nokogiri/xml_reader.h +0 -10
  205. data/ext/nokogiri/xml_relax_ng.h +0 -9
  206. data/ext/nokogiri/xml_sax_parser.h +0 -39
  207. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  208. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  209. data/ext/nokogiri/xml_schema.h +0 -9
  210. data/ext/nokogiri/xml_syntax_error.h +0 -13
  211. data/ext/nokogiri/xml_text.h +0 -9
  212. data/ext/nokogiri/xml_xpath_context.h +0 -10
  213. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  214. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  215. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  216. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
  217. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  218. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,105 +1,102 @@
1
1
  # encoding: UTF-8
2
- require 'stringio'
3
- require 'nokogiri/xml/node/save_options'
2
+ # frozen_string_literal: true
3
+ require "stringio"
4
4
 
5
5
  module Nokogiri
6
6
  module XML
7
- ####
8
- # Nokogiri::XML::Node is your window to the fun filled world of dealing
9
- # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
10
- # to a hash with regard to attributes. For example (from irb):
7
+ ##
8
+ # {Nokogiri::XML::Node} is your window to the fun filled world of dealing with XML and HTML
9
+ # tags. A {Nokogiri::XML::Node} may be treated similarly to a hash with regard to attributes. For
10
+ # example:
11
11
  #
12
- # irb(main):004:0> node
13
- # => <a href="#foo" id="link">link</a>
14
- # irb(main):005:0> node['href']
15
- # => "#foo"
16
- # irb(main):006:0> node.keys
17
- # => ["href", "id"]
18
- # irb(main):007:0> node.values
19
- # => ["#foo", "link"]
20
- # irb(main):008:0> node['class'] = 'green'
21
- # => "green"
22
- # irb(main):009:0> node
23
- # => <a href="#foo" id="link" class="green">link</a>
24
- # irb(main):010:0>
12
+ # node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
13
+ # node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
14
+ # node['href'] # => "#foo"
15
+ # node.keys # => ["href", "id"]
16
+ # node.values # => ["#foo", "link"]
17
+ # node['class'] = 'green' # => "green"
18
+ # node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
25
19
  #
26
- # See Nokogiri::XML::Node#[] and Nokogiri::XML#[]= for more information.
20
+ # See the method group entitled "Working With Node Attributes" for the full set of methods.
27
21
  #
28
- # Nokogiri::XML::Node also has methods that let you move around your
22
+ # {Nokogiri::XML::Node} also has methods that let you move around your
29
23
  # tree. For navigating your tree, see:
30
24
  #
31
- # * Nokogiri::XML::Node#parent
32
- # * Nokogiri::XML::Node#children
33
- # * Nokogiri::XML::Node#next
34
- # * Nokogiri::XML::Node#previous
35
- #
25
+ # * {#parent}
26
+ # * {#children}
27
+ # * {#next}
28
+ # * {#previous}
36
29
  #
37
30
  # When printing or otherwise emitting a document or a node (and
38
31
  # its subtree), there are a few methods you might want to use:
39
32
  #
40
- # * content, text, inner_text, to_str: emit plaintext
41
- #
42
- # These methods will all emit the plaintext version of your
43
- # document, meaning that entities will be replaced (e.g., "&lt;"
44
- # will be replaced with "<"), meaning that any sanitizing will
45
- # likely be un-done in the output.
33
+ # * {#content}, {#text}, {#inner_text}, {#to_str}: These methods will all <b>emit plaintext</b>,
34
+ # meaning that entities will be replaced (e.g., "&lt;" will be replaced with "<"), meaning
35
+ # that any sanitizing will likely be un-done in the output.
46
36
  #
47
- # * to_s, to_xml, to_html, inner_html: emit well-formed markup
37
+ # * {#to_s}, {#to_xml}, {#to_html}, {#inner_html}: These methods will all <b>emit
38
+ # properly-escaped markup</b>, meaning that it's suitable for consumption by browsers,
39
+ # parsers, etc.
48
40
  #
49
- # These methods will all emit properly-escaped markup, meaning
50
- # that it's suitable for consumption by browsers, parsers, etc.
41
+ # You may search this node's subtree using {#xpath} and {#css}
51
42
  #
52
- # You may search this node's subtree using Searchable#xpath and Searchable#css
53
43
  class Node
54
44
  include Nokogiri::XML::PP::Node
55
45
  include Nokogiri::XML::Searchable
56
46
  include Enumerable
57
47
 
58
- # Element node type, see Nokogiri::XML::Node#element?
59
- ELEMENT_NODE = 1
48
+ # Element node type, see {Nokogiri::XML::Node#element?}
49
+ ELEMENT_NODE = 1
60
50
  # Attribute node type
61
- ATTRIBUTE_NODE = 2
62
- # Text node type, see Nokogiri::XML::Node#text?
63
- TEXT_NODE = 3
64
- # CDATA node type, see Nokogiri::XML::Node#cdata?
51
+ ATTRIBUTE_NODE = 2
52
+ # Text node type, see {Nokogiri::XML::Node#text?}
53
+ TEXT_NODE = 3
54
+ # CDATA node type, see {Nokogiri::XML::Node#cdata?}
65
55
  CDATA_SECTION_NODE = 4
66
56
  # Entity reference node type
67
- ENTITY_REF_NODE = 5
57
+ ENTITY_REF_NODE = 5
68
58
  # Entity node type
69
- ENTITY_NODE = 6
59
+ ENTITY_NODE = 6
70
60
  # PI node type
71
- PI_NODE = 7
72
- # Comment node type, see Nokogiri::XML::Node#comment?
73
- COMMENT_NODE = 8
74
- # Document node type, see Nokogiri::XML::Node#xml?
75
- DOCUMENT_NODE = 9
61
+ PI_NODE = 7
62
+ # Comment node type, see {Nokogiri::XML::Node#comment?}
63
+ COMMENT_NODE = 8
64
+ # Document node type, see {Nokogiri::XML::Node#xml?}
65
+ DOCUMENT_NODE = 9
76
66
  # Document type node type
77
67
  DOCUMENT_TYPE_NODE = 10
78
68
  # Document fragment node type
79
69
  DOCUMENT_FRAG_NODE = 11
80
70
  # Notation node type
81
- NOTATION_NODE = 12
82
- # HTML document node type, see Nokogiri::XML::Node#html?
71
+ NOTATION_NODE = 12
72
+ # HTML document node type, see {Nokogiri::XML::Node#html?}
83
73
  HTML_DOCUMENT_NODE = 13
84
74
  # DTD node type
85
- DTD_NODE = 14
75
+ DTD_NODE = 14
86
76
  # Element declaration type
87
- ELEMENT_DECL = 15
77
+ ELEMENT_DECL = 15
88
78
  # Attribute declaration type
89
- ATTRIBUTE_DECL = 16
79
+ ATTRIBUTE_DECL = 16
90
80
  # Entity declaration type
91
- ENTITY_DECL = 17
81
+ ENTITY_DECL = 17
92
82
  # Namespace declaration type
93
- NAMESPACE_DECL = 18
83
+ NAMESPACE_DECL = 18
94
84
  # XInclude start type
95
- XINCLUDE_START = 19
85
+ XINCLUDE_START = 19
96
86
  # XInclude end type
97
- XINCLUDE_END = 20
87
+ XINCLUDE_END = 20
98
88
  # DOCB document node type
99
89
  DOCB_DOCUMENT_NODE = 21
100
90
 
101
- def initialize name, document # :nodoc:
102
- # ... Ya. This is empty on purpose.
91
+ ##
92
+ # Create a new node with +name+ sharing GC lifecycle with +document+.
93
+ # @param name [String]
94
+ # @param document [Nokogiri::XML::Document]
95
+ # @yieldparam node [Nokogiri::XML::Node]
96
+ # @return [Nokogiri::XML::Node]
97
+ # @see Nokogiri::XML::Node.new
98
+ def initialize(name, document)
99
+ # This is intentionally empty.
103
100
  end
104
101
 
105
102
  ###
@@ -108,24 +105,18 @@ module Nokogiri
108
105
  document.decorate(self)
109
106
  end
110
107
 
108
+ # @!group Searching via XPath or CSS Queries
109
+
111
110
  ###
112
111
  # Search this node's immediate children using CSS selector +selector+
113
- def > selector
112
+ def >(selector)
114
113
  ns = document.root.namespaces
115
114
  xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
116
115
  end
117
116
 
118
- ###
119
- # Get the attribute value for the attribute +name+
120
- def [] name
121
- get(name.to_s)
122
- end
117
+ # @!endgroup
123
118
 
124
- ###
125
- # Set the attribute value for the attribute +name+ to +value+
126
- def []= name, value
127
- set name.to_s, value.to_s
128
- end
119
+ # @!group Manipulating Document Structure
129
120
 
130
121
  ###
131
122
  # Add +node_or_tags+ as a child of this Node.
@@ -134,7 +125,7 @@ module Nokogiri
134
125
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
135
126
  #
136
127
  # Also see related method +<<+.
137
- def add_child node_or_tags
128
+ def add_child(node_or_tags)
138
129
  node_or_tags = coerce(node_or_tags)
139
130
  if node_or_tags.is_a?(XML::NodeSet)
140
131
  node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
@@ -151,7 +142,7 @@ module Nokogiri
151
142
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
152
143
  #
153
144
  # Also see related method +add_child+.
154
- def prepend_child node_or_tags
145
+ def prepend_child(node_or_tags)
155
146
  if first = children.first
156
147
  # Mimic the error add_child would raise.
157
148
  raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
@@ -161,7 +152,6 @@ module Nokogiri
161
152
  end
162
153
  end
163
154
 
164
-
165
155
  ###
166
156
  # Add html around this node
167
157
  #
@@ -180,7 +170,7 @@ module Nokogiri
180
170
  # Returns self, to support chaining of calls (e.g., root << child1 << child2)
181
171
  #
182
172
  # Also see related method +add_child+.
183
- def << node_or_tags
173
+ def <<(node_or_tags)
184
174
  add_child node_or_tags
185
175
  self
186
176
  end
@@ -192,7 +182,7 @@ module Nokogiri
192
182
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
193
183
  #
194
184
  # Also see related method +before+.
195
- def add_previous_sibling node_or_tags
185
+ def add_previous_sibling(node_or_tags)
196
186
  raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
197
187
 
198
188
  add_sibling :previous, node_or_tags
@@ -205,7 +195,7 @@ module Nokogiri
205
195
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
206
196
  #
207
197
  # Also see related method +after+.
208
- def add_next_sibling node_or_tags
198
+ def add_next_sibling(node_or_tags)
209
199
  raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
210
200
 
211
201
  add_sibling :next, node_or_tags
@@ -218,7 +208,7 @@ module Nokogiri
218
208
  # Returns self, to support chaining of calls.
219
209
  #
220
210
  # Also see related method +add_previous_sibling+.
221
- def before node_or_tags
211
+ def before(node_or_tags)
222
212
  add_previous_sibling node_or_tags
223
213
  self
224
214
  end
@@ -230,7 +220,7 @@ module Nokogiri
230
220
  # Returns self, to support chaining of calls.
231
221
  #
232
222
  # Also see related method +add_next_sibling+.
233
- def after node_or_tags
223
+ def after(node_or_tags)
234
224
  add_next_sibling node_or_tags
235
225
  self
236
226
  end
@@ -242,7 +232,7 @@ module Nokogiri
242
232
  # Returns self.
243
233
  #
244
234
  # Also see related method +children=+
245
- def inner_html= node_or_tags
235
+ def inner_html=(node_or_tags)
246
236
  self.children = node_or_tags
247
237
  self
248
238
  end
@@ -254,7 +244,7 @@ module Nokogiri
254
244
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
255
245
  #
256
246
  # Also see related method +inner_html=+
257
- def children= node_or_tags
247
+ def children=(node_or_tags)
258
248
  node_or_tags = coerce(node_or_tags)
259
249
  children.unlink
260
250
  if node_or_tags.is_a?(XML::NodeSet)
@@ -272,19 +262,21 @@ module Nokogiri
272
262
  # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
273
263
  #
274
264
  # Also see related method +swap+.
275
- def replace node_or_tags
265
+ def replace(node_or_tags)
266
+ raise("Cannot replace a node with no parent") unless parent
267
+
276
268
  # We cannot replace a text node directly, otherwise libxml will return
277
269
  # an internal error at parser.c:13031, I don't know exactly why
278
270
  # libxml is trying to find a parent node that is an element or document
279
271
  # so I can't tell if this is a bug in libxml or not. issue #775.
280
272
  if text?
281
- replacee = Nokogiri::XML::Node.new 'dummy', document
273
+ replacee = Nokogiri::XML::Node.new "dummy", document
282
274
  add_previous_sibling_node replacee
283
275
  unlink
284
276
  return replacee.replace node_or_tags
285
277
  end
286
278
 
287
- node_or_tags = coerce(node_or_tags)
279
+ node_or_tags = parent.coerce(node_or_tags)
288
280
 
289
281
  if node_or_tags.is_a?(XML::NodeSet)
290
282
  node_or_tags.each { |n| add_previous_sibling n }
@@ -302,33 +294,98 @@ module Nokogiri
302
294
  # Returns self, to support chaining of calls.
303
295
  #
304
296
  # Also see related method +replace+.
305
- def swap node_or_tags
297
+ def swap(node_or_tags)
306
298
  replace node_or_tags
307
299
  self
308
300
  end
309
301
 
310
- alias :next :next_sibling
311
- alias :previous :previous_sibling
302
+ ####
303
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
304
+ def content=(string)
305
+ self.native_content = encode_special_chars(string.to_s)
306
+ end
307
+
308
+ ###
309
+ # Set the parent Node for this Node
310
+ def parent=(parent_node)
311
+ parent_node.add_child(self)
312
+ parent_node
313
+ end
312
314
 
313
- # :stopdoc:
314
- # HACK: This is to work around an RDoc bug
315
- alias :next= :add_next_sibling
316
- # :startdoc:
315
+ ###
316
+ # Adds a default namespace supplied as a string +url+ href, to self.
317
+ # The consequence is as if an xmlns attribute with supplied argument were
318
+ # present in parsed XML. A default namespace set with this method will
319
+ # not show up in #attributes, but when this node is serialized to XML an
320
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
321
+ def default_namespace=(url)
322
+ add_namespace_definition(nil, url)
323
+ end
317
324
 
318
- alias :previous= :add_previous_sibling
319
- alias :remove :unlink
320
- alias :get_attribute :[]
321
- alias :attr :[]
322
- alias :set_attribute :[]=
323
- alias :text :content
324
- alias :inner_text :content
325
- alias :has_attribute? :key?
326
- alias :name :node_name
327
- alias :name= :node_name=
328
- alias :type :node_type
329
- alias :to_str :text
330
- alias :clone :dup
331
- alias :elements :element_children
325
+ ###
326
+ # Set the default namespace on this node (as would be defined with an
327
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
328
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
329
+ # for this node. You probably want #default_namespace= instead, or perhaps
330
+ # #add_namespace_definition with a nil prefix argument.
331
+ def namespace=(ns)
332
+ return set_namespace(ns) unless ns
333
+
334
+ unless Nokogiri::XML::Namespace === ns
335
+ raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
336
+ end
337
+ if ns.document != document
338
+ raise ArgumentError, "namespace must be declared on the same document"
339
+ end
340
+
341
+ set_namespace ns
342
+ end
343
+
344
+ ###
345
+ # Do xinclude substitution on the subtree below node. If given a block, a
346
+ # Nokogiri::XML::ParseOptions object initialized from +options+, will be
347
+ # passed to it, allowing more convenient modification of the parser options.
348
+ def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
349
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
350
+
351
+ # give options to user
352
+ yield options if block_given?
353
+
354
+ # call c extension
355
+ process_xincludes(options.to_i)
356
+ end
357
+
358
+ alias :next :next_sibling
359
+ alias :previous :previous_sibling
360
+ alias :next= :add_next_sibling
361
+ alias :previous= :add_previous_sibling
362
+ alias :remove :unlink
363
+ alias :name= :node_name=
364
+ alias :add_namespace :add_namespace_definition
365
+
366
+ # @!endgroup
367
+
368
+ alias :text :content
369
+ alias :inner_text :content
370
+ alias :name :node_name
371
+ alias :type :node_type
372
+ alias :to_str :text
373
+ alias :clone :dup
374
+ alias :elements :element_children
375
+
376
+ # @!group Working With Node Attributes
377
+
378
+ ###
379
+ # Get the attribute value for the attribute +name+
380
+ def [](name)
381
+ get(name.to_s)
382
+ end
383
+
384
+ ###
385
+ # Set the attribute value for the attribute +name+ to +value+
386
+ def []=(name, value)
387
+ set name.to_s, value.to_s
388
+ end
332
389
 
333
390
  ####
334
391
  # Returns a hash containing the node's attributes. The key is
@@ -337,9 +394,9 @@ module Nokogiri
337
394
  # If you need to distinguish attributes with the same name, with different namespaces
338
395
  # use #attribute_nodes instead.
339
396
  def attributes
340
- Hash[attribute_nodes.map { |node|
341
- [node.node_name, node]
342
- }]
397
+ attribute_nodes.each_with_object({}) do |node, hash|
398
+ hash[node.node_name] = node
399
+ end
343
400
  end
344
401
 
345
402
  ###
@@ -348,6 +405,12 @@ module Nokogiri
348
405
  attribute_nodes.map(&:value)
349
406
  end
350
407
 
408
+ ###
409
+ # Do this Node's attributes include <value>?
410
+ def value?(value)
411
+ values.include? value
412
+ end
413
+
351
414
  ###
352
415
  # Get the attribute names for this Node.
353
416
  def keys
@@ -363,82 +426,366 @@ module Nokogiri
363
426
  end
364
427
 
365
428
  ###
366
- # Get the list of class names of this Node, without
367
- # deduplication or sorting.
429
+ # Remove the attribute named +name+
430
+ def remove_attribute(name)
431
+ attr = attributes[name].remove if key? name
432
+ clear_xpath_context if Nokogiri.jruby?
433
+ attr
434
+ end
435
+
436
+ # Get the CSS class names of a Node.
437
+ #
438
+ # This is a convenience function and is equivalent to:
439
+ # node.kwattr_values("class")
440
+ #
441
+ # @see #kwattr_values
442
+ # @see #add_class
443
+ # @see #append_class
444
+ # @see #remove_class
445
+ #
446
+ # @return [Array<String>]
447
+ #
448
+ # The CSS classes present in the Node's +class+ attribute. If
449
+ # the attribute is empty or non-existent, the return value is
450
+ # an empty array.
451
+ #
452
+ # @example
453
+ # node # => <div class="section title header"></div>
454
+ # node.classes # => ["section", "title", "header"]
455
+ #
368
456
  def classes
369
- self['class'].to_s.scan(/\S+/)
457
+ kwattr_values("class")
370
458
  end
371
459
 
372
- ###
373
- # Add +name+ to the "class" attribute value of this Node and
374
- # return self. If the value is already in the current value, it
375
- # is not added. If no "class" attribute exists yet, one is
376
- # created with the given value.
460
+ # Ensure HTML CSS classes are present on a +Node+. Any CSS
461
+ # classes in +names+ that already exist in the +Node+'s +class+
462
+ # attribute are _not_ added. Note that any existing duplicates
463
+ # in the +class+ attribute are not removed. Compare with
464
+ # {#append_class}.
465
+ #
466
+ # This is a convenience function and is equivalent to:
467
+ # node.kwattr_add("class", names)
468
+ #
469
+ # @see #kwattr_add
470
+ # @see #classes
471
+ # @see #append_class
472
+ # @see #remove_class
473
+ #
474
+ # @param names [String, Array<String>]
475
+ #
476
+ # CSS class names to be added to the Node's +class+
477
+ # attribute. May be a string containing whitespace-delimited
478
+ # names, or an Array of String names. Any class names already
479
+ # present will not be added. Any class names not present will
480
+ # be added. If no +class+ attribute exists, one is created.
481
+ #
482
+ # @return [Node] Returns +self+ for ease of chaining method calls.
483
+ #
484
+ # @example Ensure that a +Node+ has CSS class "section"
485
+ # node # => <div></div>
486
+ # node.add_class("section") # => <div class="section"></div>
487
+ # node.add_class("section") # => <div class="section"></div> # duplicate not added
488
+ #
489
+ # @example Ensure that a +Node+ has CSS classes "section" and "header", via a String argument.
490
+ # node # => <div class="section section"></div>
491
+ # node.add_class("section header") # => <div class="section section header"></div>
492
+ # # Note that the CSS class "section" is not added because it is already present.
493
+ # # Note also that the pre-existing duplicate CSS class "section" is not removed.
494
+ #
495
+ # @example Ensure that a +Node+ has CSS classes "section" and "header", via an Array argument.
496
+ # node # => <div></div>
497
+ # node.add_class(["section", "header"]) # => <div class="section header"></div>
498
+ #
499
+ def add_class(names)
500
+ kwattr_add("class", names)
501
+ end
502
+
503
+ # Add HTML CSS classes to a +Node+, regardless of
504
+ # duplication. Compare with {#add_class}.
505
+ #
506
+ # This is a convenience function and is equivalent to:
507
+ # node.kwattr_append("class", names)
508
+ #
509
+ # @see #kwattr_append
510
+ # @see #classes
511
+ # @see #add_class
512
+ # @see #remove_class
513
+ #
514
+ # @param names [String, Array<String>]
515
+ #
516
+ # CSS class names to be appended to the Node's +class+
517
+ # attribute. May be a string containing whitespace-delimited
518
+ # names, or an Array of String names. All class names passed
519
+ # in will be appended to the +class+ attribute even if they
520
+ # are already present in the attribute value. If no +class+
521
+ # attribute exists, one is created.
522
+ #
523
+ # @return [Node] Returns +self+ for ease of chaining method calls.
524
+ #
525
+ # @example Append "section" to a +Node+'s CSS +class+ attribute
526
+ # node # => <div></div>
527
+ # node.append_class("section") # => <div class="section"></div>
528
+ # node.append_class("section") # => <div class="section section"></div> # duplicate added!
529
+ #
530
+ # @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via a String argument.
531
+ # node # => <div class="section section"></div>
532
+ # node.append_class("section header") # => <div class="section section section header"></div>
533
+ # # Note that the CSS class "section" is appended even though it is already present.
534
+ #
535
+ # @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via an Array argument.
536
+ # node # => <div></div>
537
+ # node.append_class(["section", "header"]) # => <div class="section header"></div>
538
+ # node.append_class(["section", "header"]) # => <div class="section header section header"></div>
377
539
  #
378
- # More than one class may be added at a time, separated by a
379
- # space.
380
- def add_class name
381
- names = classes
382
- self['class'] = (names + (name.scan(/\S+/) - names)).join(' ')
540
+ def append_class(names)
541
+ kwattr_append("class", names)
542
+ end
543
+
544
+ # Remove HTML CSS classes from a +Node+. Any CSS classes in +names+ that
545
+ # exist in the +Node+'s +class+ attribute are removed, including any
546
+ # multiple entries.
547
+ #
548
+ # If no CSS classes remain after this operation, or if +names+ is
549
+ # +nil+, the +class+ attribute is deleted from the node.
550
+ #
551
+ # This is a convenience function and is equivalent to:
552
+ # node.kwattr_remove("class", names)
553
+ #
554
+ # @see #kwattr_remove
555
+ # @see #classes
556
+ # @see #add_class
557
+ # @see #append_class
558
+ #
559
+ # @param names [String, Array<String>]
560
+ #
561
+ # CSS class names to be removed from the Node's +class+ attribute. May
562
+ # be a string containing whitespace-delimited names, or an Array of
563
+ # String names. Any class names already present will be removed. If no
564
+ # CSS classes remain, the +class+ attribute is deleted.
565
+ #
566
+ # @return [Node] Returns +self+ for ease of chaining method calls.
567
+ #
568
+ # @example
569
+ # node # => <div class="section header"></div>
570
+ # node.remove_class("section") # => <div class="header"></div>
571
+ # node.remove_class("header") # => <div></div> # attribute is deleted when empty
572
+ #
573
+ def remove_class(names = nil)
574
+ kwattr_remove("class", names)
575
+ end
576
+
577
+ # Retrieve values from a keyword attribute of a Node.
578
+ #
579
+ # A "keyword attribute" is a node attribute that contains a set
580
+ # of space-delimited values. Perhaps the most familiar example
581
+ # of this is the HTML +class+ attribute used to contain CSS
582
+ # classes. But other keyword attributes exist, for instance
583
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
584
+ #
585
+ # @see #classes
586
+ # @see #kwattr_add
587
+ # @see #kwattr_append
588
+ # @see #kwattr_remove
589
+ #
590
+ # @param attribute_name [String] The name of the keyword attribute to be inspected.
591
+ #
592
+ # @return [Array<String>]
593
+ #
594
+ # The values present in the Node's +attribute_name+
595
+ # attribute. If the attribute is empty or non-existent, the
596
+ # return value is an empty array.
597
+ #
598
+ # @example
599
+ # node # => <a rel="nofollow noopener external">link</a>
600
+ # node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
601
+ #
602
+ # @since v1.11.0
603
+ #
604
+ def kwattr_values(attribute_name)
605
+ keywordify(get_attribute(attribute_name) || [])
606
+ end
607
+
608
+ # Ensure that values are present in a keyword attribute.
609
+ #
610
+ # Any values in +keywords+ that already exist in the +Node+'s
611
+ # attribute values are _not_ added. Note that any existing
612
+ # duplicates in the attribute values are not removed. Compare
613
+ # with {#kwattr_append}.
614
+ #
615
+ # A "keyword attribute" is a node attribute that contains a set
616
+ # of space-delimited values. Perhaps the most familiar example
617
+ # of this is the HTML +class+ attribute used to contain CSS
618
+ # classes. But other keyword attributes exist, for instance
619
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
620
+ #
621
+ # @see #add_class
622
+ # @see #kwattr_values
623
+ # @see #kwattr_append
624
+ # @see #kwattr_remove
625
+ #
626
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
627
+ #
628
+ # @param keywords [String, Array<String>]
629
+ #
630
+ # Keywords to be added to the attribute named
631
+ # +attribute_name+. May be a string containing
632
+ # whitespace-delimited values, or an Array of String
633
+ # values. Any values already present will not be added. Any
634
+ # values not present will be added. If the named attribute
635
+ # does not exist, it is created.
636
+ #
637
+ # @return [Node] Returns +self+ for ease of chaining method calls.
638
+ #
639
+ # @example Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
640
+ # node # => <a></a>
641
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
642
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a> # duplicate not added
643
+ #
644
+ # @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a String argument.
645
+ # node # => <a rel="nofollow nofollow"></a>
646
+ # node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
647
+ # # Note that "nofollow" is not added because it is already present.
648
+ # # Note also that the pre-existing duplicate "nofollow" is not removed.
649
+ #
650
+ # @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via an Array argument.
651
+ # node # => <a></a>
652
+ # node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
653
+ #
654
+ # @since v1.11.0
655
+ #
656
+ def kwattr_add(attribute_name, keywords)
657
+ keywords = keywordify(keywords)
658
+ current_kws = kwattr_values(attribute_name)
659
+ new_kws = (current_kws + (keywords - current_kws)).join(" ")
660
+ set_attribute(attribute_name, new_kws)
383
661
  self
384
662
  end
385
663
 
386
- ###
387
- # Append +name+ to the "class" attribute value of this Node and
388
- # return self. The value is simply appended without checking if
389
- # it is already in the current value. If no "class" attribute
390
- # exists yet, one is created with the given value.
664
+ # Add keywords to a Node's keyword attribute, regardless of
665
+ # duplication. Compare with {#kwattr_add}.
666
+ #
667
+ # A "keyword attribute" is a node attribute that contains a set
668
+ # of space-delimited values. Perhaps the most familiar example
669
+ # of this is the HTML +class+ attribute used to contain CSS
670
+ # classes. But other keyword attributes exist, for instance
671
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
672
+ #
673
+ # @see #append_class
674
+ # @see #kwattr_values
675
+ # @see #kwattr_add
676
+ # @see #kwattr_remove
391
677
  #
392
- # More than one class may be appended at a time, separated by a
393
- # space.
394
- def append_class name
395
- self['class'] = (classes + name.scan(/\S+/)).join(' ')
678
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
679
+ #
680
+ # @param keywords [String, Array<String>]
681
+ #
682
+ # Keywords to be added to the attribute named
683
+ # +attribute_name+. May be a string containing
684
+ # whitespace-delimited values, or an Array of String
685
+ # values. All values passed in will be appended to the named
686
+ # attribute even if they are already present in the
687
+ # attribute. If the named attribute does not exist, it is
688
+ # created.
689
+ #
690
+ # @return [Node] Returns +self+ for ease of chaining method calls.
691
+ #
692
+ # @example Append "nofollow" to the +rel+ attribute.
693
+ # node # => <a></a>
694
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
695
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a> # duplicate added!
696
+ #
697
+ # @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
698
+ # node # => <a rel="nofollow"></a>
699
+ # node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
700
+ # # Note that "nofollow" is appended even though it is already present.
701
+ #
702
+ # @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
703
+ # node # => <a></a>
704
+ # node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
705
+ #
706
+ # @since v1.11.0
707
+ #
708
+ def kwattr_append(attribute_name, keywords)
709
+ keywords = keywordify(keywords)
710
+ current_kws = kwattr_values(attribute_name)
711
+ new_kws = (current_kws + keywords).join(" ")
712
+ set_attribute(attribute_name, new_kws)
396
713
  self
397
714
  end
398
715
 
399
- ###
400
- # Remove +name+ from the "class" attribute value of this Node
401
- # and return self. If there are many occurrences of the name,
402
- # they are all removed.
716
+ # Remove keywords from a keyword attribute. Any matching
717
+ # keywords that exist in the named attribute are removed,
718
+ # including any multiple entries.
403
719
  #
404
- # More than one class may be removed at a time, separated by a
405
- # space.
720
+ # If no keywords remain after this operation, or if +keywords+
721
+ # is +nil+, the attribute is deleted from the node.
406
722
  #
407
- # If no class name is left after removal, or when +name+ is nil,
408
- # the "class" attribute is removed from this Node.
409
- def remove_class name = nil
410
- if name
411
- names = classes - name.scan(/\S+/)
412
- if names.empty?
413
- delete 'class'
414
- else
415
- self['class'] = names.join(' ')
416
- end
723
+ # A "keyword attribute" is a node attribute that contains a set
724
+ # of space-delimited values. Perhaps the most familiar example
725
+ # of this is the HTML +class+ attribute used to contain CSS
726
+ # classes. But other keyword attributes exist, for instance
727
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
728
+ #
729
+ # @see #remove_class
730
+ # @see #kwattr_values
731
+ # @see #kwattr_add
732
+ # @see #kwattr_append
733
+ #
734
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
735
+ #
736
+ # @param keywords [String, Array<String>]
737
+ #
738
+ # Keywords to be removed from the attribute named
739
+ # +attribute_name+. May be a string containing
740
+ # whitespace-delimited values, or an Array of String
741
+ # values. Any keywords present in the named attribute will be
742
+ # removed. If no keywords remain, or if +keywords+ is nil, the
743
+ # attribute is deleted.
744
+ #
745
+ # @return [Node] Returns +self+ for ease of chaining method calls.
746
+ #
747
+ # @example
748
+ # node # => <a rel="nofollow noreferrer">link</a>
749
+ # node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
750
+ # node.kwattr_remove("rel", "noreferrer") # => <a>link</a> # attribute is deleted when empty
751
+ #
752
+ # @since v1.11.0
753
+ #
754
+ def kwattr_remove(attribute_name, keywords)
755
+ if keywords.nil?
756
+ remove_attribute(attribute_name)
757
+ return self
758
+ end
759
+
760
+ keywords = keywordify(keywords)
761
+ current_kws = kwattr_values(attribute_name)
762
+ new_kws = current_kws - keywords
763
+ if new_kws.empty?
764
+ remove_attribute(attribute_name)
417
765
  else
418
- delete "class"
766
+ set_attribute(attribute_name, new_kws.join(" "))
419
767
  end
420
768
  self
421
769
  end
422
770
 
423
- ###
424
- # Remove the attribute named +name+
425
- def remove_attribute name
426
- attr = attributes[name].remove if key? name
427
- clear_xpath_context if Nokogiri.jruby?
428
- attr
429
- end
430
771
  alias :delete :remove_attribute
772
+ alias :get_attribute :[]
773
+ alias :attr :[]
774
+ alias :set_attribute :[]=
775
+ alias :has_attribute? :key?
776
+
777
+ # @!endgroup
431
778
 
432
779
  ###
433
780
  # Returns true if this Node matches +selector+
434
- def matches? selector
781
+ def matches?(selector)
435
782
  ancestors.last.search(selector).include?(self)
436
783
  end
437
784
 
438
785
  ###
439
786
  # Create a DocumentFragment containing +tags+ that is relative to _this_
440
787
  # context node.
441
- def fragment tags
788
+ def fragment(tags)
442
789
  type = document.html? ? Nokogiri::HTML : Nokogiri::XML
443
790
  type::DocumentFragment.new(document, tags, self)
444
791
  end
@@ -447,7 +794,7 @@ module Nokogiri
447
794
  # Parse +string_or_io+ as a document fragment within the context of
448
795
  # *this* node. Returns a XML::NodeSet containing the nodes parsed from
449
796
  # +string_or_io+.
450
- def parse string_or_io, options = nil
797
+ def parse(string_or_io, options = nil)
451
798
  ##
452
799
  # When the current node is unparented and not an element node, use the
453
800
  # document as the parsing context instead. Otherwise, the in-context
@@ -470,30 +817,34 @@ module Nokogiri
470
817
 
471
818
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
472
819
 
473
- ##
474
- # This is a horrible hack, but I don't care. See #313 for background.
820
+ # libxml2 does not obey the `recover` option after encountering errors during `in_context`
821
+ # parsing, and so this horrible hack is here to try to emulate recovery behavior.
822
+ #
823
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
824
+ # would have been inherited from the context node won't be handled correctly. This hack was
825
+ # written in 2010, and I regret it, because it's silently degrading functionality in a way
826
+ # that's not easily prevented (or even detected).
827
+ #
828
+ # I think preferable behavior would be to either:
829
+ #
830
+ # a. add an error noting that we "fell back" and pointing the user to turning off the `recover` option
831
+ # b. don't recover, but raise a sensible exception
832
+ #
833
+ # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
834
+ # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
475
835
  error_count = document.errors.length
476
836
  node_set = in_context(contents, options.to_i)
477
- if node_set.empty? and document.errors.length > error_count and options.recover?
478
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
479
- node_set = fragment.children
837
+ if (node_set.empty? && (document.errors.length > error_count))
838
+ if options.recover?
839
+ fragment = Nokogiri::HTML4::DocumentFragment.parse contents
840
+ node_set = fragment.children
841
+ else
842
+ raise document.errors[error_count]
843
+ end
480
844
  end
481
845
  node_set
482
846
  end
483
847
 
484
- ####
485
- # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
486
- def content= string
487
- self.native_content = encode_special_chars(string.to_s)
488
- end
489
-
490
- ###
491
- # Set the parent Node for this Node
492
- def parent= parent_node
493
- parent_node.add_child(self)
494
- parent_node
495
- end
496
-
497
848
  ###
498
849
  # Returns a Hash of +{prefix => value}+ for all namespaces on this
499
850
  # node and its ancestors.
@@ -509,10 +860,11 @@ module Nokogiri
509
860
  # default namespaces set on ancestor will NOT be, even if self
510
861
  # has no explicit default namespace.
511
862
  def namespaces
512
- Hash[namespace_scopes.map { |nd|
513
- key = ['xmlns', nd.prefix].compact.join(':')
514
- [key, nd.href]
515
- }]
863
+ namespace_scopes.each_with_object({}) do |ns, hash|
864
+ prefix = ns.prefix
865
+ key = prefix ? "xmlns:#{prefix}" : "xmlns"
866
+ hash[key] = ns.href
867
+ end
516
868
  end
517
869
 
518
870
  # Returns true if this is a Comment
@@ -530,7 +882,7 @@ module Nokogiri
530
882
  type == DOCUMENT_NODE
531
883
  end
532
884
 
533
- # Returns true if this is an HTML::Document node
885
+ # Returns true if this is an HTML4::Document node
534
886
  def html?
535
887
  type == HTML_DOCUMENT_NODE
536
888
  end
@@ -556,11 +908,11 @@ module Nokogiri
556
908
  end
557
909
 
558
910
  ###
559
- # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
911
+ # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
560
912
  # nil on XML documents and on unknown tags.
561
913
  def description
562
914
  return nil if document.xml?
563
- Nokogiri::HTML::ElementDescription[name]
915
+ Nokogiri::HTML4::ElementDescription[name]
564
916
  end
565
917
 
566
918
  ###
@@ -574,6 +926,7 @@ module Nokogiri
574
926
  def element?
575
927
  type == ELEMENT_NODE
576
928
  end
929
+
577
930
  alias :elem? :element?
578
931
 
579
932
  ###
@@ -584,7 +937,7 @@ module Nokogiri
584
937
  end
585
938
 
586
939
  # Get the inner_html for this node's Node#children
587
- def inner_html *args
940
+ def inner_html(*args)
588
941
  children.map { |x| x.to_html(*args) }.join
589
942
  end
590
943
 
@@ -592,13 +945,13 @@ module Nokogiri
592
945
  def css_path
593
946
  path.split(/\//).map { |part|
594
947
  part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
595
- }.compact.join(' > ')
948
+ }.compact.join(" > ")
596
949
  end
597
950
 
598
951
  ###
599
952
  # Get a list of ancestor Node for this Node. If +selector+ is given,
600
953
  # the ancestors must match +selector+
601
- def ancestors selector = nil
954
+ def ancestors(selector = nil)
602
955
  return NodeSet.new(document) unless respond_to?(:parent)
603
956
  return NodeSet.new(document) unless parent
604
957
 
@@ -619,57 +972,38 @@ module Nokogiri
619
972
  })
620
973
  end
621
974
 
622
- ###
623
- # Adds a default namespace supplied as a string +url+ href, to self.
624
- # The consequence is as an xmlns attribute with supplied argument were
625
- # present in parsed XML. A default namespace set with this method will
626
- # now show up in #attributes, but when this node is serialized to XML an
627
- # "xmlns" attribute will appear. See also #namespace and #namespace=
628
- def default_namespace= url
629
- add_namespace_definition(nil, url)
630
- end
631
- alias :add_namespace :add_namespace_definition
632
-
633
- ###
634
- # Set the default namespace on this node (as would be defined with an
635
- # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
636
- # a Namespace added this way will NOT be serialized as an xmlns attribute
637
- # for this node. You probably want #default_namespace= instead, or perhaps
638
- # #add_namespace_definition with a nil prefix argument.
639
- def namespace= ns
640
- return set_namespace(ns) unless ns
641
-
642
- unless Nokogiri::XML::Namespace === ns
643
- raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
644
- end
645
- if ns.document != document
646
- raise ArgumentError, 'namespace must be declared on the same document'
647
- end
648
-
649
- set_namespace ns
650
- end
651
-
652
975
  ####
653
976
  # Yields self and all children to +block+ recursively.
654
- def traverse &block
655
- children.each{|j| j.traverse(&block) }
977
+ def traverse(&block)
978
+ children.each { |j| j.traverse(&block) }
656
979
  block.call(self)
657
980
  end
658
981
 
659
982
  ###
660
983
  # Accept a visitor. This method calls "visit" on +visitor+ with self.
661
- def accept visitor
984
+ def accept(visitor)
662
985
  visitor.visit(self)
663
986
  end
664
987
 
665
988
  ###
666
989
  # Test to see if this Node is equal to +other+
667
- def == other
990
+ def ==(other)
668
991
  return false unless other
669
992
  return false unless other.respond_to?(:pointer_id)
670
993
  pointer_id == other.pointer_id
671
994
  end
672
995
 
996
+ ###
997
+ # Compare two Node objects with respect to their Document. Nodes from
998
+ # different documents cannot be compared.
999
+ def <=>(other)
1000
+ return nil unless other.is_a?(Nokogiri::XML::Node)
1001
+ return nil unless document == other.document
1002
+ compare other
1003
+ end
1004
+
1005
+ # @!group Serialization and Generating Output
1006
+
673
1007
  ###
674
1008
  # Serialize Node using +options+. Save options can also be set using a
675
1009
  # block. See SaveOptions.
@@ -684,17 +1018,17 @@ module Nokogiri
684
1018
  # config.format.as_xml
685
1019
  # end
686
1020
  #
687
- def serialize *args, &block
1021
+ def serialize(*args, &block)
688
1022
  options = args.first.is_a?(Hash) ? args.shift : {
689
- :encoding => args[0],
690
- :save_with => args[1]
1023
+ :encoding => args[0],
1024
+ :save_with => args[1],
691
1025
  }
692
1026
 
693
1027
  encoding = options[:encoding] || document.encoding
694
1028
  options[:encoding] = encoding
695
1029
 
696
1030
  outstring = String.new
697
- outstring.force_encoding(Encoding.find(encoding || 'utf-8'))
1031
+ outstring.force_encoding(Encoding.find(encoding || "utf-8"))
698
1032
  io = StringIO.new(outstring)
699
1033
  write_to io, options, &block
700
1034
  io.string
@@ -707,7 +1041,7 @@ module Nokogiri
707
1041
  #
708
1042
  # See Node#write_to for a list of +options+. For formatted output,
709
1043
  # use Node#to_xhtml instead.
710
- def to_html options = {}
1044
+ def to_html(options = {})
711
1045
  to_format SaveOptions::DEFAULT_HTML, options
712
1046
  end
713
1047
 
@@ -717,7 +1051,7 @@ module Nokogiri
717
1051
  # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
718
1052
  #
719
1053
  # See Node#write_to for a list of +options+
720
- def to_xml options = {}
1054
+ def to_xml(options = {})
721
1055
  options[:save_with] ||= SaveOptions::DEFAULT_XML
722
1056
  serialize(options)
723
1057
  end
@@ -728,7 +1062,7 @@ module Nokogiri
728
1062
  # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
729
1063
  #
730
1064
  # See Node#write_to for a list of +options+
731
- def to_xhtml options = {}
1065
+ def to_xhtml(options = {})
732
1066
  to_format SaveOptions::DEFAULT_XHTML, options
733
1067
  end
734
1068
 
@@ -749,29 +1083,34 @@ module Nokogiri
749
1083
  #
750
1084
  # node.write_to(io, :indent_text => '-', :indent => 2)
751
1085
  #
752
- def write_to io, *options
753
- options = options.first.is_a?(Hash) ? options.shift : {}
754
- encoding = options[:encoding] || options[0]
1086
+ def write_to(io, *options)
1087
+ options = options.first.is_a?(Hash) ? options.shift : {}
1088
+ encoding = options[:encoding] || options[0]
755
1089
  if Nokogiri.jruby?
756
- save_options = options[:save_with] || options[1]
757
- indent_times = options[:indent] || 0
1090
+ save_options = options[:save_with] || options[1]
1091
+ indent_times = options[:indent] || 0
758
1092
  else
759
- save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
760
- indent_times = options[:indent] || 2
1093
+ save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
1094
+ indent_times = options[:indent] || 2
761
1095
  end
762
- indent_text = options[:indent_text] || ' '
1096
+ indent_text = options[:indent_text] || " "
1097
+
1098
+ # Any string times 0 returns an empty string. Therefore, use the same
1099
+ # string instead of generating a new empty string for every node with
1100
+ # zero indentation.
1101
+ indentation = indent_times.zero? ? "" : (indent_text * indent_times)
763
1102
 
764
1103
  config = SaveOptions.new(save_options.to_i)
765
1104
  yield config if block_given?
766
1105
 
767
- native_write_to(io, encoding, indent_text * indent_times, config.options)
1106
+ native_write_to(io, encoding, indentation, config.options)
768
1107
  end
769
1108
 
770
1109
  ###
771
1110
  # Write Node as HTML to +io+ with +options+
772
1111
  #
773
1112
  # See Node#write_to for a list of +options+
774
- def write_html_to io, options = {}
1113
+ def write_html_to(io, options = {})
775
1114
  write_format_to SaveOptions::DEFAULT_HTML, io, options
776
1115
  end
777
1116
 
@@ -779,7 +1118,7 @@ module Nokogiri
779
1118
  # Write Node as XHTML to +io+ with +options+
780
1119
  #
781
1120
  # See Node#write_to for a list of +options+
782
- def write_xhtml_to io, options = {}
1121
+ def write_xhtml_to(io, options = {})
783
1122
  write_format_to SaveOptions::DEFAULT_XHTML, io, options
784
1123
  end
785
1124
 
@@ -789,52 +1128,66 @@ module Nokogiri
789
1128
  # doc.write_xml_to io, :encoding => 'UTF-8'
790
1129
  #
791
1130
  # See Node#write_to for a list of options
792
- def write_xml_to io, options = {}
1131
+ def write_xml_to(io, options = {})
793
1132
  options[:save_with] ||= SaveOptions::DEFAULT_XML
794
1133
  write_to io, options
795
1134
  end
796
1135
 
797
- ###
798
- # Compare two Node objects with respect to their Document. Nodes from
799
- # different documents cannot be compared.
800
- def <=> other
801
- return nil unless other.is_a?(Nokogiri::XML::Node)
802
- return nil unless document == other.document
803
- compare other
1136
+ def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
1137
+ c14n_root = self
1138
+ document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
1139
+ tn = node.is_a?(XML::Node) ? node : parent
1140
+ tn == c14n_root || tn.ancestors.include?(c14n_root)
1141
+ end
804
1142
  end
805
1143
 
806
- ###
807
- # Do xinclude substitution on the subtree below node. If given a block, a
808
- # Nokogiri::XML::ParseOptions object initialized from +options+, will be
809
- # passed to it, allowing more convenient modification of the parser options.
810
- def do_xinclude options = XML::ParseOptions::DEFAULT_XML
811
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
1144
+ # @!endgroup
812
1145
 
813
- # give options to user
814
- yield options if block_given?
1146
+ protected
815
1147
 
816
- # call c extension
817
- process_xincludes(options.to_i)
1148
+ def coerce(data)
1149
+ case data
1150
+ when XML::NodeSet
1151
+ return data
1152
+ when XML::DocumentFragment
1153
+ return data.children
1154
+ when String
1155
+ return fragment(data).children
1156
+ when Document, XML::Attr
1157
+ # unacceptable
1158
+ when XML::Node
1159
+ return data
1160
+ end
1161
+
1162
+ raise ArgumentError, <<-EOERR
1163
+ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1164
+ (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1165
+ EOERR
818
1166
  end
819
1167
 
820
- def canonicalize(mode=XML::XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
821
- c14n_root = self
822
- document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
823
- tn = node.is_a?(XML::Node) ? node : parent
824
- tn == c14n_root || tn.ancestors.include?(c14n_root)
1168
+ private
1169
+
1170
+ def keywordify(keywords)
1171
+ case keywords
1172
+ when Enumerable
1173
+ return keywords
1174
+ when String
1175
+ return keywords.scan(/\S+/)
1176
+ else
1177
+ raise ArgumentError.new("Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}")
825
1178
  end
826
1179
  end
827
1180
 
828
- private
1181
+ def add_sibling(next_or_previous, node_or_tags)
1182
+ raise("Cannot add sibling to a node with no parent") unless parent
829
1183
 
830
- def add_sibling next_or_previous, node_or_tags
831
1184
  impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
832
- iter = (next_or_previous == :next) ? :reverse_each : :each
1185
+ iter = (next_or_previous == :next) ? :reverse_each : :each
833
1186
 
834
- node_or_tags = coerce node_or_tags
1187
+ node_or_tags = parent.coerce(node_or_tags)
835
1188
  if node_or_tags.is_a?(XML::NodeSet)
836
1189
  if text?
837
- pivot = Nokogiri::XML::Node.new 'dummy', document
1190
+ pivot = Nokogiri::XML::Node.new "dummy", document
838
1191
  send impl, pivot
839
1192
  else
840
1193
  pivot = self
@@ -847,17 +1200,18 @@ module Nokogiri
847
1200
  node_or_tags
848
1201
  end
849
1202
 
850
- def to_format save_option, options
851
- # FIXME: this is a hack around broken libxml versions
852
- return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1203
+ USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
1204
+ private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
1205
+
1206
+ def to_format(save_option, options)
1207
+ return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
853
1208
 
854
1209
  options[:save_with] = save_option unless options[:save_with]
855
1210
  serialize(options)
856
1211
  end
857
1212
 
858
- def write_format_to save_option, io, options
859
- # FIXME: this is a hack around broken libxml versions
860
- return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
1213
+ def write_format_to(save_option, io, options)
1214
+ return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
861
1215
 
862
1216
  options[:save_with] ||= save_option
863
1217
  write_to io, options
@@ -867,30 +1221,10 @@ module Nokogiri
867
1221
  [:name, :namespace, :attribute_nodes, :children]
868
1222
  end
869
1223
 
870
- def coerce data # :nodoc:
871
- case data
872
- when XML::NodeSet
873
- return data
874
- when XML::DocumentFragment
875
- return data.children
876
- when String
877
- return fragment(data).children
878
- when Document, XML::Attr
879
- # unacceptable
880
- when XML::Node
881
- return data
882
- end
883
-
884
- raise ArgumentError, <<-EOERR
885
- Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
886
- (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
887
- EOERR
888
- end
889
-
890
1224
  # @private
891
- IMPLIED_XPATH_CONTEXTS = [ './/'.freeze ].freeze # :nodoc:
1225
+ IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
892
1226
 
893
- def add_child_node_and_reparent_attrs node # :nodoc:
1227
+ def add_child_node_and_reparent_attrs(node)
894
1228
  add_child_node node
895
1229
  node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
896
1230
  attr_node.remove
@@ -900,3 +1234,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
900
1234
  end
901
1235
  end
902
1236
  end
1237
+
1238
+ require_relative "node/save_options"