nokogiri 1.11.0.rc4-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (218) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE-DEPENDENCIES.md +1682 -0
  3. data/LICENSE.md +9 -0
  4. data/README.md +200 -0
  5. data/bin/nokogiri +118 -0
  6. data/dependencies.yml +74 -0
  7. data/ext/nokogiri/depend +477 -0
  8. data/ext/nokogiri/extconf.rb +819 -0
  9. data/ext/nokogiri/html_document.c +171 -0
  10. data/ext/nokogiri/html_document.h +10 -0
  11. data/ext/nokogiri/html_element_description.c +279 -0
  12. data/ext/nokogiri/html_element_description.h +10 -0
  13. data/ext/nokogiri/html_entity_lookup.c +32 -0
  14. data/ext/nokogiri/html_entity_lookup.h +8 -0
  15. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  16. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  17. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  18. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  19. data/ext/nokogiri/include/libexslt/exslt.h +102 -0
  20. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  21. data/ext/nokogiri/include/libexslt/exsltexports.h +140 -0
  22. data/ext/nokogiri/include/libxml2/libxml/DOCBparser.h +96 -0
  23. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  24. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  25. data/ext/nokogiri/include/libxml2/libxml/SAX.h +173 -0
  26. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +178 -0
  27. data/ext/nokogiri/include/libxml2/libxml/c14n.h +126 -0
  28. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  29. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  30. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  31. data/ext/nokogiri/include/libxml2/libxml/dict.h +79 -0
  32. data/ext/nokogiri/include/libxml2/libxml/encoding.h +245 -0
  33. data/ext/nokogiri/include/libxml2/libxml/entities.h +151 -0
  34. data/ext/nokogiri/include/libxml2/libxml/globals.h +508 -0
  35. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  36. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  37. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +163 -0
  38. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  39. data/ext/nokogiri/include/libxml2/libxml/parser.h +1241 -0
  40. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +644 -0
  41. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  42. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +217 -0
  43. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  44. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  45. data/ext/nokogiri/include/libxml2/libxml/threads.h +89 -0
  46. data/ext/nokogiri/include/libxml2/libxml/tree.h +1311 -0
  47. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  48. data/ext/nokogiri/include/libxml2/libxml/valid.h +458 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +366 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +945 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +153 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +224 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +151 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +485 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  66. data/ext/nokogiri/include/libxml2/libxml/xpath.h +566 -0
  67. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  68. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +114 -0
  69. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  70. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  71. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  72. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  73. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  74. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  75. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  76. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  77. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  78. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  79. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  80. data/ext/nokogiri/include/libxslt/security.h +104 -0
  81. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  82. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  83. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  84. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  85. data/ext/nokogiri/include/libxslt/xsltInternals.h +1978 -0
  86. data/ext/nokogiri/include/libxslt/xsltconfig.h +180 -0
  87. data/ext/nokogiri/include/libxslt/xsltexports.h +142 -0
  88. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  89. data/ext/nokogiri/include/libxslt/xsltutils.h +313 -0
  90. data/ext/nokogiri/nokogiri.c +135 -0
  91. data/ext/nokogiri/nokogiri.h +130 -0
  92. data/ext/nokogiri/xml_attr.c +103 -0
  93. data/ext/nokogiri/xml_attr.h +9 -0
  94. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  95. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  96. data/ext/nokogiri/xml_cdata.c +62 -0
  97. data/ext/nokogiri/xml_cdata.h +9 -0
  98. data/ext/nokogiri/xml_comment.c +69 -0
  99. data/ext/nokogiri/xml_comment.h +9 -0
  100. data/ext/nokogiri/xml_document.c +622 -0
  101. data/ext/nokogiri/xml_document.h +23 -0
  102. data/ext/nokogiri/xml_document_fragment.c +48 -0
  103. data/ext/nokogiri/xml_document_fragment.h +10 -0
  104. data/ext/nokogiri/xml_dtd.c +202 -0
  105. data/ext/nokogiri/xml_dtd.h +10 -0
  106. data/ext/nokogiri/xml_element_content.c +123 -0
  107. data/ext/nokogiri/xml_element_content.h +10 -0
  108. data/ext/nokogiri/xml_element_decl.c +69 -0
  109. data/ext/nokogiri/xml_element_decl.h +9 -0
  110. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  111. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  112. data/ext/nokogiri/xml_entity_decl.c +110 -0
  113. data/ext/nokogiri/xml_entity_decl.h +10 -0
  114. data/ext/nokogiri/xml_entity_reference.c +52 -0
  115. data/ext/nokogiri/xml_entity_reference.h +9 -0
  116. data/ext/nokogiri/xml_io.c +63 -0
  117. data/ext/nokogiri/xml_io.h +11 -0
  118. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  119. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  120. data/ext/nokogiri/xml_namespace.c +111 -0
  121. data/ext/nokogiri/xml_namespace.h +14 -0
  122. data/ext/nokogiri/xml_node.c +1773 -0
  123. data/ext/nokogiri/xml_node.h +13 -0
  124. data/ext/nokogiri/xml_node_set.c +486 -0
  125. data/ext/nokogiri/xml_node_set.h +12 -0
  126. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  127. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  128. data/ext/nokogiri/xml_reader.c +657 -0
  129. data/ext/nokogiri/xml_reader.h +10 -0
  130. data/ext/nokogiri/xml_relax_ng.c +179 -0
  131. data/ext/nokogiri/xml_relax_ng.h +9 -0
  132. data/ext/nokogiri/xml_sax_parser.c +305 -0
  133. data/ext/nokogiri/xml_sax_parser.h +39 -0
  134. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  135. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  136. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  137. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  138. data/ext/nokogiri/xml_schema.c +276 -0
  139. data/ext/nokogiri/xml_schema.h +9 -0
  140. data/ext/nokogiri/xml_syntax_error.c +64 -0
  141. data/ext/nokogiri/xml_syntax_error.h +13 -0
  142. data/ext/nokogiri/xml_text.c +52 -0
  143. data/ext/nokogiri/xml_text.h +9 -0
  144. data/ext/nokogiri/xml_xpath_context.c +374 -0
  145. data/ext/nokogiri/xml_xpath_context.h +10 -0
  146. data/ext/nokogiri/xslt_stylesheet.c +263 -0
  147. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  148. data/lib/nokogiri.rb +127 -0
  149. data/lib/nokogiri/2.5/nokogiri.bundle +0 -0
  150. data/lib/nokogiri/2.6/nokogiri.bundle +0 -0
  151. data/lib/nokogiri/2.7/nokogiri.bundle +0 -0
  152. data/lib/nokogiri/3.0/nokogiri.bundle +0 -0
  153. data/lib/nokogiri/css.rb +28 -0
  154. data/lib/nokogiri/css/node.rb +53 -0
  155. data/lib/nokogiri/css/parser.rb +751 -0
  156. data/lib/nokogiri/css/parser.y +272 -0
  157. data/lib/nokogiri/css/parser_extras.rb +94 -0
  158. data/lib/nokogiri/css/syntax_error.rb +8 -0
  159. data/lib/nokogiri/css/tokenizer.rb +154 -0
  160. data/lib/nokogiri/css/tokenizer.rex +55 -0
  161. data/lib/nokogiri/css/xpath_visitor.rb +260 -0
  162. data/lib/nokogiri/decorators/slop.rb +43 -0
  163. data/lib/nokogiri/html.rb +38 -0
  164. data/lib/nokogiri/html/builder.rb +36 -0
  165. data/lib/nokogiri/html/document.rb +322 -0
  166. data/lib/nokogiri/html/document_fragment.rb +50 -0
  167. data/lib/nokogiri/html/element_description.rb +24 -0
  168. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  169. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  170. data/lib/nokogiri/html/sax/parser.rb +63 -0
  171. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  172. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  173. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  174. data/lib/nokogiri/syntax_error.rb +5 -0
  175. data/lib/nokogiri/version.rb +3 -0
  176. data/lib/nokogiri/version/constant.rb +5 -0
  177. data/lib/nokogiri/version/info.rb +182 -0
  178. data/lib/nokogiri/xml.rb +76 -0
  179. data/lib/nokogiri/xml/attr.rb +15 -0
  180. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  181. data/lib/nokogiri/xml/builder.rb +447 -0
  182. data/lib/nokogiri/xml/cdata.rb +12 -0
  183. data/lib/nokogiri/xml/character_data.rb +8 -0
  184. data/lib/nokogiri/xml/document.rb +290 -0
  185. data/lib/nokogiri/xml/document_fragment.rb +159 -0
  186. data/lib/nokogiri/xml/dtd.rb +33 -0
  187. data/lib/nokogiri/xml/element_content.rb +37 -0
  188. data/lib/nokogiri/xml/element_decl.rb +14 -0
  189. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  190. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  191. data/lib/nokogiri/xml/namespace.rb +14 -0
  192. data/lib/nokogiri/xml/node.rb +1240 -0
  193. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  194. data/lib/nokogiri/xml/node_set.rb +372 -0
  195. data/lib/nokogiri/xml/notation.rb +7 -0
  196. data/lib/nokogiri/xml/parse_options.rb +127 -0
  197. data/lib/nokogiri/xml/pp.rb +3 -0
  198. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  199. data/lib/nokogiri/xml/pp/node.rb +57 -0
  200. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  201. data/lib/nokogiri/xml/reader.rb +116 -0
  202. data/lib/nokogiri/xml/relax_ng.rb +37 -0
  203. data/lib/nokogiri/xml/sax.rb +5 -0
  204. data/lib/nokogiri/xml/sax/document.rb +172 -0
  205. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  206. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  207. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  208. data/lib/nokogiri/xml/schema.rb +72 -0
  209. data/lib/nokogiri/xml/searchable.rb +239 -0
  210. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  211. data/lib/nokogiri/xml/text.rb +10 -0
  212. data/lib/nokogiri/xml/xpath.rb +11 -0
  213. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  214. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  215. data/lib/nokogiri/xslt.rb +57 -0
  216. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  217. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  218. metadata +565 -0
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
class DTD < Nokogiri::XML::Node
  # Remove the inherited Node methods that this class does not expose.
  undef_method :attribute_nodes, :values, :content, :namespace, :namespace_definitions
  undef_method :line if method_defined?(:line)

  # Keys of the collection returned by #attributes.
  def keys
    attributes.keys
  end

  # Yields every entry of #attributes as a two-element [key, value] Array.
  def each
    attributes.each { |entry_name, entry| yield([entry_name, entry]) }
  end

  # True when this DTD's name equals "html", ignoring case.
  def html_dtd?
    comparison = name.casecmp('html')
    comparison.zero?
  end

  # True for an "html" DTD that has no external id and whose system id is
  # either missing or 'about:legacy-compat'.
  def html5_dtd?
    return false unless html_dtd?
    return false unless external_id.nil?

    system_id.nil? || system_id == 'about:legacy-compat'
  end
end
32
+ end
33
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ ###
5
+ # Represents the allowed content in an Element Declaration inside a DTD:
6
+ #
7
+ # <?xml version="1.0"?><?TEST-STYLE PIDATA?>
8
+ # <!DOCTYPE staff SYSTEM "staff.dtd" [
9
+ # <!ELEMENT div1 (head, (p | list | note)*, div2*)>
10
+ # ]>
11
+ # </root>
12
+ #
13
+ # ElementContent represents the tree inside the <!ELEMENT> tag shown above
14
+ # that lists the possible content for the div1 tag.
15
class ElementContent
  # Possible definitions of type
  PCDATA = 1
  ELEMENT = 2
  SEQ = 3
  OR = 4

  # Possible content occurrences
  ONCE = 1
  OPT = 2
  MULT = 3
  PLUS = 4

  attr_reader :document

  ###
  # Get the children of this ElementContent node: the values of +c1+ and
  # +c2+, in that order, with nil entries dropped.
  def children
    first_child = c1
    second_child = c2
    [first_child, second_child].compact
  end
end
36
+ end
37
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
class ElementDecl < Nokogiri::XML::Node
  # Remove the inherited Node methods that this class does not expose.
  undef_method :namespace, :namespace_definitions
  undef_method :line if method_defined?(:line)

  # Class name, hexadecimal object id and the inspected #to_s output.
  def inspect
    hex_id = format("0x%x", object_id)
    "#<#{self.class.name}:#{hex_id} #{to_s.inspect}>"
  end
end
13
+ end
14
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
class EntityDecl < Nokogiri::XML::Node
  # Remove the inherited Node methods that this class does not expose.
  undef_method :attribute_nodes, :attributes, :namespace, :namespace_definitions
  undef_method :line if method_defined?(:line)

  # Construction is delegated to +doc+: returns whatever
  # doc.create_entity(name, *args) returns.
  def self.new(name, doc, *args)
    doc.create_entity(name, *args)
  end

  # Class name, hexadecimal object id and the inspected #to_s output.
  def inspect
    hex_id = format("0x%x", object_id)
    "#<#{self.class.name}:#{hex_id} #{to_s.inspect}>"
  end
end
19
+ end
20
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
class EntityReference < Nokogiri::XML::Node
  # Always returns a new, empty NodeSet for this node's document.
  #
  # libxml2 will create a malformed child node for predefined entities.
  # Because any use of that child is likely to cause a segfault, we pretend
  # that it doesn't exist.
  #
  # see https://github.com/sparklemotion/nokogiri/issues/1238 for details
  def children
    NodeSet.new(document)
  end

  # Attribute names reported when inspecting this node.
  def inspect_attributes
    %i[name]
  end
end
18
+ end
19
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
class Namespace
  include Nokogiri::XML::PP::Node
  attr_reader :document

  # Attribute names reported when inspecting this namespace.
  def inspect_attributes
    %i[prefix href]
  end
  private :inspect_attributes
end
13
+ end
14
+ end
@@ -0,0 +1,1240 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+ require "stringio"
4
+ require "nokogiri/xml/node/save_options"
5
+
6
+ module Nokogiri
7
+ module XML
8
+ ####
9
+ # Nokogiri::XML::Node is your window to the fun filled world of dealing
10
+ # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
11
+ # to a hash with regard to attributes. For example (from irb):
12
+ #
13
+ # irb(main):004:0> node
14
+ # => <a href="#foo" id="link">link</a>
15
+ # irb(main):005:0> node['href']
16
+ # => "#foo"
17
+ # irb(main):006:0> node.keys
18
+ # => ["href", "id"]
19
+ # irb(main):007:0> node.values
20
+ # => ["#foo", "link"]
21
+ # irb(main):008:0> node['class'] = 'green'
22
+ # => "green"
23
+ # irb(main):009:0> node
24
+ # => <a href="#foo" id="link" class="green">link</a>
25
+ # irb(main):010:0>
26
+ #
27
+ # See Nokogiri::XML::Node#[] and Nokogiri::XML::Node#[]= for more information.
28
+ #
29
+ # Nokogiri::XML::Node also has methods that let you move around your
30
+ # tree. For navigating your tree, see:
31
+ #
32
+ # * Nokogiri::XML::Node#parent
33
+ # * Nokogiri::XML::Node#children
34
+ # * Nokogiri::XML::Node#next
35
+ # * Nokogiri::XML::Node#previous
36
+ #
37
+ #
38
+ # When printing or otherwise emitting a document or a node (and
39
+ # its subtree), there are a few methods you might want to use:
40
+ #
41
+ # * content, text, inner_text, to_str: emit plaintext
42
+ #
43
+ # These methods will all emit the plaintext version of your
44
+ # document, meaning that entities will be replaced (e.g., "&lt;"
45
+ # will be replaced with "<"), meaning that any sanitizing will
46
+ # likely be un-done in the output.
47
+ #
48
+ # * to_s, to_xml, to_html, inner_html: emit well-formed markup
49
+ #
50
+ # These methods will all emit properly-escaped markup, meaning
51
+ # that it's suitable for consumption by browsers, parsers, etc.
52
+ #
53
+ # You may search this node's subtree using Searchable#xpath and Searchable#css
54
+ class Node
55
+ include Nokogiri::XML::PP::Node
56
+ include Nokogiri::XML::Searchable
57
+ include Enumerable
58
+
59
+ # Element node type, see Nokogiri::XML::Node#element?
60
+ ELEMENT_NODE = 1
61
+ # Attribute node type
62
+ ATTRIBUTE_NODE = 2
63
+ # Text node type, see Nokogiri::XML::Node#text?
64
+ TEXT_NODE = 3
65
+ # CDATA node type, see Nokogiri::XML::Node#cdata?
66
+ CDATA_SECTION_NODE = 4
67
+ # Entity reference node type
68
+ ENTITY_REF_NODE = 5
69
+ # Entity node type
70
+ ENTITY_NODE = 6
71
+ # PI node type
72
+ PI_NODE = 7
73
+ # Comment node type, see Nokogiri::XML::Node#comment?
74
+ COMMENT_NODE = 8
75
+ # Document node type, see Nokogiri::XML::Node#xml?
76
+ DOCUMENT_NODE = 9
77
+ # Document type node type
78
+ DOCUMENT_TYPE_NODE = 10
79
+ # Document fragment node type
80
+ DOCUMENT_FRAG_NODE = 11
81
+ # Notation node type
82
+ NOTATION_NODE = 12
83
+ # HTML document node type, see Nokogiri::XML::Node#html?
84
+ HTML_DOCUMENT_NODE = 13
85
+ # DTD node type
86
+ DTD_NODE = 14
87
+ # Element declaration type
88
+ ELEMENT_DECL = 15
89
+ # Attribute declaration type
90
+ ATTRIBUTE_DECL = 16
91
+ # Entity declaration type
92
+ ENTITY_DECL = 17
93
+ # Namespace declaration type
94
+ NAMESPACE_DECL = 18
95
+ # XInclude start type
96
+ XINCLUDE_START = 19
97
+ # XInclude end type
98
+ XINCLUDE_END = 20
99
+ # DOCB document node type
100
+ DOCB_DOCUMENT_NODE = 21
101
+
102
def initialize(name, document) # :nodoc:
  # Deliberately left without a body; both arguments are accepted and ignored here.
end
105
+
106
+ ###
107
+ # Decorate this node with the decorators set up in this node's Document
108
def decorate!
  # Hand this node to its document, which applies the decorators.
  owner = document
  owner.decorate(self)
end
111
+
112
+ # @!group Searching via XPath or CSS Queries
113
+
114
+ ###
115
+ # Search this node's immediate children using CSS selector +selector+
116
def >(selector)
  # Namespaces are read from the document's root element. When the document
  # has no root element, document.root is nil, so fall back to an empty
  # namespace map instead of raising NoMethodError on nil.
  root = document.root
  ns = root ? root.namespaces : {}
  # xpath_for returns a list of expressions; only the first one is searched.
  xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
end
120
+
121
+ # @!endgroup
122
+
123
+ # @!group Manipulating Document Structure
124
+
125
+ ###
126
+ # Add +node_or_tags+ as a child of this Node.
127
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
128
+ #
129
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
130
+ #
131
+ # Also see related method +<<+.
132
def add_child(node_or_tags)
  coerced = coerce(node_or_tags)

  # A single node is attached directly and returned.
  unless coerced.is_a?(XML::NodeSet)
    add_child_node_and_reparent_attrs(coerced)
    return coerced
  end

  # A NodeSet is attached member by member, in order, and returned as a whole.
  coerced.each { |member| add_child_node_and_reparent_attrs(member) }
  coerced
end
141
+
142
+ ###
143
+ # Add +node_or_tags+ as the first child of this Node.
144
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
145
+ #
146
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
147
+ #
148
+ # Also see related method +add_child+.
149
def prepend_child(node_or_tags)
  first = children.first
  # With no existing children, prepending is the same as appending.
  return add_child(node_or_tags) unless first

  # Mimic the error add_child would raise.
  if document? && !node_or_tags.comment? && !node_or_tags.processing_instruction?
    raise RuntimeError, "Document already has a root node"
  end

  # add_sibling is invoked through __send__ on the current first child.
  first.__send__(:add_sibling, :previous, node_or_tags)
end
158
+
159
+ ###
160
+ # Add html around this node
161
+ #
162
+ # Returns self
163
def wrap(html)
  # The first node parsed from +html+ becomes the wrapper.
  wrapper = document.parse(html).first
  # Place the wrapper right after this node, then move this node inside it.
  add_next_sibling(wrapper)
  wrapper.add_child(self)
  self
end
169
+
170
+ ###
171
+ # Add +node_or_tags+ as a child of this Node.
172
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
173
+ #
174
+ # Returns self, to support chaining of calls (e.g., root << child1 << child2)
175
+ #
176
+ # Also see related method +add_child+.
177
def <<(node_or_tags)
  # The result of add_child is discarded; self is returned so calls can be chained.
  tap { add_child(node_or_tags) }
end
181
+
182
+ ###
183
+ # Insert +node_or_tags+ before this Node (as a sibling).
184
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
185
+ #
186
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
187
+ #
188
+ # Also see related method +before+.
189
def add_previous_sibling(node_or_tags)
  # Directly under a document, only comments and processing instructions
  # may be added as siblings.
  under_document = parent && parent.document?
  if under_document && !node_or_tags.comment? && !node_or_tags.processing_instruction?
    raise ArgumentError, "A document may not have multiple root nodes."
  end

  add_sibling(:previous, node_or_tags)
end
194
+
195
+ ###
196
+ # Insert +node_or_tags+ after this Node (as a sibling).
197
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
198
+ #
199
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
200
+ #
201
+ # Also see related method +after+.
202
def add_next_sibling(node_or_tags)
  # Directly under a document, only comments and processing instructions
  # may be added as siblings.
  under_document = parent && parent.document?
  if under_document && !node_or_tags.comment? && !node_or_tags.processing_instruction?
    raise ArgumentError, "A document may not have multiple root nodes."
  end

  add_sibling(:next, node_or_tags)
end
207
+
208
+ ####
209
+ # Insert +node_or_tags+ before this node (as a sibling).
210
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
211
+ #
212
+ # Returns self, to support chaining of calls.
213
+ #
214
+ # Also see related method +add_previous_sibling+.
215
def before(node_or_tags)
  # Same insertion as add_previous_sibling, but returns self for chaining.
  tap { add_previous_sibling(node_or_tags) }
end
219
+
220
+ ####
221
+ # Insert +node_or_tags+ after this node (as a sibling).
222
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
223
+ #
224
+ # Returns self, to support chaining of calls.
225
+ #
226
+ # Also see related method +add_next_sibling+.
227
def after(node_or_tags)
  # Same insertion as add_next_sibling, but returns self for chaining.
  add_next_sibling(node_or_tags)
  return self
end
231
+
232
+ ####
233
+ # Set the inner html for this Node to +node_or_tags+
234
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
235
+ #
236
+ # Returns self.
237
+ #
238
+ # Also see related method +children=+
239
def inner_html=(node_or_tags)
  # Delegates to children=; self is returned rather than the new children.
  tap { self.children = node_or_tags }
end
243
+
244
+ ####
245
+ # Set the inner html for this Node to +node_or_tags+
246
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
247
+ #
248
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
249
+ #
250
+ # Also see related method +inner_html=+
251
def children=(node_or_tags)
  # Coerce first, so the existing children are only unlinked once the new
  # content has been coerced.
  replacement = coerce(node_or_tags)
  children.unlink

  unless replacement.is_a?(XML::NodeSet)
    add_child_node_and_reparent_attrs(replacement)
    return replacement
  end

  replacement.each { |member| add_child_node_and_reparent_attrs(member) }
  replacement
end
261
+
262
+ ####
263
+ # Replace this Node with +node_or_tags+.
264
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
265
+ #
266
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
267
+ #
268
+ # Also see related method +swap+.
269
def replace(node_or_tags)
  raise("Cannot replace a node with no parent") unless parent

  # We cannot replace a text node directly, otherwise libxml will return
  # an internal error at parser.c:13031, I don't know exactly why
  # libxml is trying to find a parent node that is an element or document
  # so I can't tell if this is bug in libxml or not. issue #775.
  #
  # Instead, a placeholder node is inserted before the text node, the text
  # node is unlinked, and the placeholder is the one that gets replaced.
  if text?
    placeholder = Nokogiri::XML::Node.new("dummy", document)
    add_previous_sibling_node(placeholder)
    unlink
    return placeholder.replace(node_or_tags)
  end

  replacement = parent.coerce(node_or_tags)

  if replacement.is_a?(XML::NodeSet)
    # Insert every member before this node, then remove this node.
    replacement.each { |member| add_previous_sibling(member) }
    unlink
  else
    replace_node(replacement)
  end

  replacement
end
293
+
294
+ ####
295
+ # Swap this Node for +node_or_tags+
296
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
297
+ #
298
+ # Returns self, to support chaining of calls.
299
+ #
300
+ # Also see related method +replace+.
301
def swap(node_or_tags)
  # Same as replace, except that self is returned instead of the replacement.
  tap { replace(node_or_tags) }
end
305
+
306
+ ####
307
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
308
def content=(string)
  # The argument is stringified and passed through encode_special_chars
  # before being stored as the native content.
  escaped = encode_special_chars(string.to_s)
  self.native_content = escaped
end
311
+
312
+ ###
313
+ # Set the parent Node for this Node
314
def parent=(parent_node)
  # Reparenting is done by the new parent's add_child; the parent is returned.
  parent_node.tap { |new_parent| new_parent.add_child(self) }
end
318
+
319
+ ###
320
+ # Adds a default namespace supplied as a string +url+ href, to self.
321
+ # The consequence is as if an xmlns attribute with supplied argument were
322
+ # present in parsed XML. A default namespace set with this method will
323
+ # not show up in #attributes, but when this node is serialized to XML an
324
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
325
def default_namespace=(url)
  # A nil prefix is what makes this the default namespace definition.
  prefix = nil
  add_namespace_definition(prefix, url)
end
328
+
329
+ ###
330
+ # Set the default namespace on this node (as would be defined with an
331
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
332
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
333
+ # for this node. You probably want #default_namespace= instead, or perhaps
334
+ # #add_namespace_definition with a nil prefix argument.
335
def namespace=(ns)
  # A nil/false +ns+ is passed straight to set_namespace; anything else must
  # be a Namespace whose document matches this node's document.
  if ns
    unless Nokogiri::XML::Namespace === ns
      raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
    end
    raise ArgumentError, "namespace must be declared on the same document" if ns.document != document
  end

  set_namespace(ns)
end
347
+
348
+ ###
349
+ # Do xinclude substitution on the subtree below node. If given a block, a
350
+ # Nokogiri::XML::ParseOptions object initialized from +options+, will be
351
+ # passed to it, allowing more convenient modification of the parser options.
352
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
  # Integer flags are wrapped in a ParseOptions object; anything else is used as given.
  parse_options = Integer === options ? Nokogiri::XML::ParseOptions.new(options) : options

  # Let the caller adjust the options before processing.
  yield(parse_options) if block_given?

  # Hand the final integer flags to the C extension.
  process_xincludes(parse_options.to_i)
end
361
+
362
+ alias :next :next_sibling
363
+ alias :previous :previous_sibling
364
+ alias :next= :add_next_sibling
365
+ alias :previous= :add_previous_sibling
366
+ alias :remove :unlink
367
+ alias :name= :node_name=
368
+ alias :add_namespace :add_namespace_definition
369
+
370
+ # @!endgroup
371
+
372
+ alias :text :content
373
+ alias :inner_text :content
374
+ alias :name :node_name
375
+ alias :type :node_type
376
+ alias :to_str :text
377
+ alias :clone :dup
378
+ alias :elements :element_children
379
+
380
+ # @!group Working With Node Attributes
381
+
382
+ ###
383
+ # Get the attribute value for the attribute +name+
384
def [](name)
  # The name is stringified before the lookup.
  attribute_name = name.to_s
  get(attribute_name)
end
387
+
388
+ ###
389
+ # Set the attribute value for the attribute +name+ to +value+
390
def []=(name, value)
  # Both the name and the value are stringified before being stored.
  attribute_name = name.to_s
  attribute_value = value.to_s
  set(attribute_name, attribute_value)
end
393
+
394
+ ####
395
+ # Returns a hash containing the node's attributes. The key is
396
+ # the attribute name without any namespace, the value is a Nokogiri::XML::Attr
397
+ # representing the attribute.
398
+ # If you need to distinguish attributes with the same name, with different namespaces
399
+ # use #attribute_nodes instead.
400
def attributes
  # Keyed by node_name; a later attribute with the same name replaces an
  # earlier one.
  by_name = {}
  attribute_nodes.each { |attr| by_name[attr.node_name] = attr }
  by_name
end
405
+
406
+ ###
407
+ # Get the attribute values for this Node.
408
def values
  # One value per attribute node, in attribute_nodes order.
  attribute_nodes.map { |attr| attr.value }
end
411
+
412
+ ###
413
+ # Do this Node's attribute values include +value+?
414
def value?(value)
  # Membership test against the list returned by #values.
  attribute_values = values
  attribute_values.include?(value)
end
417
+
418
+ ###
419
+ # Get the attribute names for this Node.
420
def keys
  # One name per attribute node, in attribute_nodes order.
  attribute_nodes.map { |attr| attr.node_name }
end
423
+
424
+ ###
425
+ # Iterate over each attribute name and value pair for this Node.
426
def each
  # Each attribute is yielded as a single [name, value] Array.
  attribute_nodes.each do |attr|
    pair = [attr.node_name, attr.value]
    yield pair
  end
end
431
+
432
+ ###
433
+ # Remove the attribute named +name+
434
def remove_attribute(name)
  # Nothing is removed, and nil is returned, when the attribute is absent.
  removed = key?(name) ? attributes[name].remove : nil
  # On JRuby the XPath context is cleared after every call.
  clear_xpath_context if Nokogiri.jruby?
  removed
end
439
+
440
+ # Get the CSS class names of a Node.
441
+ #
442
+ # This is a convenience function and is equivalent to:
443
+ # node.kwattr_values("class")
444
+ #
445
+ # @see #kwattr_values
446
+ # @see #add_class
447
+ # @see #append_class
448
+ # @see #remove_class
449
+ #
450
+ # @return [Array<String>]
451
+ #
452
+ # The CSS classes present in the Node's +class+ attribute. If
453
+ # the attribute is empty or non-existent, the return value is
454
+ # an empty array.
455
+ #
456
+ # @example
457
+ # node # => <div class="section title header"></div>
458
+ # node.classes # => ["section", "title", "header"]
459
+ #
460
def classes
  # Delegates to kwattr_values for the "class" attribute.
  attribute_name = "class"
  kwattr_values(attribute_name)
end
463
+
464
+ # Ensure HTML CSS classes are present on a +Node+. Any CSS
465
+ # classes in +names+ that already exist in the +Node+'s +class+
466
+ # attribute are _not_ added. Note that any existing duplicates
467
+ # in the +class+ attribute are not removed. Compare with
468
+ # {#append_class}.
469
+ #
470
+ # This is a convenience function and is equivalent to:
471
+ # node.kwattr_add("class", names)
472
+ #
473
+ # @see #kwattr_add
474
+ # @see #classes
475
+ # @see #append_class
476
+ # @see #remove_class
477
+ #
478
+ # @param names [String, Array<String>]
479
+ #
480
+ # CSS class names to be added to the Node's +class+
481
+ # attribute. May be a string containing whitespace-delimited
482
+ # names, or an Array of String names. Any class names already
483
+ # present will not be added. Any class names not present will
484
+ # be added. If no +class+ attribute exists, one is created.
485
+ #
486
+ # @return [Node] Returns +self+ for ease of chaining method calls.
487
+ #
488
+ # @example Ensure that a +Node+ has CSS class "section"
489
+ # node # => <div></div>
490
+ # node.add_class("section") # => <div class="section"></div>
491
+ # node.add_class("section") # => <div class="section"></div> # duplicate not added
492
+ #
493
+ # @example Ensure that a +Node+ has CSS classes "section" and "header", via a String argument.
494
+ # node # => <div class="section section"></div>
495
+ # node.add_class("section header") # => <div class="section section header"></div>
496
+ # # Note that the CSS class "section" is not added because it is already present.
497
+ # # Note also that the pre-existing duplicate CSS class "section" is not removed.
498
+ #
499
+ # @example Ensure that a +Node+ has CSS classes "section" and "header", via an Array argument.
500
+ # node # => <div></div>
501
+ # node.add_class(["section", "header"]) # => <div class="section header"></div>
502
+ #
503
def add_class(names)
  # Delegates to the generic keyword-attribute helper; its return value is passed through.
  kwattr_add "class", names
end
506
+
507
+ # Add HTML CSS classes to a +Node+, regardless of
508
+ # duplication. Compare with {#add_class}.
509
+ #
510
+ # This is a convenience function and is equivalent to:
511
+ # node.kwattr_append("class", names)
512
+ #
513
+ # @see #kwattr_append
514
+ # @see #classes
515
+ # @see #add_class
516
+ # @see #remove_class
517
+ #
518
+ # @param names [String, Array<String>]
519
+ #
520
+ # CSS class names to be appended to the Node's +class+
521
+ # attribute. May be a string containing whitespace-delimited
522
+ # names, or an Array of String names. All class names passed
523
+ # in will be appended to the +class+ attribute even if they
524
+ # are already present in the attribute value. If no +class+
525
+ # attribute exists, one is created.
526
+ #
527
+ # @return [Node] Returns +self+ for ease of chaining method calls.
528
+ #
529
+ # @example Append "section" to a +Node+'s CSS +class+ attribute
530
+ # node # => <div></div>
531
+ # node.append_class("section") # => <div class="section"></div>
532
+ # node.append_class("section") # => <div class="section section"></div> # duplicate added!
533
+ #
534
+ # @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via a String argument.
535
+ # node # => <div class="section section"></div>
536
+ # node.append_class("section header") # => <div class="section section section header"></div>
537
+ # # Note that the CSS class "section" is appended even though it is already present.
538
+ #
539
+ # @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via an Array argument.
540
+ # node # => <div></div>
541
+ # node.append_class(["section", "header"]) # => <div class="section header"></div>
542
+ # node.append_class(["section", "header"]) # => <div class="section header section header"></div>
543
+ #
544
def append_class(names)
  # Delegates to the generic keyword-attribute helper; its return value is passed through.
  kwattr_append "class", names
end
547
+
548
+ # Remove HTML CSS classes from a +Node+. Any CSS classes in +names+ that
549
+ # exist in the +Node+'s +class+ attribute are removed, including any
550
+ # multiple entries.
551
+ #
552
+ # If no CSS classes remain after this operation, or if +names+ is
553
+ # +nil+, the +class+ attribute is deleted from the node.
554
+ #
555
+ # This is a convenience function and is equivalent to:
556
+ # node.kwattr_remove("class", names)
557
+ #
558
+ # @see #kwattr_remove
559
+ # @see #classes
560
+ # @see #add_class
561
+ # @see #append_class
562
+ #
563
+ # @param names [String, Array<String>]
564
+ #
565
+ # CSS class names to be removed from the Node's +class+ attribute. May
566
+ # be a string containing whitespace-delimited names, or an Array of
567
+ # String names. Any class names already present will be removed. If no
568
+ # CSS classes remain, the +class+ attribute is deleted.
569
+ #
570
+ # @return [Node] Returns +self+ for ease of chaining method calls.
571
+ #
572
+ # @example
573
+ # node # => <div class="section header"></div>
574
+ # node.remove_class("section") # => <div class="header"></div>
575
+ # node.remove_class("header") # => <div></div> # attribute is deleted when empty
576
+ #
577
def remove_class(names = nil)
  # A nil +names+ is forwarded unchanged to the keyword-attribute helper.
  kwattr_remove "class", names
end
580
+
581
+ # Retrieve values from a keyword attribute of a Node.
582
+ #
583
+ # A "keyword attribute" is a node attribute that contains a set
584
+ # of space-delimited values. Perhaps the most familiar example
585
+ # of this is the HTML +class+ attribute used to contain CSS
586
+ # classes. But other keyword attributes exist, for instance
587
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
588
+ #
589
+ # @see #classes
590
+ # @see #kwattr_add
591
+ # @see #kwattr_append
592
+ # @see #kwattr_remove
593
+ #
594
+ # @param attribute_name [String] The name of the keyword attribute to be inspected.
595
+ #
596
+ # @return [Array<String>]
597
+ #
598
+ # The values present in the Node's +attribute_name+
599
+ # attribute. If the attribute is empty or non-existent, the
600
+ # return value is an empty array.
601
+ #
602
+ # @example
603
+ # node # => <a rel="nofollow noopener external">link</a>
604
+ # node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
605
+ #
606
+ # @since v1.11.0
607
+ #
608
def kwattr_values(attribute_name)
  raw_value = get_attribute(attribute_name)
  # A missing attribute is normalised as an empty keyword list.
  keywordify(raw_value || [])
end
611
+
612
+ # Ensure that values are present in a keyword attribute.
613
+ #
614
+ # Any values in +keywords+ that already exist in the +Node+'s
615
+ # attribute values are _not_ added. Note that any existing
616
+ # duplicates in the attribute values are not removed. Compare
617
+ # with {#kwattr_append}.
618
+ #
619
+ # A "keyword attribute" is a node attribute that contains a set
620
+ # of space-delimited values. Perhaps the most familiar example
621
+ # of this is the HTML +class+ attribute used to contain CSS
622
+ # classes. But other keyword attributes exist, for instance
623
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
624
+ #
625
+ # @see #add_class
626
+ # @see #kwattr_values
627
+ # @see #kwattr_append
628
+ # @see #kwattr_remove
629
+ #
630
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
631
+ #
632
+ # @param keywords [String, Array<String>]
633
+ #
634
+ # Keywords to be added to the attribute named
635
+ # +attribute_name+. May be a string containing
636
+ # whitespace-delimited values, or an Array of String
637
+ # values. Any values already present will not be added. Any
638
+ # values not present will be added. If the named attribute
639
+ # does not exist, it is created.
640
+ #
641
+ # @return [Node] Returns +self+ for ease of chaining method calls.
642
+ #
643
+ # @example Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
644
+ # node # => <a></a>
645
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
646
+ # node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a> # duplicate not added
647
+ #
648
+ # @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a String argument.
649
+ # node # => <a rel="nofollow nofollow"></a>
650
+ # node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
651
+ # # Note that "nofollow" is not added because it is already present.
652
+ # # Note also that the pre-existing duplicate "nofollow" is not removed.
653
+ #
654
+ # @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via an Array argument.
655
+ # node # => <a></a>
656
+ # node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
657
+ #
658
+ # @since v1.11.0
659
+ #
660
def kwattr_add(attribute_name, keywords)
  requested = keywordify(keywords)
  existing = kwattr_values(attribute_name)
  # Only keywords not already present are appended; existing entries
  # (including pre-existing duplicates) are kept untouched.
  missing = requested - existing
  set_attribute(attribute_name, (existing + missing).join(" "))
  self
end
667
+
668
+ # Add keywords to a Node's keyword attribute, regardless of
669
+ # duplication. Compare with {#kwattr_add}.
670
+ #
671
+ # A "keyword attribute" is a node attribute that contains a set
672
+ # of space-delimited values. Perhaps the most familiar example
673
+ # of this is the HTML +class+ attribute used to contain CSS
674
+ # classes. But other keyword attributes exist, for instance
675
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
676
+ #
677
+ # @see #append_class
678
+ # @see #kwattr_values
679
+ # @see #kwattr_add
680
+ # @see #kwattr_remove
681
+ #
682
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
683
+ #
684
+ # @param keywords [String, Array<String>]
685
+ #
686
+ # Keywords to be added to the attribute named
687
+ # +attribute_name+. May be a string containing
688
+ # whitespace-delimited values, or an Array of String
689
+ # values. All values passed in will be appended to the named
690
+ # attribute even if they are already present in the
691
+ # attribute. If the named attribute does not exist, it is
692
+ # created.
693
+ #
694
+ # @return [Node] Returns +self+ for ease of chaining method calls.
695
+ #
696
+ # @example Append "nofollow" to the +rel+ attribute.
697
+ # node # => <a></a>
698
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
699
+ # node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a> # duplicate added!
700
+ #
701
+ # @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
702
+ # node # => <a rel="nofollow"></a>
703
+ # node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
704
+ # # Note that "nofollow" is appended even though it is already present.
705
+ #
706
+ # @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
707
+ # node # => <a></a>
708
+ # node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
709
+ #
710
+ # @since v1.11.0
711
+ #
712
def kwattr_append(attribute_name, keywords)
  appended = keywordify(keywords)
  # No de-duplication here: every requested keyword is appended.
  combined = kwattr_values(attribute_name) + appended
  set_attribute(attribute_name, combined.join(" "))
  self
end
719
+
720
+ # Remove keywords from a keyword attribute. Any matching
721
+ # keywords that exist in the named attribute are removed,
722
+ # including any multiple entries.
723
+ #
724
+ # If no keywords remain after this operation, or if +keywords+
725
+ # is +nil+, the attribute is deleted from the node.
726
+ #
727
+ # A "keyword attribute" is a node attribute that contains a set
728
+ # of space-delimited values. Perhaps the most familiar example
729
+ # of this is the HTML +class+ attribute used to contain CSS
730
+ # classes. But other keyword attributes exist, for instance
731
+ # [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
732
+ #
733
+ # @see #remove_class
734
+ # @see #kwattr_values
735
+ # @see #kwattr_add
736
+ # @see #kwattr_append
737
+ #
738
+ # @param attribute_name [String] The name of the keyword attribute to be modified.
739
+ #
740
+ # @param keywords [String, Array<String>]
741
+ #
742
+ # Keywords to be removed from the attribute named
743
+ # +attribute_name+. May be a string containing
744
+ # whitespace-delimited values, or an Array of String
745
+ # values. Any keywords present in the named attribute will be
746
+ # removed. If no keywords remain, or if +keywords+ is nil, the
747
+ # attribute is deleted.
748
+ #
749
+ # @return [Node] Returns +self+ for ease of chaining method calls.
750
+ #
751
+ # @example
752
+ # node # => <a rel="nofollow noreferrer">link</a>
753
+ # node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
754
+ # node.kwattr_remove("rel", "noreferrer") # => <a>link</a> # attribute is deleted when empty
755
+ #
756
+ # @since v1.11.0
757
+ #
758
def kwattr_remove(attribute_name, keywords)
  remaining =
    if keywords.nil?
      # nil means "drop the whole attribute"; the current value is not even read.
      []
    else
      unwanted = keywordify(keywords)
      kwattr_values(attribute_name) - unwanted
    end

  if remaining.empty?
    remove_attribute(attribute_name)
  else
    set_attribute(attribute_name, remaining.join(" "))
  end
  self
end
774
+
775
# Alternate names for the attribute accessors defined on this class.
alias_method :delete, :remove_attribute
alias_method :get_attribute, :[]
alias_method :attr, :[]
alias_method :set_attribute, :[]=
alias_method :has_attribute?, :key?
780
+
781
+ # @!endgroup
782
+
783
###
# Returns true if this Node matches +selector+. The selector is evaluated
# from the outermost ancestor and the result is checked for this node.
def matches?(selector)
  outermost = ancestors.last
  outermost.search(selector).include?(self)
end
788
+
789
###
# Create a DocumentFragment containing +tags+ that is relative to _this_
# context node. The HTML or XML fragment class is chosen according to
# +document.html?+.
def fragment(tags)
  flavor = document.html? ? Nokogiri::HTML : Nokogiri::XML
  flavor::DocumentFragment.new(document, tags, self)
end
796
+
797
###
# Parse +string_or_io+ as a document fragment within the context of
# *this* node. Returns a XML::NodeSet containing the nodes parsed from
# +string_or_io+.
#
# +options+ may be nil (the document's HTML/XML default is used), an
# Integer bitmask, or a ParseOptions object; when a block is given the
# ParseOptions object is yielded before parsing.
def parse(string_or_io, options = nil)
  # A node that is neither an element nor a document, and that is unparented
  # or sits directly under a fragment, gives the in-context parser nothing
  # to anchor on, so the document performs the parse instead.
  unless element? || document?
    return document.parse(string_or_io, options) if !parent || parent.fragment?
  end

  options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
  options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)
  # Give the options to the user
  yield options if block_given?

  contents =
    if string_or_io.respond_to?(:read)
      string_or_io.read
    else
      string_or_io
    end

  return Nokogiri::XML::NodeSet.new(document) if contents.empty?

  # libxml2 does not obey the `recover` option after encountering errors
  # during `in_context` parsing, so recovery is emulated below by re-parsing
  # the input as a standalone HTML fragment.
  #
  # The price is that the fallback is no longer "in context": namespaces that
  # would have been inherited from the context node are not handled
  # correctly, and the degradation is silent. Preferable behavior would be
  # to either
  #
  #   a. record an error noting the fallback and pointing at the `recover` option, or
  #   b. not recover at all and raise a sensible exception.
  #
  # Background: https://github.com/sparklemotion/nokogiri/issues/313
  # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
  errors_before = document.errors.length
  node_set = in_context(contents, options.to_i)

  # Only an empty result accompanied by *new* errors counts as a failure.
  return node_set unless node_set.empty? && document.errors.length > errors_before

  raise document.errors[errors_before] unless options.recover?

  Nokogiri::HTML::DocumentFragment.parse(contents).children
end
851
+
852
###
# Returns a Hash of +{prefix => value}+ for all namespaces on this
# node and its ancestors.
#
# This method returns the same namespaces as #namespace_scopes.
#
# Returns namespaces in scope for self -- those defined on self
# element directly or any ancestor node -- as a Hash of
# attribute-name/value pairs. Note that the keys in this hash are the
# XML attributes that would be used to define this namespace,
# such as "xmlns:prefix", not just the prefix. Default namespace
# set on self will be included with key "xmlns". However,
# default namespaces set on ancestor will NOT be, even if self
# has no explicit default namespace.
def namespaces
  declarations = {}
  namespace_scopes.each do |scope|
    prefix = scope.prefix
    # A namespace without a prefix is declared through the bare "xmlns" attribute.
    attribute = prefix ? "xmlns:#{prefix}" : "xmlns"
    declarations[attribute] = scope.href
  end
  declarations
end
873
+
874
# True when this node's +type+ is COMMENT_NODE.
def comment?
  COMMENT_NODE == type
end
878
+
879
# True when this node's +type+ is CDATA_SECTION_NODE.
def cdata?
  CDATA_SECTION_NODE == type
end
883
+
884
# True when this node's +type+ is DOCUMENT_NODE, i.e. it is an
# XML::Document node.
def xml?
  DOCUMENT_NODE == type
end
888
+
889
# True when this node's +type+ is HTML_DOCUMENT_NODE, i.e. it is an
# HTML::Document node.
def html?
  HTML_DOCUMENT_NODE == type
end
893
+
894
# True when this object is an XML::Document (or an instance of a subclass).
def document?
  XML::Document === self
end
898
+
899
# True when this node's +type+ is PI_NODE (a ProcessingInstruction node).
def processing_instruction?
  PI_NODE == type
end
903
+
904
# True when this node's +type+ is TEXT_NODE.
def text?
  TEXT_NODE == type
end
908
+
909
# True when this node's +type+ is DOCUMENT_FRAG_NODE (a DocumentFragment).
def fragment?
  DOCUMENT_FRAG_NODE == type
end
913
+
914
###
# Fetch the Nokogiri::HTML::ElementDescription for this node, looked up
# by the node's +name+. Returns nil when the owning document reports
# +xml?+; the original documentation states nil is also returned for
# unknown tags.
def description
  document.xml? ? nil : Nokogiri::HTML::ElementDescription[name]
end
921
+
922
###
# Is this a read only node? Notation, entity and entity-declaration
# nodes are treated as read-only (according to gdome2).
def read_only?
  case type
  when NOTATION_NODE, ENTITY_NODE, ENTITY_DECL then true
  else false
  end
end
928
+
929
# True when this node's +type+ is ELEMENT_NODE. Also available as +elem?+.
def element?
  ELEMENT_NODE == type
end

alias elem? element?
935
+
936
+ ###
937
+ # Turn this node in to a string. If the document is HTML, this method
938
+ # returns html. If the document is XML, this method returns XML.
939
+ def to_s
940
+ document.xml? ? to_xml : to_html
941
+ end
942
+
943
# Concatenates the +to_html+ output of every child of this node.
# Any arguments are forwarded unchanged to each child's +to_html+.
def inner_html(*args)
  rendered_children = children.map do |child|
    child.to_html(*args)
  end
  rendered_children.join
end
947
+
948
# Get the path to this node as a CSS expression. The result of +path+ is
# split on "/", empty segments are dropped, each positional predicate
# "[n]" becomes ":nth-of-type(n)", and the steps are joined with " > ".
def css_path
  steps = path.split("/").reject { |step| step.length == 0 }
  steps.map { |step| step.gsub(/\[(\d+)\]/, ':nth-of-type(\1)') }.join(" > ")
end
954
+
955
###
# Get a list of ancestor Node for this Node, nearest first. If +selector+
# is given, only the ancestors found by searching the outermost ancestor
# with +selector+ are returned.
def ancestors(selector = nil)
  return NodeSet.new(document) unless respond_to?(:parent)
  return NodeSet.new(document) unless parent

  lineage = [parent]
  # Climb until an object either has no +parent+ method or has no parent.
  loop do
    tip = lineage.last
    break unless tip.respond_to?(:parent)

    upper = tip.parent
    break unless upper

    lineage << upper
  end

  return NodeSet.new(document, lineage) unless selector

  matches = lineage.last.search(selector)
  selected = lineage.select { |candidate| matches.include?(candidate) }
  NodeSet.new(document, selected)
end
978
+
979
####
# Yields self and all children to +block+ recursively. Children are
# visited before the node itself (post-order).
def traverse(&block)
  children.each do |child|
    child.traverse(&block)
  end
  block.call(self)
end
985
+
986
###
# Accept a visitor: calls +visit+ on +visitor+ with self and returns
# whatever the visitor returns.
def accept(visitor)
  visitor.visit self
end
991
+
992
###
# Test to see if this Node is equal to +other+. Two nodes are equal when
# their +pointer_id+ values match; anything falsy or lacking a
# +pointer_id+ method is never equal.
def ==(other)
  return false unless other && other.respond_to?(:pointer_id)

  pointer_id == other.pointer_id
end
999
+
1000
###
# Compare two Node objects with respect to their Document. Returns nil
# when +other+ is not a Node or belongs to a different document;
# otherwise returns the result of +compare+.
def <=>(other)
  comparable = other.is_a?(Nokogiri::XML::Node) && document == other.document
  return nil unless comparable

  compare(other)
end
1008
+
1009
+ # @!group Serialization and Generating Output
1010
+
1011
###
# Serialize Node using +options+. Save options can also be set using a
# block. See SaveOptions.
#
# These two statements are equivalent:
#
#   node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
#
# or
#
#   node.serialize(:encoding => 'UTF-8') do |config|
#     config.format.as_xml
#   end
#
# Instead of a Hash, the encoding and save flags may be given
# positionally: <tt>serialize(encoding, save_with)</tt>.
#
# The options Hash passed by the caller is never modified, so a frozen or
# shared Hash can be used safely. Returns the serialized String, tagged
# with the requested encoding (falling back to the document's encoding,
# then to UTF-8).
def serialize(*args, &block)
  options =
    if args.first.is_a?(Hash)
      # Work on a copy: the encoding is filled in below.
      args.shift.dup
    else
      { :encoding => args[0], :save_with => args[1] }
    end

  encoding = options[:encoding] || document.encoding
  options[:encoding] = encoding

  outstring = String.new
  outstring.force_encoding(Encoding.find(encoding || "utf-8"))
  io = StringIO.new(outstring)
  write_to io, options, &block
  io.string
end
1040
+
1041
###
# Serialize this Node to HTML
#
#   doc.to_html
#
# SaveOptions::DEFAULT_HTML is used unless +options+ supplies its own
# +:save_with+. See Node#write_to for a list of +options+. For formatted
# output, use Node#to_xhtml instead.
def to_html(options = {})
  to_format(SaveOptions::DEFAULT_HTML, options)
end
1051
+
1052
###
# Serialize this Node to XML using +options+
#
#   doc.to_xml(:indent => 5, :encoding => 'UTF-8')
#
# SaveOptions::DEFAULT_XML is used unless +options+ supplies its own
# +:save_with+. The caller's +options+ Hash is left untouched, so the same
# Hash can be reused for other serializers without carrying the XML save
# flags along.
#
# See Node#write_to for a list of +options+
def to_xml(options = {})
  effective = options.dup
  effective[:save_with] ||= SaveOptions::DEFAULT_XML
  serialize(effective)
end
1062
+
1063
###
# Serialize this Node to XHTML using +options+
#
#   doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
#
# SaveOptions::DEFAULT_XHTML is used unless +options+ supplies its own
# +:save_with+. See Node#write_to for a list of +options+
def to_xhtml(options = {})
  to_format(SaveOptions::DEFAULT_XHTML, options)
end
1072
+
1073
###
# Write Node to +io+ with +options+. +options+ modify the output of
# this method. Valid options are:
#
# * +:encoding+ for changing the encoding
# * +:indent_text+ the indentation text, defaults to one space
# * +:indent+ the number of +:indent_text+ to use, defaults to 2
#   (0 on JRuby)
# * +:save_with+ a combination of SaveOptions constants.
#
# To save with UTF-8 indented twice:
#
#   node.write_to(io, :encoding => 'UTF-8', :indent => 2)
#
# To save indented with two dashes:
#
#   node.write_to(io, :indent_text => '-', :indent => 2)
#
# The encoding and save flags may also be given positionally, in that
# order, and are used when the corresponding Hash key is absent:
#
#   node.write_to(io, 'UTF-8', SaveOptions::AS_XML)
#
# When a block is given, the SaveOptions object is yielded before writing.
def write_to(io, *options)
  # Keep the positional arguments separate from the named ones; rebinding
  # +options+ to the Hash would silently discard the positionals.
  named = options.first.is_a?(Hash) ? options.shift : {}
  encoding = named[:encoding] || options[0]
  if Nokogiri.jruby?
    save_options = named[:save_with] || options[1]
    indent_times = named[:indent] || 0
  else
    save_options = named[:save_with] || options[1] || SaveOptions::FORMAT
    indent_times = named[:indent] || 2
  end
  indent_text = named[:indent_text] || " "

  # Any string times 0 returns an empty string. Therefore, use the same
  # string instead of generating a new empty string for every node with
  # zero indentation.
  indentation = indent_times.zero? ? "" : (indent_text * indent_times)

  config = SaveOptions.new(save_options.to_i)
  yield config if block_given?

  native_write_to(io, encoding, indentation, config.options)
end
1112
+
1113
###
# Write Node as HTML to +io+ with +options+, using
# SaveOptions::DEFAULT_HTML unless +:save_with+ is supplied.
#
# See Node#write_to for a list of +options+
def write_html_to(io, options = {})
  write_format_to(SaveOptions::DEFAULT_HTML, io, options)
end
1120
+
1121
###
# Write Node as XHTML to +io+ with +options+, using
# SaveOptions::DEFAULT_XHTML unless +:save_with+ is supplied.
#
# See Node#write_to for a list of +options+
def write_xhtml_to(io, options = {})
  write_format_to(SaveOptions::DEFAULT_XHTML, io, options)
end
1128
+
1129
###
# Write Node as XML to +io+ with +options+
#
#   doc.write_xml_to io, :encoding => 'UTF-8'
#
# SaveOptions::DEFAULT_XML is used unless +options+ supplies its own
# +:save_with+. The caller's +options+ Hash is not modified.
#
# See Node#write_to for a list of options
def write_xml_to(io, options = {})
  effective = options.dup
  effective[:save_with] ||= SaveOptions::DEFAULT_XML
  write_to io, effective
end
1139
+
1140
# Canonicalize the subtree rooted at this node by delegating to
# +document.canonicalize+ with a filter block. The block keeps a node when
# it is this node or has this node among its ancestors; when the yielded
# item is not an XML::Node, the accompanying parent is tested instead.
def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
  subtree_root = self
  document.canonicalize(mode, inclusive_namespaces, with_comments) do |item, owner|
    probe = item.is_a?(XML::Node) ? item : owner
    probe == subtree_root || probe.ancestors.include?(subtree_root)
  end
end
1147
+
1148
+ # @!endgroup
1149
+
1150
+ protected
1151
+
1152
# Normalise +data+ into something that can be inserted into the tree:
# a NodeSet or Node is returned as-is, a DocumentFragment yields its
# children, and a String is parsed via +fragment+ and yields the resulting
# children. Documents, attributes and anything else raise ArgumentError.
def coerce(data)
  case data
  when XML::NodeSet then return data
  when XML::DocumentFragment then return data.children
  when String then return fragment(data).children
  when Document, XML::Attr
    # Rejected explicitly: these are Nodes too, so they must be caught
    # before the generic XML::Node branch below.
  when XML::Node then return data
  end

  raise ArgumentError, <<-EOERR
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
  EOERR
end
1171
+
1172
+ private
1173
+
1174
# Normalise a keyword-attribute argument into an Array of keywords.
#
# * An Enumerable is converted with +to_a+ (an Array is returned as-is), so
#   callers can safely combine the result with other Arrays using + and -.
# * A String is split on runs of whitespace.
#
# Raises ArgumentError for any other argument.
def keywordify(keywords)
  case keywords
  when Enumerable
    # Non-Array enumerables (e.g. Set) do not support Array#+ / Array#-.
    keywords.to_a
  when String
    keywords.scan(/\S+/)
  else
    raise ArgumentError, "Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}"
  end
end
1184
+
1185
# Insert +node_or_tags+ (coerced by the parent) next to this node.
# +next_or_previous+ is +:next+ to insert after this node; any other value
# inserts before it. Returns the coerced node or NodeSet. Raises when this
# node has no parent.
def add_sibling(next_or_previous, node_or_tags)
  raise("Cannot add sibling to a node with no parent") unless parent

  inserter, walker =
    if next_or_previous == :next
      # Inserting each node right after the pivot reverses their order,
      # so the set is walked backwards to keep document order.
      [:add_next_sibling_node, :reverse_each]
    else
      [:add_previous_sibling_node, :each]
    end

  coerced = parent.coerce(node_or_tags)
  unless coerced.is_a?(XML::NodeSet)
    send(inserter, coerced)
    return coerced
  end

  if text?
    # For a text node a temporary element is used as the insertion pivot
    # and unlinked again once all nodes have been placed.
    pivot = Nokogiri::XML::Node.new("dummy", document)
    send(inserter, pivot)
  else
    pivot = self
  end
  coerced.send(walker) { |sibling| pivot.send(inserter, sibling) }
  pivot.unlink if text?
  coerced
end
1206
+
1207
# Result of asking Nokogiri whether the loaded libxml2 matches "~> 2.6.0".
# When truthy, the format helpers in this class fall back to +dump_html+.
# Kept private to this class.
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
1209
+
1210
# Serialize this node with +save_option+ as the default +:save_with+ flag;
# a +:save_with+ already present in +options+ takes precedence. On libxml2
# versions flagged as having broken serialization, +dump_html+ is returned
# instead and +options+ is ignored.
#
# The caller's +options+ Hash is not modified.
def to_format(save_option, options)
  return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION

  effective = options.dup
  effective[:save_with] ||= save_option
  serialize(effective)
end
1216
+
1217
# Write this node to +io+ with +save_option+ as the default +:save_with+
# flag; a +:save_with+ already present in +options+ takes precedence. On
# libxml2 versions flagged as having broken serialization, the output of
# +dump_html+ is appended to +io+ instead and +options+ is ignored.
#
# The caller's +options+ Hash is not modified.
def write_format_to(save_option, io, options)
  return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION

  effective = options.dup
  effective[:save_with] ||= save_option
  write_to io, effective
end
1223
+
1224
# Names of the readers listed for inspection of a node; presumably consumed
# by the pretty-printing/inspect support defined elsewhere (confirm there).
def inspect_attributes
  %i[name namespace attribute_nodes children]
end
1227
+
1228
# @private
# Frozen list holding the single XPath prefix ".//". NOTE(review): presumably
# the context implied for searches run from a node; confirm against the
# search code that reads this constant.
IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
1230
+
1231
# Add +node+ as a child, then re-create every attribute of +node+ whose
# name contains a colon: each such attribute node is removed and set again
# on +node+ by name and value. Returns the collection of attributes that
# were re-created.
def add_child_node_and_reparent_attrs(node)
  add_child_node(node)

  prefixed = node.attribute_nodes.select { |candidate| candidate.name =~ /:/ }
  prefixed.each do |prefixed_attr|
    prefixed_attr.remove
    node[prefixed_attr.name] = prefixed_attr.value
  end
end
1238
+ end
1239
+ end
1240
+ end