nokogiri 1.11.1 → 1.12.0.rc1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (179) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +232 -11
  3. data/LICENSE.md +1 -1
  4. data/README.md +27 -21
  5. data/dependencies.yml +12 -12
  6. data/ext/nokogiri/depend +35 -474
  7. data/ext/nokogiri/extconf.rb +391 -243
  8. data/ext/nokogiri/gumbo.c +611 -0
  9. data/ext/nokogiri/{html_document.c → html4_document.c} +18 -23
  10. data/ext/nokogiri/html4_element_description.c +294 -0
  11. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  12. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  13. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +29 -27
  14. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  15. data/ext/nokogiri/nokogiri.c +206 -66
  16. data/ext/nokogiri/nokogiri.h +166 -76
  17. data/ext/nokogiri/test_global_handlers.c +3 -4
  18. data/ext/nokogiri/xml_attr.c +15 -15
  19. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  20. data/ext/nokogiri/xml_cdata.c +13 -18
  21. data/ext/nokogiri/xml_comment.c +19 -26
  22. data/ext/nokogiri/xml_document.c +258 -200
  23. data/ext/nokogiri/xml_document_fragment.c +13 -15
  24. data/ext/nokogiri/xml_dtd.c +54 -48
  25. data/ext/nokogiri/xml_element_content.c +31 -26
  26. data/ext/nokogiri/xml_element_decl.c +22 -22
  27. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  28. data/ext/nokogiri/xml_entity_decl.c +32 -30
  29. data/ext/nokogiri/xml_entity_reference.c +16 -18
  30. data/ext/nokogiri/xml_namespace.c +58 -49
  31. data/ext/nokogiri/xml_node.c +473 -414
  32. data/ext/nokogiri/xml_node_set.c +174 -162
  33. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  34. data/ext/nokogiri/xml_reader.c +193 -157
  35. data/ext/nokogiri/xml_relax_ng.c +29 -23
  36. data/ext/nokogiri/xml_sax_parser.c +111 -106
  37. data/ext/nokogiri/xml_sax_parser_context.c +102 -85
  38. data/ext/nokogiri/xml_sax_push_parser.c +34 -27
  39. data/ext/nokogiri/xml_schema.c +49 -41
  40. data/ext/nokogiri/xml_syntax_error.c +21 -23
  41. data/ext/nokogiri/xml_text.c +13 -17
  42. data/ext/nokogiri/xml_xpath_context.c +86 -77
  43. data/ext/nokogiri/xslt_stylesheet.c +157 -156
  44. data/gumbo-parser/CHANGES.md +63 -0
  45. data/gumbo-parser/Makefile +101 -0
  46. data/gumbo-parser/THANKS +27 -0
  47. data/gumbo-parser/src/Makefile +17 -0
  48. data/gumbo-parser/src/README.md +41 -0
  49. data/gumbo-parser/src/ascii.c +75 -0
  50. data/gumbo-parser/src/ascii.h +115 -0
  51. data/gumbo-parser/src/attribute.c +42 -0
  52. data/gumbo-parser/src/attribute.h +17 -0
  53. data/gumbo-parser/src/char_ref.c +22225 -0
  54. data/gumbo-parser/src/char_ref.h +29 -0
  55. data/gumbo-parser/src/char_ref.rl +2154 -0
  56. data/gumbo-parser/src/error.c +626 -0
  57. data/gumbo-parser/src/error.h +148 -0
  58. data/gumbo-parser/src/foreign_attrs.c +104 -0
  59. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  60. data/gumbo-parser/src/gumbo.h +943 -0
  61. data/gumbo-parser/src/insertion_mode.h +33 -0
  62. data/gumbo-parser/src/macros.h +91 -0
  63. data/gumbo-parser/src/parser.c +4886 -0
  64. data/gumbo-parser/src/parser.h +41 -0
  65. data/gumbo-parser/src/replacement.h +33 -0
  66. data/gumbo-parser/src/string_buffer.c +103 -0
  67. data/gumbo-parser/src/string_buffer.h +68 -0
  68. data/gumbo-parser/src/string_piece.c +48 -0
  69. data/gumbo-parser/src/svg_attrs.c +174 -0
  70. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  71. data/gumbo-parser/src/svg_tags.c +137 -0
  72. data/gumbo-parser/src/svg_tags.gperf +55 -0
  73. data/gumbo-parser/src/tag.c +222 -0
  74. data/gumbo-parser/src/tag_lookup.c +382 -0
  75. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  76. data/gumbo-parser/src/tag_lookup.h +13 -0
  77. data/gumbo-parser/src/token_buffer.c +79 -0
  78. data/gumbo-parser/src/token_buffer.h +71 -0
  79. data/gumbo-parser/src/token_type.h +17 -0
  80. data/gumbo-parser/src/tokenizer.c +3463 -0
  81. data/gumbo-parser/src/tokenizer.h +112 -0
  82. data/gumbo-parser/src/tokenizer_states.h +339 -0
  83. data/gumbo-parser/src/utf8.c +245 -0
  84. data/gumbo-parser/src/utf8.h +164 -0
  85. data/gumbo-parser/src/util.c +68 -0
  86. data/gumbo-parser/src/util.h +30 -0
  87. data/gumbo-parser/src/vector.c +111 -0
  88. data/gumbo-parser/src/vector.h +45 -0
  89. data/lib/nokogiri.rb +31 -50
  90. data/lib/nokogiri/css.rb +14 -14
  91. data/lib/nokogiri/css/parser.rb +2 -2
  92. data/lib/nokogiri/css/parser.y +1 -1
  93. data/lib/nokogiri/css/syntax_error.rb +1 -1
  94. data/lib/nokogiri/extension.rb +26 -0
  95. data/lib/nokogiri/gumbo.rb +14 -0
  96. data/lib/nokogiri/html.rb +31 -27
  97. data/lib/nokogiri/html4.rb +40 -0
  98. data/lib/nokogiri/{html → html4}/builder.rb +2 -2
  99. data/lib/nokogiri/{html → html4}/document.rb +4 -4
  100. data/lib/nokogiri/{html → html4}/document_fragment.rb +17 -17
  101. data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
  102. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
  103. data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
  104. data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
  105. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  106. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
  107. data/lib/nokogiri/html5.rb +473 -0
  108. data/lib/nokogiri/html5/document.rb +74 -0
  109. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  110. data/lib/nokogiri/html5/node.rb +93 -0
  111. data/lib/nokogiri/version/constant.rb +1 -1
  112. data/lib/nokogiri/version/info.rb +42 -9
  113. data/lib/nokogiri/xml.rb +35 -36
  114. data/lib/nokogiri/xml/document.rb +74 -28
  115. data/lib/nokogiri/xml/node.rb +45 -47
  116. data/lib/nokogiri/xml/parse_options.rb +2 -0
  117. data/lib/nokogiri/xml/pp.rb +2 -2
  118. data/lib/nokogiri/xml/reader.rb +2 -9
  119. data/lib/nokogiri/xml/sax.rb +4 -4
  120. data/lib/nokogiri/xml/sax/document.rb +24 -30
  121. data/lib/nokogiri/xml/xpath.rb +3 -5
  122. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  123. data/lib/nokogiri/xslt.rb +16 -16
  124. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  125. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  126. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  127. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  128. data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +0 -0
  129. data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
  130. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  131. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  132. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  133. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  134. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  135. metadata +117 -109
  136. data/ext/nokogiri/html_document.h +0 -10
  137. data/ext/nokogiri/html_element_description.c +0 -279
  138. data/ext/nokogiri/html_element_description.h +0 -10
  139. data/ext/nokogiri/html_entity_lookup.c +0 -32
  140. data/ext/nokogiri/html_entity_lookup.h +0 -8
  141. data/ext/nokogiri/html_sax_parser_context.c +0 -118
  142. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  143. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  144. data/ext/nokogiri/xml_attr.h +0 -9
  145. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  146. data/ext/nokogiri/xml_cdata.h +0 -9
  147. data/ext/nokogiri/xml_comment.h +0 -9
  148. data/ext/nokogiri/xml_document.h +0 -23
  149. data/ext/nokogiri/xml_document_fragment.h +0 -10
  150. data/ext/nokogiri/xml_dtd.h +0 -10
  151. data/ext/nokogiri/xml_element_content.h +0 -10
  152. data/ext/nokogiri/xml_element_decl.h +0 -9
  153. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  154. data/ext/nokogiri/xml_entity_decl.h +0 -10
  155. data/ext/nokogiri/xml_entity_reference.h +0 -9
  156. data/ext/nokogiri/xml_io.c +0 -63
  157. data/ext/nokogiri/xml_io.h +0 -11
  158. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  159. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  160. data/ext/nokogiri/xml_namespace.h +0 -14
  161. data/ext/nokogiri/xml_node.h +0 -13
  162. data/ext/nokogiri/xml_node_set.h +0 -12
  163. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  164. data/ext/nokogiri/xml_reader.h +0 -10
  165. data/ext/nokogiri/xml_relax_ng.h +0 -9
  166. data/ext/nokogiri/xml_sax_parser.h +0 -39
  167. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  168. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  169. data/ext/nokogiri/xml_schema.h +0 -9
  170. data/ext/nokogiri/xml_syntax_error.h +0 -25
  171. data/ext/nokogiri/xml_text.h +0 -9
  172. data/ext/nokogiri/xml_xpath_context.h +0 -10
  173. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  174. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
  175. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  176. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  177. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
  178. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
  179. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  require 'pathname'
@@ -13,11 +14,12 @@ module Nokogiri
13
14
  # Nokogiri::XML::Searchable#xpath
14
15
  #
15
16
  class Document < Nokogiri::XML::Node
16
- # I'm ignoring unicode characters here.
17
- # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
17
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
18
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
19
+ # characters in NCNAMEs.
18
20
  NCNAME_START_CHAR = "A-Za-z_"
19
- NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
20
- NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
21
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
22
+ NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
21
23
 
22
24
  ##
23
25
  # Parse an XML file.
@@ -79,6 +81,35 @@ module Nokogiri
79
81
  return doc
80
82
  end
81
83
 
84
+ ##
85
+ # @!method wrap(java_document)
86
+ # @!scope class
87
+ #
88
+ # Create a {Document} using an existing Java DOM document object.
89
+ #
90
+ # The returned {Document} shares the same underlying data structure as the Java object, so
91
+ # changes in one are reflected in the other.
92
+ #
93
+ # @param java_document [Java::OrgW3cDom::Document]
94
+ # @return [Nokogiri::XML::Document]
95
+ # @note This method is only available when running JRuby.
96
+ # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
97
+ # @see #to_java
98
+
99
+ ##
100
+ # @!method to_java()
101
+ #
102
+ # Returns the underlying Java DOM document object for the {Document}.
103
+ #
104
+ # The returned Java object shares the same underlying data structure as the {Document}, so
105
+ # changes in one are reflected in the other.
106
+ #
107
+ # @return [Java::OrgW3cDom::Document]
108
+ # @note This method is only available when running JRuby.
109
+ # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
110
+ # @see .wrap
111
+
112
+
82
113
  # A list of Nokogiri::XML::SyntaxError found when parsing a document
83
114
  attr_accessor :errors
84
115
 
@@ -88,33 +119,58 @@ module Nokogiri
88
119
  end
89
120
 
90
121
  ##
91
- # Create an element with +name+, and optionally setting the content and attributes.
122
+ # Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
123
+ # setting contents or attributes.
124
+ #
125
+ # Arguments may be passed to initialize the element:
126
+ # - a +Hash+ argument will be used to set attributes
127
+ # - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
92
128
  #
93
- # doc.create_element "div" # <div></div>
94
- # doc.create_element "div", :class => "container" # <div class='container'></div>
95
- # doc.create_element "div", "contents" # <div>contents</div>
96
- # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
97
- # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
129
+ # A block may be passed to mutate the node.
98
130
  #
99
- def create_element name, *args, &block
131
+ # @param name [String]
132
+ # @param contents_or_attrs [#to_s,Hash]
133
+ # @yieldparam node [Nokogiri::XML::Element]
134
+ # @return [Nokogiri::XML::Element]
135
+ #
136
+ # @example An empty element without attributes
137
+ # doc.create_element("div")
138
+ # # => <div></div>
139
+ #
140
+ # @example An element with contents
141
+ # doc.create_element("div", "contents")
142
+ # # => <div>contents</div>
143
+ #
144
+ # @example An element with attributes
145
+ # doc.create_element("div", {"class" => "container"})
146
+ # # => <div class='container'></div>
147
+ #
148
+ # @example An element with contents and attributes
149
+ # doc.create_element("div", "contents", {"class" => "container"})
150
+ # # => <div class='container'>contents</div>
151
+ #
152
+ # @example Passing a block to mutate the element
153
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
154
+ #
155
+ def create_element(name, *contents_or_attrs, &block)
100
156
  elm = Nokogiri::XML::Element.new(name, self, &block)
101
- args.each do |arg|
157
+ contents_or_attrs.each do |arg|
102
158
  case arg
103
159
  when Hash
104
- arg.each { |k,v|
160
+ arg.each do |k, v|
105
161
  key = k.to_s
106
162
  if key =~ NCNAME_RE
107
- ns_name = key.split(":", 2)[1]
108
- elm.add_namespace_definition ns_name, v
163
+ ns_name = Regexp.last_match(1)
164
+ elm.add_namespace_definition(ns_name, v)
109
165
  else
110
166
  elm[k.to_s] = v.to_s
111
167
  end
112
- }
168
+ end
113
169
  else
114
170
  elm.content = arg
115
171
  end
116
172
  end
117
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
173
+ if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
118
174
  elm.namespace = ns
119
175
  end
120
176
  elm
@@ -262,24 +318,14 @@ module Nokogiri
262
318
  end
263
319
  alias :<< :add_child
264
320
 
265
- ##
266
- # +JRuby+
267
- # Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
268
- def self.wrap(document) end if false # native-ext provides Document.wrap
269
-
270
- ##
271
- # +JRuby+
272
- # Returns Java's org.w3c.dom.document of this Document.
273
- def to_java; end if false # JRuby provides #to_java
274
-
275
321
  private
322
+
276
323
  def self.empty_doc? string_or_io
277
324
  string_or_io.nil? ||
278
325
  (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
279
326
  (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
280
327
  end
281
328
 
282
- # @private
283
329
  IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
284
330
 
285
331
  def inspect_attributes
@@ -1,68 +1,57 @@
1
1
  # encoding: UTF-8
2
2
  # frozen_string_literal: true
3
3
  require "stringio"
4
- require "nokogiri/xml/node/save_options"
5
4
 
6
5
  module Nokogiri
7
6
  module XML
8
- ####
9
- # Nokogiri::XML::Node is your window to the fun filled world of dealing
10
- # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
11
- # to a hash with regard to attributes. For example (from irb):
7
+ ##
8
+ # {Nokogiri::XML::Node} is your window to the fun filled world of dealing with XML and HTML
9
+ # tags. A {Nokogiri::XML::Node} may be treated similarly to a hash with regard to attributes. For
10
+ # example:
12
11
  #
13
- # irb(main):004:0> node
14
- # => <a href="#foo" id="link">link</a>
15
- # irb(main):005:0> node['href']
16
- # => "#foo"
17
- # irb(main):006:0> node.keys
18
- # => ["href", "id"]
19
- # irb(main):007:0> node.values
20
- # => ["#foo", "link"]
21
- # irb(main):008:0> node['class'] = 'green'
22
- # => "green"
23
- # irb(main):009:0> node
24
- # => <a href="#foo" id="link" class="green">link</a>
25
- # irb(main):010:0>
12
+ # node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
13
+ # node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
14
+ # node['href'] # => "#foo"
15
+ # node.keys # => ["href", "id"]
16
+ # node.values # => ["#foo", "link"]
17
+ # node['class'] = 'green' # => "green"
18
+ # node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
26
19
  #
27
- # See Nokogiri::XML::Node#[] and Nokogiri::XML#[]= for more information.
20
+ # See the method group entitled "Working With Node Attributes" for the full set of methods.
28
21
  #
29
- # Nokogiri::XML::Node also has methods that let you move around your
22
+ # {Nokogiri::XML::Node} also has methods that let you move around your
30
23
  # tree. For navigating your tree, see:
31
24
  #
32
- # * Nokogiri::XML::Node#parent
33
- # * Nokogiri::XML::Node#children
34
- # * Nokogiri::XML::Node#next
35
- # * Nokogiri::XML::Node#previous
36
- #
25
+ # * {#parent}
26
+ # * {#children}
27
+ # * {#next}
28
+ # * {#previous}
37
29
  #
38
30
  # When printing or otherwise emitting a document or a node (and
39
31
  # its subtree), there are a few methods you might want to use:
40
32
  #
41
- # * content, text, inner_text, to_str: emit plaintext
42
- #
43
- # These methods will all emit the plaintext version of your
44
- # document, meaning that entities will be replaced (e.g., "&lt;"
45
- # will be replaced with "<"), meaning that any sanitizing will
46
- # likely be un-done in the output.
33
+ # * {#content}, {#text}, {#inner_text}, {#to_str}: These methods will all <b>emit plaintext</b>,
34
+ # meaning that entities will be replaced (e.g., "&lt;" will be replaced with "<"), meaning
35
+ # that any sanitizing will likely be un-done in the output.
47
36
  #
48
- # * to_s, to_xml, to_html, inner_html: emit well-formed markup
37
+ # * {#to_s}, {#to_xml}, {#to_html}, {#inner_html}: These methods will all <b>emit
38
+ # properly-escaped markup</b>, meaning that it's suitable for consumption by browsers,
39
+ # parsers, etc.
49
40
  #
50
- # These methods will all emit properly-escaped markup, meaning
51
- # that it's suitable for consumption by browsers, parsers, etc.
41
+ # You may search this node's subtree using {#xpath} and {#css}
52
42
  #
53
- # You may search this node's subtree using Searchable#xpath and Searchable#css
54
43
  class Node
55
44
  include Nokogiri::XML::PP::Node
56
45
  include Nokogiri::XML::Searchable
57
46
  include Enumerable
58
47
 
59
- # Element node type, see Nokogiri::XML::Node#element?
48
+ # Element node type, see {Nokogiri::XML::Node#element?}
60
49
  ELEMENT_NODE = 1
61
50
  # Attribute node type
62
51
  ATTRIBUTE_NODE = 2
63
- # Text node type, see Nokogiri::XML::Node#text?
52
+ # Text node type, see {Nokogiri::XML::Node#text?}
64
53
  TEXT_NODE = 3
65
- # CDATA node type, see Nokogiri::XML::Node#cdata?
54
+ # CDATA node type, see {Nokogiri::XML::Node#cdata?}
66
55
  CDATA_SECTION_NODE = 4
67
56
  # Entity reference node type
68
57
  ENTITY_REF_NODE = 5
@@ -70,9 +59,9 @@ module Nokogiri
70
59
  ENTITY_NODE = 6
71
60
  # PI node type
72
61
  PI_NODE = 7
73
- # Comment node type, see Nokogiri::XML::Node#comment?
62
+ # Comment node type, see {Nokogiri::XML::Node#comment?}
74
63
  COMMENT_NODE = 8
75
- # Document node type, see Nokogiri::XML::Node#xml?
64
+ # Document node type, see {Nokogiri::XML::Node#xml?}
76
65
  DOCUMENT_NODE = 9
77
66
  # Document type node type
78
67
  DOCUMENT_TYPE_NODE = 10
@@ -80,7 +69,7 @@ module Nokogiri
80
69
  DOCUMENT_FRAG_NODE = 11
81
70
  # Notation node type
82
71
  NOTATION_NODE = 12
83
- # HTML document node type, see Nokogiri::XML::Node#html?
72
+ # HTML document node type, see {Nokogiri::XML::Node#html?}
84
73
  HTML_DOCUMENT_NODE = 13
85
74
  # DTD node type
86
75
  DTD_NODE = 14
@@ -99,8 +88,15 @@ module Nokogiri
99
88
  # DOCB document node type
100
89
  DOCB_DOCUMENT_NODE = 21
101
90
 
102
- def initialize(name, document) # :nodoc:
103
- # ... Ya. This is empty on purpose.
91
+ ##
92
+ # Create a new node with +name+ sharing GC lifecycle with +document+.
93
+ # @param name [String]
94
+ # @param document [Nokogiri::XML::Document]
95
+ # @yieldparam node [Nokogiri::XML::Node]
96
+ # @return [Nokogiri::XML::Node]
97
+ # @see Nokogiri::XML::Node.new
98
+ def initialize(name, document)
99
+ # This is intentionally empty.
104
100
  end
105
101
 
106
102
  ###
@@ -840,7 +836,7 @@ module Nokogiri
840
836
  node_set = in_context(contents, options.to_i)
841
837
  if (node_set.empty? && (document.errors.length > error_count))
842
838
  if options.recover?
843
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
839
+ fragment = Nokogiri::HTML4::DocumentFragment.parse contents
844
840
  node_set = fragment.children
845
841
  else
846
842
  raise document.errors[error_count]
@@ -886,7 +882,7 @@ module Nokogiri
886
882
  type == DOCUMENT_NODE
887
883
  end
888
884
 
889
- # Returns true if this is an HTML::Document node
885
+ # Returns true if this is an HTML4::Document node
890
886
  def html?
891
887
  type == HTML_DOCUMENT_NODE
892
888
  end
@@ -912,11 +908,11 @@ module Nokogiri
912
908
  end
913
909
 
914
910
  ###
915
- # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
911
+ # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
916
912
  # nil on XML documents and on unknown tags.
917
913
  def description
918
914
  return nil if document.xml?
919
- Nokogiri::HTML::ElementDescription[name]
915
+ Nokogiri::HTML4::ElementDescription[name]
920
916
  end
921
917
 
922
918
  ###
@@ -1238,3 +1234,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1238
1234
  end
1239
1235
  end
1240
1236
  end
1237
+
1238
+ require_relative "node/save_options"
@@ -71,6 +71,8 @@ module Nokogiri
71
71
 
72
72
  # the default options used for parsing XML documents
73
73
  DEFAULT_XML = RECOVER | NONET
74
+ # the default options used for parsing XSLT stylesheets
75
+ DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
74
76
  # the default options used for parsing HTML documents
75
77
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
76
78
  # the default options used for parsing XML schemas
@@ -1,3 +1,3 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xml/pp/node'
3
- require 'nokogiri/xml/pp/character_data'
2
+ require_relative "pp/node"
3
+ require_relative "pp/character_data"
@@ -86,7 +86,8 @@ module Nokogiri
86
86
  private :initialize
87
87
 
88
88
  ###
89
- # Get a list of attributes for the current node.
89
+ # Get the attributes of the current node as a Hash
90
+ # @return [Hash<String, String>] Attribute names and values
90
91
  def attributes
91
92
  attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
92
93
  hash[node.name] = node.to_s
@@ -96,14 +97,6 @@ module Nokogiri
96
97
  attrs_hash
97
98
  end
98
99
 
99
- ###
100
- # Get a list of attributes for the current node
101
- def attribute_nodes
102
- nodes = attr_nodes
103
- nodes.each { |v| v.instance_variable_set(:@_r, self) }
104
- nodes
105
- end
106
-
107
100
  ###
108
101
  # Move the cursor through the document yielding the cursor to the block
109
102
  def each
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xml/sax/document'
3
- require 'nokogiri/xml/sax/parser_context'
4
- require 'nokogiri/xml/sax/parser'
5
- require 'nokogiri/xml/sax/push_parser'
2
+ require_relative "sax/document"
3
+ require_relative "sax/parser_context"
4
+ require_relative "sax/parser"
5
+ require_relative "sax/push_parser"
@@ -2,20 +2,19 @@
2
2
  module Nokogiri
3
3
  module XML
4
4
  ###
5
- # SAX Parsers are event driven parsers. Nokogiri provides two different
6
- # event based parsers when dealing with XML. If you want to do SAX style
7
- # parsing using HTML, check out Nokogiri::HTML::SAX.
5
+ # SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
6
+ # dealing with XML. If you want to do SAX style parsing using HTML, check out
7
+ # Nokogiri::HTML4::SAX.
8
8
  #
9
- # The basic way a SAX style parser works is by creating a parser,
10
- # telling the parser about the events we're interested in, then giving
11
- # the parser some XML to process. The parser will notify you when
12
- # it encounters events you said you would like to know about.
9
+ # The basic way a SAX style parser works is by creating a parser, telling the parser about the
10
+ # events we're interested in, then giving the parser some XML to process. The parser will notify
11
+ # you when it encounters events you said you would like to know about.
13
12
  #
14
- # To register for events, you simply subclass Nokogiri::XML::SAX::Document,
15
- # and implement the methods for which you would like notification.
13
+ # To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
14
+ # methods for which you would like notification.
16
15
  #
17
- # For example, if I want to be notified when a document ends, and when an
18
- # element starts, I would write a class like this:
16
+ # For example, if I want to be notified when a document ends, and when an element starts, I
17
+ # would write a class like this:
19
18
  #
20
19
  # class MyDocument < Nokogiri::XML::SAX::Document
21
20
  # def end_document
@@ -27,8 +26,7 @@ module Nokogiri
27
26
  # end
28
27
  # end
29
28
  #
30
- # Then I would instantiate a SAX parser with this document, and feed the
31
- # parser some XML
29
+ # Then I would instantiate a SAX parser with this document, and feed the parser some XML
32
30
  #
33
31
  # # Create a new parser
34
32
  # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
@@ -36,25 +34,21 @@ module Nokogiri
36
34
  # # Feed the parser some XML
37
35
  # parser.parse(File.open(ARGV[0]))
38
36
  #
39
- # Now my document handler will be called when each node starts, and when
40
- # then document ends. To see what kinds of events are available, take
41
- # a look at Nokogiri::XML::SAX::Document.
37
+ # Now my document handler will be called when each node starts, and when the document ends. To
38
+ # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
42
39
  #
43
- # Two SAX parsers for XML are available, a parser that reads from a string
44
- # or IO object as it feels necessary, and a parser that lets you spoon
45
- # feed it XML. If you want to let Nokogiri deal with reading your XML,
46
- # use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
40
+ # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
41
+ # feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
42
+ # deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine-grained
47
43
  # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
48
44
  module SAX
49
45
  ###
50
- # This class is used for registering types of events you are interested
51
- # in handling. All of the methods on this class are available as
52
- # possible events while parsing an XML document. To register for any
53
- # particular event, just subclass this class and implement the methods
54
- # you are interested in knowing about.
46
+ # This class is used for registering types of events you are interested in handling. All of
47
+ # the methods on this class are available as possible events while parsing an XML document. To
48
+ # register for any particular event, just subclass this class and implement the methods you
49
+ # are interested in knowing about.
55
50
  #
56
- # To only be notified about start and end element events, write a class
57
- # like this:
51
+ # To only be notified about start and end element events, write a class like this:
58
52
  #
59
53
  # class MyDocument < Nokogiri::XML::SAX::Document
60
54
  # def start_element name, attrs = []
@@ -66,8 +60,8 @@ module Nokogiri
66
60
  # end
67
61
  # end
68
62
  #
69
- # You can use this event handler for any SAX style parser included with
70
- # Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
63
+ # You can use this event handler for any SAX style parser included with Nokogiri. See
64
+ # Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
71
65
  class Document
72
66
  ###
73
67
  # Called when an XML declaration is parsed
@@ -129,7 +123,7 @@ module Nokogiri
129
123
  end
130
124
 
131
125
  ###
132
- # Characters read between a tag. This method might be called multiple
126
+ # Characters read between a tag. This method might be called multiple
133
127
  # times given one contiguous string of characters.
134
128
  #
135
129
  # +string+ contains the character data