nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,16 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
5
+ # :nodoc: all
3
6
  module PP
4
7
  module CharacterData
5
- def pretty_print pp # :nodoc:
6
- nice_name = self.class.name.split('::').last
7
- pp.group(2, "#(#{nice_name} ", ')') do
8
- pp.pp text
8
+ def pretty_print(pp)
9
+ nice_name = self.class.name.split("::").last
10
+ pp.group(2, "#(#{nice_name} ", ")") do
11
+ pp.pp(text)
9
12
  end
10
13
  end
11
14
 
12
- def inspect # :nodoc:
13
- "#<#{self.class.name}:#{sprintf("0x%x",object_id)} #{text.inspect}>"
15
+ def inspect
16
+ "#<#{self.class.name}:#{format("0x%x", object_id)} #{text.inspect}>"
14
17
  end
15
18
  end
16
19
  end
@@ -1,53 +1,70 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
5
+ # :nodoc: all
3
6
  module PP
4
7
  module Node
5
- def inspect # :nodoc:
6
- attributes = inspect_attributes.reject { |x|
7
- begin
8
- attribute = send x
9
- !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
10
- rescue NoMethodError
11
- true
12
- end
13
- }.map { |attribute|
14
- "#{attribute.to_s.sub(/_\w+/, 's')}=#{send(attribute).inspect}"
15
- }.join ' '
16
- "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{attributes}>"
17
- end
8
+ COLLECTIONS = [:attribute_nodes, :children]
18
9
 
19
- def pretty_print pp # :nodoc:
20
- nice_name = self.class.name.split('::').last
21
- pp.group(2, "#(#{nice_name}:#{sprintf("0x%x", object_id)} {", '})') do
10
+ def inspect
11
+ # handle the case where an exception is thrown during object construction
12
+ if respond_to?(:data_ptr?) && !data_ptr?
13
+ return "#<#{self.class}:#{format("0x%x", object_id)} (no data)>"
14
+ end
22
15
 
16
+ attributes = inspect_attributes.reject do |x|
17
+ attribute = send(x)
18
+ !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
19
+ rescue NoMethodError
20
+ true
21
+ end
22
+ attributes = if inspect_attributes.length == 1
23
+ send(attributes.first).inspect
24
+ else
25
+ attributes.map do |attribute|
26
+ "#{attribute}=#{send(attribute).inspect}"
27
+ end.join(" ")
28
+ end
29
+ "#<#{self.class}:#{format("0x%x", object_id)} #{attributes}>"
30
+ end
31
+
32
+ def pretty_print(pp)
33
+ nice_name = self.class.name.split("::").last
34
+ pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
23
35
  pp.breakable
24
- attrs = inspect_attributes.map { |t|
36
+
37
+ attrs = inspect_attributes.filter_map do |t|
25
38
  [t, send(t)] if respond_to?(t)
26
- }.compact.find_all { |x|
39
+ end.find_all do |x|
27
40
  if x.last
28
- if [:attribute_nodes, :children].include? x.first
41
+ if COLLECTIONS.include?(x.first)
29
42
  !x.last.empty?
30
43
  else
31
44
  true
32
45
  end
33
46
  end
34
- }
47
+ end
35
48
 
36
- pp.seplist(attrs) do |v|
37
- if [:attribute_nodes, :children].include? v.first
38
- pp.group(2, "#{v.first.to_s.sub(/_\w+$/, 's')} = [", "]") do
39
- pp.breakable
40
- pp.seplist(v.last) do |item|
41
- pp.pp item
49
+ if inspect_attributes.length == 1
50
+ pp.pp(attrs.first.last)
51
+ else
52
+ pp.seplist(attrs) do |v|
53
+ if COLLECTIONS.include?(v.first)
54
+ pp.group(2, "#{v.first} = [", "]") do
55
+ pp.breakable
56
+ pp.seplist(v.last) do |item|
57
+ pp.pp(item)
58
+ end
42
59
  end
60
+ else
61
+ pp.text("#{v.first} = ")
62
+ pp.pp(v.last)
43
63
  end
44
- else
45
- pp.text "#{v.first} = "
46
- pp.pp v.last
47
64
  end
48
65
  end
49
- pp.breakable
50
66
 
67
+ pp.breakable
51
68
  end
52
69
  end
53
70
  end
@@ -1,2 +1,4 @@
1
- require 'nokogiri/xml/pp/node'
2
- require 'nokogiri/xml/pp/character_data'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "pp/node"
4
+ require_relative "pp/character_data"
@@ -1,7 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class ProcessingInstruction < Node
4
- def initialize document, name, content
6
+ def initialize(document, name, content)
7
+ super(document, name)
5
8
  end
6
9
  end
7
10
  end
@@ -1,32 +1,36 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  ###
4
- # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
5
- # would move. The Reader is given an XML document, and yields nodes
6
- # to an each block.
6
+ # The Reader parser allows you to effectively pull parse an \XML document. Once instantiated,
7
+ # call Nokogiri::XML::Reader#each to iterate over each node.
7
8
  #
8
- # Here is an example of usage:
9
+ # Nokogiri::XML::Reader parses an \XML document similar to the way a cursor would move. The
10
+ # Reader is given an \XML document, and yields nodes to an each block.
9
11
  #
10
- # reader = Nokogiri::XML::Reader(<<-eoxml)
11
- # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
12
- # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
13
- # </x>
14
- # eoxml
12
+ # The Reader parser might be good for when you need the speed and low memory usage of a \SAX
13
+ # parser, but do not want to write a SAX::Document handler.
15
14
  #
16
- # reader.each do |node|
15
+ # Here is an example of usage:
17
16
  #
18
- # # node is an instance of Nokogiri::XML::Reader
19
- # puts node.name
17
+ # reader = Nokogiri::XML::Reader.new <<~XML
18
+ # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
19
+ # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
20
+ # </x>
21
+ # XML
20
22
  #
21
- # end
23
+ # reader.each do |node|
24
+ # # node is an instance of Nokogiri::XML::Reader
25
+ # puts node.name
26
+ # end
22
27
  #
23
- # Note that Nokogiri::XML::Reader#each can only be called once!! Once
24
- # the cursor moves through the entire document, you must parse the
25
- # document again. So make sure that you capture any information you
26
- # need during the first iteration.
28
+ # Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
29
+ # document, you must parse the document again. It may be better to capture all information you
30
+ # need during a single iteration.
27
31
  #
28
- # The Reader parser is good for when you need the speed of a SAX parser,
29
- # but do not want to write a Document handler.
32
+ # libxml2 does not support error recovery in the Reader parser. The +RECOVER+ ParseOption is
33
+ # ignored. If a syntax error is encountered during parsing, an exception will be raised.
30
34
  class Reader
31
35
  include Enumerable
32
36
 
@@ -63,47 +67,70 @@ module Nokogiri
63
67
  TYPE_END_ELEMENT = 15
64
68
  # Entity end node type
65
69
  TYPE_END_ENTITY = 16
66
- # XML Declaration node type
70
+ # \XML Declaration node type
67
71
  TYPE_XML_DECLARATION = 17
68
72
 
69
73
  # A list of errors encountered while parsing
70
74
  attr_accessor :errors
71
75
 
72
- # The encoding for the document
73
- attr_reader :encoding
74
-
75
- # The XML source
76
+ # The \XML source
76
77
  attr_reader :source
77
78
 
78
- alias :self_closing? :empty_element?
79
+ alias_method :self_closing?, :empty_element?
80
+
81
+ # :call-seq:
82
+ # Reader.new(input) { |options| ... } → Reader
83
+ # Reader.new(input, url:, encoding:, options:) { |options| ... } → Reader
84
+ #
85
+ # Create a new Reader to parse an \XML document.
86
+ #
87
+ # [Required Parameters]
88
+ # - +input+ (String | IO): The \XML document to parse.
89
+ #
90
+ # [Optional Parameters]
91
+ # - +url:+ (String) The base URL of the document.
92
+ # - +encoding:+ (String) The name of the encoding of the document.
93
+ # - +options:+ (Integer | ParseOptions) Options to control the parser behavior.
94
+ # Defaults to +ParseOptions::STRICT+.
95
+ #
96
+ # [Yields]
97
+ # If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before
98
+ # the fragment is parsed. See Nokogiri::XML::ParseOptions for more information.
99
+ def self.new(
100
+ string_or_io,
101
+ url_ = nil, encoding_ = nil, options_ = ParseOptions::STRICT,
102
+ url: url_, encoding: encoding_, options: options_
103
+ )
104
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
105
+ yield options if block_given?
106
+
107
+ if string_or_io.respond_to?(:read)
108
+ return Reader.from_io(string_or_io, url, encoding, options.to_i)
109
+ end
79
110
 
80
- def initialize source, url = nil, encoding = nil # :nodoc:
111
+ Reader.from_memory(string_or_io, url, encoding, options.to_i)
112
+ end
113
+
114
+ private def initialize(source, url = nil, encoding = nil) # :nodoc:
81
115
  @source = source
82
116
  @errors = []
83
117
  @encoding = encoding
84
118
  end
85
- private :initialize
86
119
 
87
- ###
88
- # Get a list of attributes for the current node.
120
+ # Get the attributes and namespaces of the current node as a Hash.
121
+ #
122
+ # This is the union of Reader#attribute_hash and Reader#namespaces
123
+ #
124
+ # [Returns]
125
+ # (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
89
126
  def attributes
90
- Hash[attribute_nodes.map { |node|
91
- [node.name, node.to_s]
92
- }].merge(namespaces || {})
93
- end
94
-
95
- ###
96
- # Get a list of attributes for the current node
97
- def attribute_nodes
98
- nodes = attr_nodes
99
- nodes.each { |v| v.instance_variable_set(:@_r, self) }
100
- nodes
127
+ attribute_hash.merge(namespaces)
101
128
  end
102
129
 
103
130
  ###
104
131
  # Move the cursor through the document yielding the cursor to the block
105
132
  def each
106
- while cursor = self.read
133
+ while (cursor = read)
107
134
  yield cursor
108
135
  end
109
136
  end
@@ -1,32 +1,75 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class << self
4
- ###
5
- # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
6
- # See Nokogiri::XML::RelaxNG for an example.
7
- def RelaxNG string_or_io
8
- RelaxNG.new(string_or_io)
6
+ # :call-seq:
7
+ # RelaxNG(input) → Nokogiri::XML::RelaxNG
8
+ # RelaxNG(input, options:) → Nokogiri::XML::RelaxNG
9
+ #
10
+ # Convenience method for Nokogiri::XML::RelaxNG.new
11
+ def RelaxNG(...)
12
+ RelaxNG.new(...)
9
13
  end
10
14
  end
11
15
 
12
- ###
13
- # Nokogiri::XML::RelaxNG is used for validating XML against a
14
- # RelaxNG schema.
16
+ # Nokogiri::XML::RelaxNG is used for validating \XML against a RELAX NG schema definition.
17
+ #
18
+ # 🛡 <b>Do not use this class for untrusted schema documents.</b> RELAX NG input is always
19
+ # treated as *trusted*, meaning that the underlying parsing libraries <b>will access network
20
+ # resources</b>. This is counter to Nokogiri's "untrusted by default" security policy, but is an
21
+ # unfortunate limitation of the underlying libraries.
22
+ #
23
+ # *Example:* Determine whether an \XML document is valid.
24
+ #
25
+ # schema = Nokogiri::XML::RelaxNG.new(File.read(RELAX_NG_FILE))
26
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
27
+ # schema.valid?(doc) # Boolean
15
28
  #
16
- # == Synopsis
29
+ # *Example:* Validate an \XML document against a \RelaxNG schema, and capture any errors that are found.
17
30
  #
18
- # Validate an XML document against a RelaxNG schema. Loop over the errors
19
- # that are returned and print them out:
31
+ # schema = Nokogiri::XML::RelaxNG.new(File.open(RELAX_NG_FILE))
32
+ # doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
33
+ # errors = schema.validate(doc) # Array<SyntaxError>
20
34
  #
21
- # schema = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE))
22
- # doc = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
35
+ # *Example:* Validate an \XML document using a Document containing a RELAX NG schema definition.
23
36
  #
24
- # schema.validate(doc).each do |error|
25
- # puts error.message
26
- # end
37
+ # schema_doc = Nokogiri::XML::Document.parse(File.read(RELAX_NG_FILE))
38
+ # schema = Nokogiri::XML::RelaxNG.from_document(schema_doc)
39
+ # doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
40
+ # schema.valid?(doc) # Boolean
27
41
  #
28
- # The list of errors are Nokogiri::XML::SyntaxError objects.
29
42
  class RelaxNG < Nokogiri::XML::Schema
43
+ # :call-seq:
44
+ # new(input) → Nokogiri::XML::RelaxNG
45
+ # new(input, options:) → Nokogiri::XML::RelaxNG
46
+ #
47
+ # Parse a RELAX NG schema definition from a String or IO to create a new Nokogiri::XML::RelaxNG.
48
+ #
49
+ # [Parameters]
50
+ # - +input+ (String | IO) RELAX NG schema definition
51
+ # - +options:+ (Nokogiri::XML::ParseOptions)
52
+ # Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA ⚠ Unused
53
+ #
54
+ # [Returns] Nokogiri::XML::RelaxNG
55
+ #
56
+ # ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for
57
+ # future functionality.
58
+ #
59
+ # Also see convenience method Nokogiri::XML::RelaxNG()
60
+ def self.new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, options: parse_options_)
61
+ from_document(Nokogiri::XML::Document.parse(input), options)
62
+ end
63
+
64
+ # :call-seq:
65
+ # read_memory(input) → Nokogiri::XML::RelaxNG
66
+ # read_memory(input, options:) → Nokogiri::XML::RelaxNG
67
+ #
68
+ # Convenience method for Nokogiri::XML::RelaxNG.new.
69
+ def self.read_memory(...)
70
+ # TODO deprecate this method
71
+ new(...)
72
+ end
30
73
  end
31
74
  end
32
75
  end