nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,14 +1,127 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  module SAX
4
6
  ###
5
- # Context for XML SAX parsers. This class is usually not instantiated
6
- # by the user. Instead, you should be looking at
7
- # Nokogiri::XML::SAX::Parser
7
+ # Context object to invoke the XML SAX parser on the SAX::Document handler.
8
+ #
9
+ # 💡 This class is usually not instantiated by the user. Use Nokogiri::XML::SAX::Parser
10
+ # instead.
8
11
  class ParserContext
9
- def self.new thing, encoding = 'UTF-8'
10
- [:read, :close].all? { |x| thing.respond_to?(x) } ?
11
- io(thing, Parser::ENCODINGS[encoding]) : memory(thing)
12
+ class << self
13
+ ###
14
+ # :call-seq:
15
+ # new(input)
16
+ # new(input, encoding)
17
+ #
18
+ # Create a parser context for an IO or a String. This is a shorthand method for
19
+ # ParserContext.io and ParserContext.memory.
20
+ #
21
+ # [Parameters]
22
+ # - +input+ (IO, String) A String or a readable IO object
23
+ # - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an
24
+ # encoding to use (default +nil+, encoding will be autodetected)
25
+ #
26
+ # If +input+ quacks like a readable IO object, this method forwards to ParserContext.io,
27
+ # otherwise it forwards to ParserContext.memory.
28
+ #
29
+ # [Returns] Nokogiri::XML::SAX::ParserContext
30
+ #
31
+ def new(input, encoding = nil)
32
+ if [:read, :close].all? { |x| input.respond_to?(x) }
33
+ io(input, encoding)
34
+ else
35
+ memory(input, encoding)
36
+ end
37
+ end
38
+
39
+ ###
40
+ # :call-seq:
41
+ # io(input)
42
+ # io(input, encoding)
43
+ #
44
+ # Create a parser context for an +input+ IO which will assume +encoding+
45
+ #
46
+ # [Parameters]
47
+ # - +input+ (IO) The readable IO object from which to read input
48
+ # - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an
49
+ # encoding to use (default +nil+, encoding will be autodetected)
50
+ #
51
+ # [Returns] Nokogiri::XML::SAX::ParserContext
52
+ #
53
+ # 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser parse
54
+ # methods which are more convenient for most use cases.
55
+ #
56
+ def io(input, encoding = nil)
57
+ native_io(input, resolve_encoding(encoding))
58
+ end
59
+
60
+ ###
61
+ # :call-seq:
62
+ # memory(input)
63
+ # memory(input, encoding)
64
+ #
65
+ # Create a parser context for the +input+ String.
66
+ #
67
+ # [Parameters]
68
+ # - +input+ (String) The input string to be parsed.
69
+ # - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an encoding to
70
+ # use (default +nil+, encoding will be autodetected)
71
+ #
72
+ # [Returns] Nokogiri::XML::SAX::ParserContext
73
+ #
74
+ # 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser parse methods
75
+ # which are more convenient for most use cases.
76
+ #
77
+ def memory(input, encoding = nil)
78
+ native_memory(input, resolve_encoding(encoding))
79
+ end
80
+
81
+ ###
82
+ # :call-seq:
83
+ # file(path)
84
+ # file(path, encoding)
85
+ #
86
+ # Create a parser context for the file at +path+.
87
+ #
88
+ # [Parameters]
89
+ # - +path+ (String) The path to the input file
90
+ # - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an encoding to
91
+ # use (default +nil+, encoding will be autodetected)
92
+ #
93
+ # [Returns] Nokogiri::XML::SAX::ParserContext
94
+ #
95
+ # 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser.parse_file which
96
+ # is more convenient for most use cases.
97
+ def file(input, encoding = nil)
98
+ native_file(input, resolve_encoding(encoding))
99
+ end
100
+
101
+ private def resolve_encoding(encoding)
102
+ case encoding
103
+ when Encoding
104
+ encoding
105
+
106
+ when nil
107
+ nil # totally fine, parser will guess encoding
108
+
109
+ when Integer
110
+ warn("Passing an integer to Nokogiri::XML::SAX::ParserContext.io is deprecated. Use an Encoding object instead. This will become an error in a future release.", uplevel: 2, category: :deprecated)
111
+
112
+ return nil if encoding == Parser::ENCODINGS["NONE"]
113
+
114
+ encoding = Parser::REVERSE_ENCODINGS[encoding]
115
+ raise ArgumentError, "Invalid libxml2 encoding id #{encoding}" if encoding.nil?
116
+ Encoding.find(encoding)
117
+
118
+ when String
119
+ Encoding.find(encoding)
120
+
121
+ else
122
+ raise ArgumentError, "Cannot resolve #{encoding.inspect} to an Encoding"
123
+ end
124
+ end
12
125
  end
13
126
  end
14
127
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  module SAX
@@ -23,7 +25,6 @@ module Nokogiri
23
25
  # parser << "/div>"
24
26
  # parser.finish
25
27
  class PushParser
26
-
27
28
  # The Nokogiri::XML::SAX::Document on which the PushParser will be
28
29
  # operating
29
30
  attr_accessor :document
@@ -31,7 +32,7 @@ module Nokogiri
31
32
  ###
32
33
  # Create a new PushParser with +doc+ as the SAX Document, providing
33
34
  # an optional +file_name+ and +encoding+
34
- def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
35
+ def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
35
36
  @document = doc
36
37
  @encoding = encoding
37
38
  @sax_parser = XML::SAX::Parser.new(doc)
@@ -43,16 +44,19 @@ module Nokogiri
43
44
  ###
44
45
  # Write a +chunk+ of XML to the PushParser. Any callback methods
45
46
  # that can be called will be called immediately.
46
- def write chunk, last_chunk = false
47
+ def write(chunk, last_chunk = false)
47
48
  native_write(chunk, last_chunk)
48
49
  end
49
- alias :<< :write
50
+ alias_method :<<, :write
50
51
 
51
52
  ###
52
53
  # Finish the parsing. This method is only necessary for
53
54
  # Nokogiri::XML::SAX::Document#end_document to be called.
55
+ #
56
+ # ⚠ Note that empty documents are treated as an error when using the libxml2-based
57
+ # implementation (CRuby), but are fine when using the Xerces-based implementation (JRuby).
54
58
  def finish
55
- write '', true
59
+ write("", true)
56
60
  end
57
61
  end
58
62
  end
@@ -1,4 +1,54 @@
1
- require 'nokogiri/xml/sax/document'
2
- require 'nokogiri/xml/sax/parser_context'
3
- require 'nokogiri/xml/sax/parser'
4
- require 'nokogiri/xml/sax/push_parser'
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ ###
6
+ # SAX Parsers are event-driven parsers.
7
+ #
8
+ # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
9
+ # feels necessary, and a parser that you explicitly feed XML in chunks. If you want to let
10
+ # Nokogiri deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have
11
+ # fine grain control over the XML input, use the Nokogiri::XML::SAX::PushParser.
12
+ #
13
+ # If you want to do SAX style parsing of HTML, check out Nokogiri::HTML4::SAX.
14
+ #
15
+ # The basic way a SAX style parser works is by creating a parser, telling the parser about the
16
+ # events we're interested in, then giving the parser some XML to process. The parser will notify
17
+ # you when it encounters events you said you would like to know about.
18
+ #
19
+ # To register for events, subclass Nokogiri::XML::SAX::Document and implement the methods for
20
+ # which you would like notification.
21
+ #
22
+ # For example, if I want to be notified when a document ends, and when an element starts, I
23
+ # would write a class like this:
24
+ #
25
+ # class MyHandler < Nokogiri::XML::SAX::Document
26
+ # def end_document
27
+ # puts "the document has ended"
28
+ # end
29
+ #
30
+ # def start_element name, attributes = []
31
+ # puts "#{name} started"
32
+ # end
33
+ # end
34
+ #
35
+ # Then I would instantiate a SAX parser with this document, and feed the parser some XML
36
+ #
37
+ # # Create a new parser
38
+ # parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)
39
+ #
40
+ # # Feed the parser some XML
41
+ # parser.parse(File.open(ARGV[0]))
42
+ #
43
+ # Now my document handler will be called when each node starts, and when the document ends. To
44
+ # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
45
+ #
46
+ module SAX
47
+ end
48
+ end
49
+ end
50
+
51
+ require_relative "sax/document"
52
+ require_relative "sax/parser_context"
53
+ require_relative "sax/parser"
54
+ require_relative "sax/push_parser"
@@ -1,62 +1,139 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class << self
4
- ###
5
- # Create a new Nokogiri::XML::Schema object using a +string_or_io+
6
- # object.
7
- def Schema string_or_io
8
- Schema.new(string_or_io)
6
+ # :call-seq:
7
+ # Schema(input) → Nokogiri::XML::Schema
8
+ # Schema(input, parse_options) → Nokogiri::XML::Schema
9
+ #
10
+ # Convenience method for Nokogiri::XML::Schema.new
11
+ def Schema(...)
12
+ Schema.new(...)
9
13
  end
10
14
  end
11
15
 
12
- ###
13
- # Nokogiri::XML::Schema is used for validating XML against a schema
14
- # (usually from an xsd file).
16
+ # Nokogiri::XML::Schema is used for validating \XML against an \XSD schema definition.
17
+ #
18
+ # Since v1.11.0, Schema treats inputs as *untrusted* by default, and so external entities are
19
+ # not resolved from the network (+http://+ or +ftp://+). When parsing a trusted document, the
20
+ # caller may turn off the +NONET+ option via the ParseOptions to (re-)enable external entity
21
+ # resolution over a network connection.
22
+ #
23
+ # 🛡 Before v1.11.0, documents were "trusted" by default during schema parsing which was counter
24
+ # to Nokogiri's "untrusted by default" security policy.
25
+ #
26
+ # *Example:* Determine whether an \XML document is valid.
15
27
  #
16
- # == Synopsis
28
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
29
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
30
+ # schema.valid?(doc) # Boolean
17
31
  #
18
- # Validate an XML document against a Schema. Loop over the errors that
19
- # are returned and print them out:
32
+ # *Example:* Validate an \XML document against an \XSD schema, and capture any errors that are found.
20
33
  #
21
- # xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
22
- # doc = Nokogiri::XML(File.read(PO_XML_FILE))
34
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
35
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
36
+ # errors = schema.validate(doc) # Array<SyntaxError>
23
37
  #
24
- # xsd.validate(doc).each do |error|
25
- # puts error.message
26
- # end
38
+ # *Example:* Validate an \XML document using a Document containing an \XSD schema definition.
39
+ #
40
+ # schema_doc = Nokogiri::XML::Document.parse(File.read(XSD_FILE))
41
+ # schema = Nokogiri::XML::Schema.from_document(schema_doc)
42
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
43
+ # schema.valid?(doc) # Boolean
27
44
  #
28
- # The list of errors are Nokogiri::XML::SyntaxError objects.
29
45
  class Schema
30
- # Errors while parsing the schema file
46
+ # The errors found while parsing the \XSD
47
+ #
48
+ # [Returns] Array<Nokogiri::XML::SyntaxError>
31
49
  attr_accessor :errors
32
50
 
33
- ###
34
- # Create a new Nokogiri::XML::Schema object using a +string_or_io+
35
- # object.
36
- def self.new string_or_io
37
- from_document Nokogiri::XML(string_or_io)
51
+ # The options used to parse the schema
52
+ #
53
+ # [Returns] Nokogiri::XML::ParseOptions
54
+ attr_accessor :parse_options
55
+
56
+ # :call-seq:
57
+ # new(input) → Nokogiri::XML::Schema
58
+ # new(input, parse_options) → Nokogiri::XML::Schema
59
+ #
60
+ # Parse an \XSD schema definition from a String or IO to create a new Nokogiri::XML::Schema
61
+ #
62
+ # [Parameters]
63
+ # - +input+ (String | IO) \XSD schema definition
64
+ # - +parse_options+ (Nokogiri::XML::ParseOptions)
65
+ # Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA
66
+ #
67
+ # [Returns] Nokogiri::XML::Schema
68
+ #
69
+ def self.new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, parse_options: parse_options_)
70
+ from_document(Nokogiri::XML::Document.parse(input), parse_options)
71
+ end
72
+
73
+ # :call-seq:
74
+ # read_memory(input) → Nokogiri::XML::Schema
75
+ # read_memory(input, parse_options) → Nokogiri::XML::Schema
76
+ #
77
+ # Convenience method for Nokogiri::XML::Schema.new
78
+ def self.read_memory(...)
79
+ # TODO deprecate this method
80
+ new(...)
38
81
  end
39
82
 
40
- ###
41
- # Validate +thing+ against this schema. +thing+ can be a
42
- # Nokogiri::XML::Document object, or a filename. An Array of
43
- # Nokogiri::XML::SyntaxError objects found while validating the
44
- # +thing+ is returned.
45
- def validate thing
46
- if thing.is_a?(Nokogiri::XML::Document)
47
- validate_document(thing)
48
- elsif File.file?(thing)
49
- validate_file(thing)
83
+ #
84
+ # :call-seq: validate(input) → Array<SyntaxError>
85
+ #
86
+ # Validate +input+ and return any errors that are found.
87
+ #
88
+ # [Parameters]
89
+ # - +input+ (Nokogiri::XML::Document | String)
90
+ # A parsed document, or a string containing a local filename.
91
+ #
92
+ # [Returns] Array<SyntaxError>
93
+ #
94
+ # *Example:* Validate an existing XML::Document, and capture any errors that are found.
95
+ #
96
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
97
+ # errors = schema.validate(document)
98
+ #
99
+ # *Example:* Validate an \XML document on disk, and capture any errors that are found.
100
+ #
101
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
102
+ # errors = schema.validate("/path/to/file.xml")
103
+ #
104
+ def validate(input)
105
+ if input.is_a?(Nokogiri::XML::Document)
106
+ validate_document(input)
107
+ elsif File.file?(input)
108
+ validate_file(input)
50
109
  else
51
- raise ArgumentError, "Must provide Nokogiri::Xml::Document or the name of an existing file"
110
+ raise ArgumentError, "Must provide Nokogiri::XML::Document or the name of an existing file"
52
111
  end
53
112
  end
54
113
 
55
- ###
56
- # Returns true if +thing+ is a valid Nokogiri::XML::Document or
57
- # file.
58
- def valid? thing
59
- validate(thing).length == 0
114
+ #
115
+ # :call-seq: valid?(input) → Boolean
116
+ #
117
+ # Validate +input+ and return a Boolean indicating whether the document is valid
118
+ #
119
+ # [Parameters]
120
+ # - +input+ (Nokogiri::XML::Document | String)
121
+ # A parsed document, or a string containing a local filename.
122
+ #
123
+ # [Returns] Boolean
124
+ #
125
+ # *Example:* Validate an existing XML::Document
126
+ #
127
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
128
+ # return unless schema.valid?(document)
129
+ #
130
+ # *Example:* Validate an \XML document on disk
131
+ #
132
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
133
+ # return unless schema.valid?("/path/to/file.xml")
134
+ #
135
+ def valid?(input)
136
+ validate(input).empty?
60
137
  end
61
138
  end
62
139
  end