nokogiri 1.18.0-arm-linux-musl

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +39 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +486 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +274 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +27 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +321 -0
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ module SAX
6
+ ###
7
+ # This parser is a SAX style parser that reads its input as it deems necessary. The parser
8
+ # takes a Nokogiri::XML::SAX::Document, an optional encoding, then given an XML input, sends
9
+ # messages to the Nokogiri::XML::SAX::Document.
10
+ #
11
+ # Here is an example of using this parser:
12
+ #
13
+ # # Create a subclass of Nokogiri::XML::SAX::Document and implement
14
+ # # the events we care about:
15
+ # class MyHandler < Nokogiri::XML::SAX::Document
16
+ # def start_element name, attrs = []
17
+ # puts "starting: #{name}"
18
+ # end
19
+ #
20
+ # def end_element name
21
+ # puts "ending: #{name}"
22
+ # end
23
+ # end
24
+ #
25
+ # parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)
26
+ #
27
+ # # Hand an IO object to the parser, which will read the XML from the IO.
28
+ # File.open(path_to_xml) do |f|
29
+ # parser.parse(f)
30
+ # end
31
+ #
32
+ # For more information about \SAX parsers, see Nokogiri::XML::SAX.
33
+ #
34
+ # Also see Nokogiri::XML::SAX::Document for the available events.
35
+ #
36
+ # For \HTML documents, use the subclass Nokogiri::HTML4::SAX::Parser.
37
+ #
38
+ class Parser
39
+ # to dynamically resolve ParserContext in inherited methods
40
+ include Nokogiri::ClassResolver
41
+
42
+ # Structure used for marshalling attributes for some callbacks in XML::SAX::Document.
43
# Value object with four positional members: localname, prefix, uri and value.
class Attribute < Struct.new(:localname, :prefix, :uri, :value); end
45
+
46
# Encoding name => integer id table. The constant is marked deprecated below;
# REVERSE_ENCODINGS is the same table keyed by id.
ENCODINGS = { # :nodoc:
  "NONE"        => 0,  # No char encoding detected
  "UTF-8"       => 1,  # UTF-8
  "UTF16LE"     => 2,  # UTF-16 little endian
  "UTF16BE"     => 3,  # UTF-16 big endian
  "UCS4LE"      => 4,  # UCS-4 little endian
  "UCS4BE"      => 5,  # UCS-4 big endian
  "EBCDIC"      => 6,  # EBCDIC uh!
  "UCS4-2143"   => 7,  # UCS-4 unusual ordering
  "UCS4-3412"   => 8,  # UCS-4 unusual ordering
  "UCS2"        => 9,  # UCS-2
  "ISO-8859-1"  => 10, # ISO-8859-1 ISO Latin 1
  "ISO-8859-2"  => 11, # ISO-8859-2 ISO Latin 2
  "ISO-8859-3"  => 12, # ISO-8859-3
  "ISO-8859-4"  => 13, # ISO-8859-4
  "ISO-8859-5"  => 14, # ISO-8859-5
  "ISO-8859-6"  => 15, # ISO-8859-6
  "ISO-8859-7"  => 16, # ISO-8859-7
  "ISO-8859-8"  => 17, # ISO-8859-8
  "ISO-8859-9"  => 18, # ISO-8859-9
  "ISO-2022-JP" => 19, # ISO-2022-JP
  "SHIFT-JIS"   => 20, # Shift_JIS
  "EUC-JP"      => 21, # EUC-JP
  "ASCII"       => 22, # pure ASCII
}
REVERSE_ENCODINGS = ENCODINGS.invert # :nodoc:
deprecate_constant :ENCODINGS
73
+
74
+ # The Nokogiri::XML::SAX::Document where events will be sent.
75
+ attr_accessor :document
76
+
77
+ # The encoding being used for this document.
78
+ attr_accessor :encoding
79
+
80
+ ###
81
+ # :call-seq:
82
+ # new ⇒ SAX::Parser
83
+ # new(handler) ⇒ SAX::Parser
84
+ # new(handler, encoding) ⇒ SAX::Parser
85
+ #
86
+ # Create a new Parser.
87
+ #
88
+ # [Parameters]
89
+ # - +handler+ (optional Nokogiri::XML::SAX::Document) The document that will receive
90
+ # events. Will create a new Nokogiri::XML::SAX::Document if not given, which is accessible
91
+ # through the #document attribute.
92
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
93
+ # parsing the input. (default +nil+ for auto-detection)
94
+ #
95
def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = nil)
  # Remember the requested encoding and the handler that will receive events.
  @encoding = encoding
  @document = doc
  @warned = false

  # initialize_native is skipped whenever Nokogiri.jruby? is true.
  return if Nokogiri.jruby?

  initialize_native
end
102
+
103
+ ###
104
+ # :call-seq:
105
+ # parse(input) { |parser_context| ... }
106
+ #
107
+ # Parse the input, sending events to the SAX::Document at #document.
108
+ #
109
+ # [Parameters]
110
+ # - +input+ (String, IO) The input to parse.
111
+ #
112
+ # If +input+ quacks like a readable IO object, this method forwards to Parser.parse_io,
113
+ # otherwise it forwards to Parser.parse_memory.
114
+ #
115
+ # [Yields]
116
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
117
+ # to set options on the parser context before parsing begins.
118
+ #
119
def parse(input, &block)
  # An input exposing both #read and #close goes to parse_io; anything else
  # is handed to parse_memory. The caller's block is forwarded either way.
  stream_like = input.respond_to?(:read) && input.respond_to?(:close)
  return parse_io(input, &block) if stream_like

  parse_memory(input, &block)
end
126
+
127
+ ###
128
+ # :call-seq:
129
+ # parse_io(io) { |parser_context| ... }
130
+ # parse_io(io, encoding) { |parser_context| ... }
131
+ #
132
+ # Parse an input stream.
133
+ #
134
+ # [Parameters]
135
+ # - +io+ (IO) The readable IO object from which to read input
136
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
137
+ # parsing the input, or +nil+ for auto-detection. (default #encoding)
138
+ #
139
+ # [Yields]
140
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
141
+ # to set options on the parser context before parsing begins.
142
+ #
143
def parse_io(io, encoding = @encoding)
  # Build the context, let the caller's block (if any) see it, then parse
  # with this parser as the receiver of events.
  related_class("ParserContext")
    .io(io, encoding)
    .tap { |context| yield context if block_given? }
    .parse_with(self)
end
148
+
149
+ ###
150
+ # :call-seq:
151
+ # parse_memory(input) { |parser_context| ... }
152
+ # parse_memory(input, encoding) { |parser_context| ... }
153
+ #
154
+ # Parse an input string.
155
+ #
156
+ # [Parameters]
157
+ # - +input+ (String) The input string to be parsed.
158
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
159
+ # parsing the input, or +nil+ for auto-detection. (default #encoding)
160
+ #
161
+ # [Yields]
162
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
163
+ # to set options on the parser context before parsing begins.
164
+ #
165
def parse_memory(input, encoding = @encoding)
  # Build the context, let the caller's block (if any) see it, then parse
  # with this parser as the receiver of events.
  related_class("ParserContext")
    .memory(input, encoding)
    .tap { |context| yield context if block_given? }
    .parse_with(self)
end
170
+
171
###
# :call-seq:
#   parse_file(filename) { |parser_context| ... }
#   parse_file(filename, encoding) { |parser_context| ... }
#
# Parse a file.
#
# [Parameters]
# - +filename+ (String) The path to the file to be parsed.
# - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
#   parsing the input, or +nil+ for auto-detection. (default #encoding)
#
# [Yields]
# If a block is given, the underlying ParserContext object will be yielded. This can be used
# to set options on the parser context before parsing begins.
#
# [Raises]
# - ArgumentError if +filename+ is +nil+ or +false+
# - Errno::ENOENT if nothing exists at +filename+
# - Errno::EISDIR if +filename+ is a directory
#
def parse_file(filename, encoding = @encoding)
  raise ArgumentError, "no filename provided" unless filename

  # Hand the path to the Errno constructors so the error message names the
  # offending file. #to_s keeps path objects that are not Strings usable here.
  raise Errno::ENOENT, filename.to_s unless File.exist?(filename)
  raise Errno::EISDIR, filename.to_s if File.directory?(filename)

  ctx = related_class("ParserContext").file(filename, encoding)
  yield ctx if block_given?
  ctx.parse_with(self)
end
196
+ end
197
+ end
198
+ end
199
+ end
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ module SAX
6
+ ###
7
+ # Context object to invoke the XML SAX parser on the SAX::Document handler.
8
+ #
9
+ # 💡 This class is usually not instantiated by the user. Use Nokogiri::XML::SAX::Parser
10
+ # instead.
11
+ class ParserContext
12
+ class << self
13
+ ###
14
+ # :call-seq:
15
+ # new(input)
16
+ # new(input, encoding)
17
+ #
18
+ # Create a parser context for an IO or a String. This is a shorthand method for
19
+ # ParserContext.io and ParserContext.memory.
20
+ #
21
+ # [Parameters]
22
+ # - +input+ (IO, String) A String or a readable IO object
23
+ # - +encoding+ (optional) (Encoding) The +Encoding+ to use, or the name of an
24
+ # encoding to use (default +nil+, encoding will be autodetected)
25
+ #
26
+ # If +input+ quacks like a readable IO object, this method forwards to ParserContext.io,
27
+ # otherwise it forwards to ParserContext.memory.
28
+ #
29
+ # [Returns] Nokogiri::XML::SAX::ParserContext
30
+ #
31
def new(input, encoding = nil)
  # Inputs responding to both :read and :close are routed to .io; all others to .memory.
  io_like = input.respond_to?(:read) && input.respond_to?(:close)
  io_like ? io(input, encoding) : memory(input, encoding)
end
38
+
39
+ ###
40
+ # :call-seq:
41
+ # io(input)
42
+ # io(input, encoding)
43
+ #
44
+ # Create a parser context for an +input+ IO which will assume +encoding+
45
+ #
46
+ # [Parameters]
47
+ # - +io+ (IO) The readable IO object from which to read input
48
+ # - +encoding+ (optional) (Encoding) The +Encoding+ to use, or the name of an
49
+ # encoding to use (default +nil+, encoding will be autodetected)
50
+ #
51
+ # [Returns] Nokogiri::XML::SAX::ParserContext
52
+ #
53
+ # 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser parse
54
+ # methods which are more convenient for most use cases.
55
+ #
56
def io(input, encoding = nil)
  # Normalize the encoding argument first, then build the context natively.
  resolved = resolve_encoding(encoding)
  native_io(input, resolved)
end
59
+
60
+ ###
61
+ # :call-seq:
62
+ # memory(input)
63
+ # memory(input, encoding)
64
+ #
65
+ # Create a parser context for the +input+ String.
66
+ #
67
+ # [Parameters]
68
+ # - +input+ (String) The input string to be parsed.
69
+ # - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an encoding to
70
+ # use (default +nil+, encoding will be autodetected)
71
+ #
72
+ # [Returns] Nokogiri::XML::SAX::ParserContext
73
+ #
74
+ # 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser parse methods
75
+ # which are more convenient for most use cases.
76
+ #
77
def memory(input, encoding = nil)
  # Normalize the encoding argument first, then build the context natively.
  resolved = resolve_encoding(encoding)
  native_memory(input, resolved)
end
80
+
81
+ ###
82
+ # :call-seq:
83
+ # file(path)
84
+ # file(path, encoding)
85
+ #
86
+ # Create a parser context for the file at +path+.
87
+ #
88
+ # [Parameters]
89
+ # - +path+ (String) The path to the input file
90
+ # - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an encoding to
91
+ # use (default +nil+, encoding will be autodetected)
92
+ #
93
+ # [Returns] Nokogiri::XML::SAX::ParserContext
94
+ #
95
+ # 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser.parse_file which
96
+ # is more convenient for most use cases.
97
def file(input, encoding = nil)
  # Normalize the encoding argument first, then build the context natively.
  resolved = resolve_encoding(encoding)
  native_file(input, resolved)
end
100
+
101
# Normalize the +encoding+ argument accepted by the context constructors.
#
# [Parameters]
# - +encoding+ (Encoding, String, Integer, nil) An Encoding, an encoding name, a
#   (deprecated) integer id from Parser::REVERSE_ENCODINGS, or +nil+.
#
# [Returns] An Encoding, or +nil+ when +encoding+ is +nil+ or is the id mapped to "NONE".
#
# [Raises] ArgumentError if an integer id is not in Parser::REVERSE_ENCODINGS, if
# Encoding.find does not recognize the name, or if +encoding+ is of any other type.
private def resolve_encoding(encoding)
  case encoding
  when Encoding, nil
    # An Encoding is used as-is; nil means the parser will guess the encoding.
    encoding

  when Integer
    warn("Passing an integer to Nokogiri::XML::SAX::ParserContext.io is deprecated. Use an Encoding object instead. This will become an error in a future release.", uplevel: 2, category: :deprecated)

    # Look the id up through REVERSE_ENCODINGS only: referencing Parser::ENCODINGS here
    # would trip its deprecate_constant marker and emit a second, unrelated warning.
    name = Parser::REVERSE_ENCODINGS[encoding]

    # Keep the id in its own variable so the message reports what the caller passed.
    raise ArgumentError, "Invalid libxml2 encoding id #{encoding}" if name.nil?
    return nil if name == "NONE"

    Encoding.find(name)

  when String
    Encoding.find(encoding)

  else
    raise ArgumentError, "Cannot resolve #{encoding.inspect} to an Encoding"
  end
end
125
+ end
126
+ end
127
+ end
128
+ end
129
+ end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ module SAX
6
+ ###
7
+ # PushParser can parse a document that is fed to it manually. It
8
+ # must be given a SAX::Document object which will be called with
9
+ # SAX events as the document is being parsed.
10
+ #
11
+ # Calling PushParser#<< writes XML to the parser, calling any SAX
12
+ # callbacks it can.
13
+ #
14
+ # PushParser#finish tells the parser that the document is finished
15
+ # and calls the end_document SAX method.
16
+ #
17
+ # Example:
18
+ #
19
+ # parser = PushParser.new(Class.new(XML::SAX::Document) {
20
+ # def start_document
21
+ # puts "start document called"
22
+ # end
23
+ # }.new)
24
+ # parser << "<div>hello<"
25
+ # parser << "/div>"
26
+ # parser.finish
27
+ class PushParser
28
+ # The Nokogiri::XML::SAX::Document on which the PushParser will be
29
+ # operating
30
+ attr_accessor :document
31
+
32
+ ###
33
+ # Create a new PushParser with +doc+ as the SAX Document, providing
34
+ # an optional +file_name+ and +encoding+
35
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
  @document = doc
  @encoding = encoding

  # Wrap the handler in a regular SAX parser and keep it on the instance.
  wrapped_parser = XML::SAX::Parser.new(doc)
  @sax_parser = wrapped_parser

  # Create the push parser context from that parser and the optional file name.
  initialize_native(wrapped_parser, file_name)
end
43
+
44
+ ###
45
+ # Write a +chunk+ of XML to the PushParser. Any callback methods
46
+ # that can be called will be called immediately.
47
def write(chunk, last_chunk = false)
  # Thin wrapper: both arguments are passed unchanged to native_write.
  native_write(chunk, last_chunk)
end
50
+ alias_method :<<, :write
51
+
52
+ ###
53
+ # Finish the parsing. This method is only necessary for
54
+ # Nokogiri::XML::SAX::Document#end_document to be called.
55
+ #
56
+ # ⚠ Note that empty documents are treated as an error when using the libxml2-based
57
+ # implementation (CRuby), but are fine when using the Xerces-based implementation (JRuby).
58
def finish
  # Writes an empty chunk with last_chunk set to true.
  write("", true)
end
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ ###
6
+ # SAX Parsers are event-driven parsers.
7
+ #
8
+ # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
9
+ # feels necessary, and a parser that you explicitly feed XML in chunks. If you want to let
10
+ # Nokogiri deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have
11
+ # fine-grained control over the XML input, use the Nokogiri::XML::SAX::PushParser.
12
+ #
13
+ # If you want to do SAX style parsing of HTML, check out Nokogiri::HTML4::SAX.
14
+ #
15
+ # The basic way a SAX style parser works is by creating a parser, telling the parser about the
16
+ # events we're interested in, then giving the parser some XML to process. The parser will notify
17
+ # you when it encounters events you said you would like to know about.
18
+ #
19
+ # To register for events, subclass Nokogiri::XML::SAX::Document and implement the methods for
20
+ # which you would like notification.
21
+ #
22
+ # For example, if I want to be notified when a document ends, and when an element starts, I
23
+ # would write a class like this:
24
+ #
25
+ # class MyHandler < Nokogiri::XML::SAX::Document
26
+ # def end_document
27
+ # puts "the document has ended"
28
+ # end
29
+ #
30
+ # def start_element name, attributes = []
31
+ # puts "#{name} started"
32
+ # end
33
+ # end
34
+ #
35
+ # Then I would instantiate a SAX parser with this document, and feed the parser some XML
36
+ #
37
+ # # Create a new parser
38
+ # parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)
39
+ #
40
+ # # Feed the parser some XML
41
+ # parser.parse(File.open(ARGV[0]))
42
+ #
43
+ # Now my document handler will be called when each node starts, and when the document ends. To
44
+ # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
45
+ #
46
+ module SAX
47
+ end
48
+ end
49
+ end
50
+
51
+ require_relative "sax/document"
52
+ require_relative "sax/parser_context"
53
+ require_relative "sax/parser"
54
+ require_relative "sax/push_parser"
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class << self
6
+ # :call-seq:
7
+ # Schema(input) → Nokogiri::XML::Schema
8
+ # Schema(input, parse_options) → Nokogiri::XML::Schema
9
+ #
10
+ # Convenience method for Nokogiri::XML::Schema.new
11
def Schema(...)
  # Pure delegation: every argument is forwarded unchanged to Schema.new.
  Schema.new(...)
end
14
+ end
15
+
16
+ # Nokogiri::XML::Schema is used for validating \XML against an \XSD schema definition.
17
+ #
18
+ # ⚠ Since v1.11.0, Schema treats inputs as *untrusted* by default, and so external entities are
19
+ # not resolved from the network (+http://+ or +ftp://+). When parsing a trusted document, the
20
+ # caller may turn off the +NONET+ option via the ParseOptions to (re-)enable external entity
21
+ # resolution over a network connection.
22
+ #
23
+ # 🛡 Before v1.11.0, documents were "trusted" by default during schema parsing which was counter
24
+ # to Nokogiri's "untrusted by default" security policy.
25
+ #
26
+ # *Example:* Determine whether an \XML document is valid.
27
+ #
28
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
29
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
30
+ # schema.valid?(doc) # Boolean
31
+ #
32
+ # *Example:* Validate an \XML document against an \XSD schema, and capture any errors that are found.
33
+ #
34
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
35
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
36
+ # errors = schema.validate(doc) # Array<SyntaxError>
37
+ #
38
+ # *Example:* Validate an \XML document using a Document containing an \XSD schema definition.
39
+ #
40
+ # schema_doc = Nokogiri::XML::Document.parse(File.read(XSD_FILE))
41
+ # schema = Nokogiri::XML::Schema.from_document(schema_doc)
42
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
43
+ # schema.valid?(doc) # Boolean
44
+ #
45
+ class Schema
46
+ # The errors found while parsing the \XSD
47
+ #
48
+ # [Returns] Array<Nokogiri::XML::SyntaxError>
49
+ attr_accessor :errors
50
+
51
+ # The options used to parse the schema
52
+ #
53
+ # [Returns] Nokogiri::XML::ParseOptions
54
+ attr_accessor :parse_options
55
+
56
+ # :call-seq:
57
+ # new(input) → Nokogiri::XML::Schema
58
+ # new(input, parse_options) → Nokogiri::XML::Schema
59
+ #
60
+ # Parse an \XSD schema definition from a String or IO to create a new Nokogiri::XML::Schema
61
+ #
62
+ # [Parameters]
63
+ # - +input+ (String | IO) \XSD schema definition
64
+ # - +parse_options+ (Nokogiri::XML::ParseOptions)
65
+ # Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA
66
+ #
67
+ # [Returns] Nokogiri::XML::Schema
68
+ #
69
def self.new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, parse_options: parse_options_)
  # Parse the schema source into a Document, then build the Schema from it.
  # The keyword argument defaults to the positional one.
  schema_document = Nokogiri::XML::Document.parse(input)
  from_document(schema_document, parse_options)
end
72
+
73
+ # :call-seq:
74
+ # read_memory(input) → Nokogiri::XML::Schema
75
+ # read_memory(input, parse_options) → Nokogiri::XML::Schema
76
+ #
77
+ # Convenience method for Nokogiri::XML::Schema.new
78
def self.read_memory(...)
  # TODO deprecate this method
  # Until then it forwards every argument to .new.
  new(...)
end
82
+
83
#
# :call-seq: validate(input) → Array<SyntaxError>
#
# Validate +input+ and return any errors that are found.
#
# [Parameters]
# - +input+ (Nokogiri::XML::Document | String)
#   A parsed document, or a string containing a local filename.
#
# [Returns] Array<SyntaxError>
#
# [Raises] ArgumentError if +input+ is neither a Nokogiri::XML::Document nor the name of
# an existing file. This includes values that cannot be used as a path, such as +nil+.
#
# *Example:* Validate an existing XML::Document, and capture any errors that are found.
#
#   schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
#   errors = schema.validate(document)
#
# *Example:* Validate an \XML document on disk, and capture any errors that are found.
#
#   schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
#   errors = schema.validate("/path/to/file.xml")
#
def validate(input)
  return validate_document(input) if input.is_a?(Nokogiri::XML::Document)

  # File.file? raises TypeError for values it cannot treat as a path (nil, Integer, ...).
  # Treat those like any other unsupported input so the ArgumentError below is raised.
  names_existing_file =
    begin
      File.file?(input)
    rescue TypeError
      false
    end
  return validate_file(input) if names_existing_file

  raise ArgumentError, "Must provide Nokogiri::XML::Document or the name of an existing file"
end
113
+
114
+ #
115
+ # :call-seq: valid?(input) → Boolean
116
+ #
117
+ # Validate +input+ and return a Boolean indicating whether the document is valid
118
+ #
119
+ # [Parameters]
120
+ # - +input+ (Nokogiri::XML::Document | String)
121
+ # A parsed document, or a string containing a local filename.
122
+ #
123
+ # [Returns] Boolean
124
+ #
125
+ # *Example:* Validate an existing XML::Document
126
+ #
127
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
128
+ # return unless schema.valid?(document)
129
+ #
130
+ # *Example:* Validate an \XML document on disk
131
+ #
132
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
133
+ # return unless schema.valid?("/path/to/file.xml")
134
+ #
135
def valid?(input)
  # Valid means #validate reported no errors at all.
  problems = validate(input)
  problems.empty?
end
138
+ end
139
+ end
140
+ end