nokogiri 1.14.0.rc1-arm-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (200) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1082 -0
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +114 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  17. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  18. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  19. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  21. data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
  23. data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
  24. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  25. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  26. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  27. data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
  28. data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
  29. data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
  30. data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
  31. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  32. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  33. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  35. data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
  37. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  38. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
  39. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  41. data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
  42. data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
  43. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  44. data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
  45. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
  65. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  66. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  67. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  68. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  69. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  70. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  71. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  72. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  73. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  74. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  75. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  76. data/ext/nokogiri/include/libxslt/security.h +104 -0
  77. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  78. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  79. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  80. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  81. data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
  82. data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
  83. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  84. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  85. data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
  86. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  87. data/ext/nokogiri/nokogiri.c +259 -0
  88. data/ext/nokogiri/nokogiri.h +235 -0
  89. data/ext/nokogiri/test_global_handlers.c +40 -0
  90. data/ext/nokogiri/xml_attr.c +103 -0
  91. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  92. data/ext/nokogiri/xml_cdata.c +57 -0
  93. data/ext/nokogiri/xml_comment.c +62 -0
  94. data/ext/nokogiri/xml_document.c +689 -0
  95. data/ext/nokogiri/xml_document_fragment.c +44 -0
  96. data/ext/nokogiri/xml_dtd.c +208 -0
  97. data/ext/nokogiri/xml_element_content.c +128 -0
  98. data/ext/nokogiri/xml_element_decl.c +69 -0
  99. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  100. data/ext/nokogiri/xml_entity_decl.c +112 -0
  101. data/ext/nokogiri/xml_entity_reference.c +50 -0
  102. data/ext/nokogiri/xml_namespace.c +186 -0
  103. data/ext/nokogiri/xml_node.c +2425 -0
  104. data/ext/nokogiri/xml_node_set.c +496 -0
  105. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  106. data/ext/nokogiri/xml_reader.c +794 -0
  107. data/ext/nokogiri/xml_relax_ng.c +183 -0
  108. data/ext/nokogiri/xml_sax_parser.c +316 -0
  109. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  110. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  111. data/ext/nokogiri/xml_schema.c +282 -0
  112. data/ext/nokogiri/xml_syntax_error.c +85 -0
  113. data/ext/nokogiri/xml_text.c +48 -0
  114. data/ext/nokogiri/xml_xpath_context.c +413 -0
  115. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  116. data/gumbo-parser/CHANGES.md +63 -0
  117. data/gumbo-parser/Makefile +111 -0
  118. data/gumbo-parser/THANKS +27 -0
  119. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  120. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  123. data/lib/nokogiri/class_resolver.rb +67 -0
  124. data/lib/nokogiri/css/node.rb +54 -0
  125. data/lib/nokogiri/css/parser.rb +770 -0
  126. data/lib/nokogiri/css/parser.y +277 -0
  127. data/lib/nokogiri/css/parser_extras.rb +96 -0
  128. data/lib/nokogiri/css/syntax_error.rb +9 -0
  129. data/lib/nokogiri/css/tokenizer.rb +155 -0
  130. data/lib/nokogiri/css/tokenizer.rex +56 -0
  131. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  132. data/lib/nokogiri/css.rb +66 -0
  133. data/lib/nokogiri/decorators/slop.rb +44 -0
  134. data/lib/nokogiri/encoding_handler.rb +57 -0
  135. data/lib/nokogiri/extension.rb +32 -0
  136. data/lib/nokogiri/gumbo.rb +15 -0
  137. data/lib/nokogiri/html.rb +48 -0
  138. data/lib/nokogiri/html4/builder.rb +37 -0
  139. data/lib/nokogiri/html4/document.rb +214 -0
  140. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  141. data/lib/nokogiri/html4/element_description.rb +25 -0
  142. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  143. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  144. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  145. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  146. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  147. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  148. data/lib/nokogiri/html4.rb +47 -0
  149. data/lib/nokogiri/html5/document.rb +168 -0
  150. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  151. data/lib/nokogiri/html5/node.rb +98 -0
  152. data/lib/nokogiri/html5.rb +389 -0
  153. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  154. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  155. data/lib/nokogiri/syntax_error.rb +6 -0
  156. data/lib/nokogiri/version/constant.rb +6 -0
  157. data/lib/nokogiri/version/info.rb +223 -0
  158. data/lib/nokogiri/version.rb +4 -0
  159. data/lib/nokogiri/xml/attr.rb +66 -0
  160. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  161. data/lib/nokogiri/xml/builder.rb +487 -0
  162. data/lib/nokogiri/xml/cdata.rb +13 -0
  163. data/lib/nokogiri/xml/character_data.rb +9 -0
  164. data/lib/nokogiri/xml/document.rb +471 -0
  165. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  166. data/lib/nokogiri/xml/dtd.rb +34 -0
  167. data/lib/nokogiri/xml/element_content.rb +38 -0
  168. data/lib/nokogiri/xml/element_decl.rb +15 -0
  169. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  170. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  171. data/lib/nokogiri/xml/namespace.rb +58 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  173. data/lib/nokogiri/xml/node.rb +1563 -0
  174. data/lib/nokogiri/xml/node_set.rb +446 -0
  175. data/lib/nokogiri/xml/notation.rb +19 -0
  176. data/lib/nokogiri/xml/parse_options.rb +213 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  178. data/lib/nokogiri/xml/pp/node.rb +57 -0
  179. data/lib/nokogiri/xml/pp.rb +4 -0
  180. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  181. data/lib/nokogiri/xml/reader.rb +105 -0
  182. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  183. data/lib/nokogiri/xml/sax/document.rb +167 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  187. data/lib/nokogiri/xml/sax.rb +6 -0
  188. data/lib/nokogiri/xml/schema.rb +73 -0
  189. data/lib/nokogiri/xml/searchable.rb +270 -0
  190. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  191. data/lib/nokogiri/xml/text.rb +11 -0
  192. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  193. data/lib/nokogiri/xml/xpath.rb +21 -0
  194. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  195. data/lib/nokogiri/xml.rb +76 -0
  196. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  197. data/lib/nokogiri/xslt.rb +65 -0
  198. data/lib/nokogiri.rb +120 -0
  199. data/lib/xsd/xmlparser/nokogiri.rb +104 -0
  200. metadata +317 -0
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ # :nodoc: all
6
+ module PP
7
+ module Node
8
+ COLLECTIONS = [:attribute_nodes, :children]
9
+
10
+ def inspect
11
+ attributes = inspect_attributes.reject do |x|
12
+ attribute = send(x)
13
+ !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
14
+ rescue NoMethodError
15
+ true
16
+ end.map do |attribute|
17
+ "#{attribute.to_s.sub(/_\w+/, "s")}=#{send(attribute).inspect}"
18
+ end.join(" ")
19
+ "#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
20
+ end
21
+
22
+ def pretty_print(pp)
23
+ nice_name = self.class.name.split("::").last
24
+ pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
25
+ pp.breakable
26
+ attrs = inspect_attributes.filter_map do |t|
27
+ [t, send(t)] if respond_to?(t)
28
+ end.find_all do |x|
29
+ if x.last
30
+ if COLLECTIONS.include?(x.first)
31
+ !x.last.empty?
32
+ else
33
+ true
34
+ end
35
+ end
36
+ end
37
+
38
+ pp.seplist(attrs) do |v|
39
+ if COLLECTIONS.include?(v.first)
40
+ pp.group(2, "#{v.first.to_s.sub(/_\w+$/, "s")} = [", "]") do
41
+ pp.breakable
42
+ pp.seplist(v.last) do |item|
43
+ pp.pp(item)
44
+ end
45
+ end
46
+ else
47
+ pp.text("#{v.first} = ")
48
+ pp.pp(v.last)
49
+ end
50
+ end
51
+ pp.breakable
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "pp/node"
4
+ require_relative "pp/character_data"
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class ProcessingInstruction < Node
6
+ def initialize(document, name, content)
7
+ super(document, name)
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ ###
6
+ # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
7
+ # would move. The Reader is given an XML document, and yields nodes
8
+ # to an each block.
9
+ #
10
+ # Here is an example of usage:
11
+ #
12
+ # reader = Nokogiri::XML::Reader(<<-eoxml)
13
+ # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
14
+ # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
15
+ # </x>
16
+ # eoxml
17
+ #
18
+ # reader.each do |node|
19
+ #
20
+ # # node is an instance of Nokogiri::XML::Reader
21
+ # puts node.name
22
+ #
23
+ # end
24
+ #
25
+ # Note that Nokogiri::XML::Reader#each can only be called once!! Once
26
+ # the cursor moves through the entire document, you must parse the
27
+ # document again. So make sure that you capture any information you
28
+ # need during the first iteration.
29
+ #
30
+ # The Reader parser is good for when you need the speed of a SAX parser,
31
+ # but do not want to write a Document handler.
32
+ class Reader
33
+ include Enumerable
34
+
35
+ TYPE_NONE = 0
36
+ # Element node type
37
+ TYPE_ELEMENT = 1
38
+ # Attribute node type
39
+ TYPE_ATTRIBUTE = 2
40
+ # Text node type
41
+ TYPE_TEXT = 3
42
+ # CDATA node type
43
+ TYPE_CDATA = 4
44
+ # Entity Reference node type
45
+ TYPE_ENTITY_REFERENCE = 5
46
+ # Entity node type
47
+ TYPE_ENTITY = 6
48
+ # PI node type
49
+ TYPE_PROCESSING_INSTRUCTION = 7
50
+ # Comment node type
51
+ TYPE_COMMENT = 8
52
+ # Document node type
53
+ TYPE_DOCUMENT = 9
54
+ # Document Type node type
55
+ TYPE_DOCUMENT_TYPE = 10
56
+ # Document Fragment node type
57
+ TYPE_DOCUMENT_FRAGMENT = 11
58
+ # Notation node type
59
+ TYPE_NOTATION = 12
60
+ # Whitespace node type
61
+ TYPE_WHITESPACE = 13
62
+ # Significant Whitespace node type
63
+ TYPE_SIGNIFICANT_WHITESPACE = 14
64
+ # Element end node type
65
+ TYPE_END_ELEMENT = 15
66
+ # Entity end node type
67
+ TYPE_END_ENTITY = 16
68
+ # XML Declaration node type
69
+ TYPE_XML_DECLARATION = 17
70
+
71
+ # A list of errors encountered while parsing
72
+ attr_accessor :errors
73
+
74
+ # The XML source
75
+ attr_reader :source
76
+
77
+ alias_method :self_closing?, :empty_element?
78
+
79
+ def initialize(source, url = nil, encoding = nil) # :nodoc:
80
+ @source = source
81
+ @errors = []
82
+ @encoding = encoding
83
+ end
84
+ private :initialize
85
+
86
+ # Get the attributes and namespaces of the current node as a Hash.
87
+ #
88
+ # This is the union of Reader#attribute_hash and Reader#namespaces
89
+ #
90
+ # [Returns]
91
+ # (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
92
+ def attributes
93
+ attribute_hash.merge(namespaces)
94
+ end
95
+
96
+ ###
97
+ # Move the cursor through the document yielding the cursor to the block
98
+ def each
99
+ while (cursor = read)
100
+ yield cursor
101
+ end
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class << self
6
+ ###
7
+ # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
8
+ # See Nokogiri::XML::RelaxNG for an example.
9
+ def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
10
+ RelaxNG.new(string_or_io, options)
11
+ end
12
+ end
13
+
14
+ ###
15
+ # Nokogiri::XML::RelaxNG is used for validating XML against a
16
+ # RelaxNG schema.
17
+ #
18
+ # == Synopsis
19
+ #
20
+ # Validate an XML document against a RelaxNG schema. Loop over the errors
21
+ # that are returned and print them out:
22
+ #
23
+ # schema = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE))
24
+ # doc = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
25
+ #
26
+ # schema.validate(doc).each do |error|
27
+ # puts error.message
28
+ # end
29
+ #
30
+ # The errors in the list are Nokogiri::XML::SyntaxError objects.
31
+ #
32
+ # NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
33
+ # underlying parsing libraries to access network resources. This is counter to Nokogiri's
34
+ # "untrusted by default" security policy, but is a limitation of the underlying libraries.
35
+ class RelaxNG < Nokogiri::XML::Schema
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ ###
6
+ # SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
7
+ # dealing with XML. If you want to do SAX style parsing using HTML, check out
8
+ # Nokogiri::HTML4::SAX.
9
+ #
10
+ # The basic way a SAX style parser works is by creating a parser, telling the parser about the
11
+ # events we're interested in, then giving the parser some XML to process. The parser will notify
12
+ # you when it encounters events you said you would like to know about.
13
+ #
14
+ # To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
15
+ # methods for which you would like notification.
16
+ #
17
+ # For example, if I want to be notified when a document ends, and when an element starts, I
18
+ # would write a class like this:
19
+ #
20
+ # class MyDocument < Nokogiri::XML::SAX::Document
21
+ # def end_document
22
+ # puts "the document has ended"
23
+ # end
24
+ #
25
+ # def start_element name, attributes = []
26
+ # puts "#{name} started"
27
+ # end
28
+ # end
29
+ #
30
+ # Then I would instantiate a SAX parser with this document, and feed the parser some XML
31
+ #
32
+ # # Create a new parser
33
+ # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
34
+ #
35
+ # # Feed the parser some XML
36
+ # parser.parse(File.open(ARGV[0]))
37
+ #
38
+ # Now my document handler will be called when each node starts, and when the document ends. To
39
+ # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
40
+ #
41
+ # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
42
+ # feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
43
+ # deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine-grained
44
+ # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
45
+ module SAX
46
+ ###
47
+ # This class is used for registering types of events you are interested in handling. All of
48
+ # the methods on this class are available as possible events while parsing an XML document. To
49
+ # register for any particular event, just subclass this class and implement the methods you
50
+ # are interested in knowing about.
51
+ #
52
+ # To only be notified about start and end element events, write a class like this:
53
+ #
54
+ # class MyDocument < Nokogiri::XML::SAX::Document
55
+ # def start_element name, attrs = []
56
+ # puts "#{name} started!"
57
+ # end
58
+ #
59
+ # def end_element name
60
+ # puts "#{name} ended"
61
+ # end
62
+ # end
63
+ #
64
+ # You can use this event handler for any SAX style parser included with Nokogiri. See
65
+ # Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
66
+ class Document
67
+ ###
68
+ # Called when an XML declaration is parsed
69
+ def xmldecl(version, encoding, standalone)
70
+ end
71
+
72
+ ###
73
+ # Called when document starts parsing
74
+ def start_document
75
+ end
76
+
77
+ ###
78
+ # Called when document ends parsing
79
+ def end_document
80
+ end
81
+
82
+ ###
83
+ # Called at the beginning of an element
84
+ # * +name+ is the name of the tag
85
+ # * +attrs+ are an assoc list of namespaces and attributes, e.g.:
86
+ # [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
87
+ def start_element(name, attrs = [])
88
+ end
89
+
90
+ ###
91
+ # Called at the end of an element
92
+ # +name+ is the tag name
93
+ def end_element(name)
94
+ end
95
+
96
+ ###
97
+ # Called at the beginning of an element
98
+ # +name+ is the element name
99
+ # +attrs+ is a list of attributes
100
+ # +prefix+ is the namespace prefix for the element
101
+ # +uri+ is the associated namespace URI
102
+ # +ns+ is a hash of namespace prefix:urls associated with the element
103
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
104
+ ###
105
+ # Deal with SAX v1 interface
106
+ name = [prefix, name].compact.join(":")
107
+ attributes = ns.map do |ns_prefix, ns_uri|
108
+ [["xmlns", ns_prefix].compact.join(":"), ns_uri]
109
+ end + attrs.map do |attr|
110
+ [[attr.prefix, attr.localname].compact.join(":"), attr.value]
111
+ end
112
+ start_element(name, attributes)
113
+ end
114
+
115
+ ###
116
+ # Called at the end of an element
117
+ # +name+ is the element's name
118
+ # +prefix+ is the namespace prefix associated with the element
119
+ # +uri+ is the associated namespace URI
120
+ def end_element_namespace(name, prefix = nil, uri = nil)
121
+ ###
122
+ # Deal with SAX v1 interface
123
+ end_element([prefix, name].compact.join(":"))
124
+ end
125
+
126
+ ###
127
+ # Characters read between tags. This method might be called multiple
128
+ # times given one contiguous string of characters.
129
+ #
130
+ # +string+ contains the character data
131
+ def characters(string)
132
+ end
133
+
134
+ ###
135
+ # Called when comments are encountered
136
+ # +string+ contains the comment data
137
+ def comment(string)
138
+ end
139
+
140
+ ###
141
+ # Called on document warnings
142
+ # +string+ contains the warning
143
+ def warning(string)
144
+ end
145
+
146
+ ###
147
+ # Called on document errors
148
+ # +string+ contains the error
149
+ def error(string)
150
+ end
151
+
152
+ ###
153
+ # Called when cdata blocks are found
154
+ # +string+ contains the cdata content
155
+ def cdata_block(string)
156
+ end
157
+
158
+ ###
159
+ # Called when processing instructions are found
160
+ # +name+ is the target of the instruction
161
+ # +content+ is the value of the instruction
162
+ def processing_instruction(name, content)
163
+ end
164
+ end
165
+ end
166
+ end
167
+ end
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ module SAX
6
+ ###
7
+ # This parser is a SAX style parser that reads its input as it
8
+ # deems necessary. The parser takes a Nokogiri::XML::SAX::Document,
9
+ # an optional encoding, then given an XML input, sends messages to
10
+ # the Nokogiri::XML::SAX::Document.
11
+ #
12
+ # Here is an example of using this parser:
13
+ #
14
+ # # Create a subclass of Nokogiri::XML::SAX::Document and implement
15
+ # # the events we care about:
16
+ # class MyDoc < Nokogiri::XML::SAX::Document
17
+ # def start_element name, attrs = []
18
+ # puts "starting: #{name}"
19
+ # end
20
+ #
21
+ # def end_element name
22
+ # puts "ending: #{name}"
23
+ # end
24
+ # end
25
+ #
26
+ # # Create our parser
27
+ # parser = Nokogiri::XML::SAX::Parser.new(MyDoc.new)
28
+ #
29
+ # # Send some XML to the parser
30
+ # parser.parse(File.open(ARGV[0]))
31
+ #
32
+ # For more information about SAX parsers, see Nokogiri::XML::SAX. Also
33
+ # see Nokogiri::XML::SAX::Document for the available events.
34
+ class Parser
35
+ class Attribute < Struct.new(:localname, :prefix, :uri, :value)
36
+ end
37
+
38
+ # Encodings this parser supports
39
+ ENCODINGS = {
40
+ "NONE" => 0, # No char encoding detected
41
+ "UTF-8" => 1, # UTF-8
42
+ "UTF16LE" => 2, # UTF-16 little endian
43
+ "UTF16BE" => 3, # UTF-16 big endian
44
+ "UCS4LE" => 4, # UCS-4 little endian
45
+ "UCS4BE" => 5, # UCS-4 big endian
46
+ "EBCDIC" => 6, # EBCDIC uh!
47
+ "UCS4-2143" => 7, # UCS-4 unusual ordering
48
+ "UCS4-3412" => 8, # UCS-4 unusual ordering
49
+ "UCS2" => 9, # UCS-2
50
+ "ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
51
+ "ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
52
+ "ISO-8859-3" => 12, # ISO-8859-3
53
+ "ISO-8859-4" => 13, # ISO-8859-4
54
+ "ISO-8859-5" => 14, # ISO-8859-5
55
+ "ISO-8859-6" => 15, # ISO-8859-6
56
+ "ISO-8859-7" => 16, # ISO-8859-7
57
+ "ISO-8859-8" => 17, # ISO-8859-8
58
+ "ISO-8859-9" => 18, # ISO-8859-9
59
+ "ISO-2022-JP" => 19, # ISO-2022-JP
60
+ "SHIFT-JIS" => 20, # Shift_JIS
61
+ "EUC-JP" => 21, # EUC-JP
62
+ "ASCII" => 22, # pure ASCII
63
+ }
64
+
65
+ # The Nokogiri::XML::SAX::Document where events will be sent.
66
+ attr_accessor :document
67
+
68
+ # The encoding being used for this document.
69
+ attr_accessor :encoding
70
+
71
+ # Create a new Parser with +doc+ and +encoding+
72
+ def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
73
+ @encoding = check_encoding(encoding)
74
+ @document = doc
75
+ @warned = false
76
+ end
77
+
78
+ ###
79
+ # Parse given +thing+ which may be a string containing xml, or an
80
+ # IO object.
81
+ def parse(thing, &block)
82
+ if thing.respond_to?(:read) && thing.respond_to?(:close)
83
+ parse_io(thing, &block)
84
+ else
85
+ parse_memory(thing, &block)
86
+ end
87
+ end
88
+
89
+ ###
90
+ # Parse given +io+
91
+ def parse_io(io, encoding = @encoding)
92
+ ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
93
+ yield ctx if block_given?
94
+ ctx.parse_with(self)
95
+ end
96
+
97
+ ###
98
+ # Parse a file with +filename+
99
+ def parse_file(filename)
100
+ raise ArgumentError unless filename
101
+ raise Errno::ENOENT unless File.exist?(filename)
102
+ raise Errno::EISDIR if File.directory?(filename)
103
+
104
+ ctx = ParserContext.file(filename)
105
+ yield ctx if block_given?
106
+ ctx.parse_with(self)
107
+ end
108
+
109
+ def parse_memory(data)
110
+ ctx = ParserContext.memory(data)
111
+ yield ctx if block_given?
112
+ ctx.parse_with(self)
113
+ end
114
+
115
+ private
116
+
117
+ def check_encoding(encoding)
118
+ encoding.upcase.tap do |enc|
119
+ raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
120
+ end
121
+ end
122
+ end
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ module SAX
6
+ ###
7
+ # Context for XML SAX parsers. This class is usually not instantiated
8
+ # by the user. Instead, you should be looking at
9
+ # Nokogiri::XML::SAX::Parser
10
+ class ParserContext
11
+ def self.new(thing, encoding = "UTF-8")
12
+ if [:read, :close].all? { |x| thing.respond_to?(x) }
13
+ io(thing, Parser::ENCODINGS[encoding])
14
+ else
15
+ memory(thing)
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ module SAX
6
+ ###
7
+ # PushParser can parse a document that is fed to it manually. It
8
+ # must be given a SAX::Document object which will be called with
9
+ # SAX events as the document is being parsed.
10
+ #
11
+ # Calling PushParser#<< writes XML to the parser, calling any SAX
12
+ # callbacks it can.
13
+ #
14
+ # PushParser#finish tells the parser that the document is finished
15
+ # and calls the end_document SAX method.
16
+ #
17
+ # Example:
18
+ #
19
+ # parser = PushParser.new(Class.new(XML::SAX::Document) {
20
+ # def start_document
21
+ # puts "start document called"
22
+ # end
23
+ # }.new)
24
+ # parser << "<div>hello<"
25
+ # parser << "/div>"
26
+ # parser.finish
27
+ class PushParser
28
+ # The Nokogiri::XML::SAX::Document on which the PushParser will be
29
+ # operating
30
+ attr_accessor :document
31
+
32
+ ###
33
+ # Create a new PushParser with +doc+ as the SAX Document, providing
34
+ # an optional +file_name+ and +encoding+
35
+ def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
36
+ @document = doc
37
+ @encoding = encoding
38
+ @sax_parser = XML::SAX::Parser.new(doc)
39
+
40
+ ## Create our push parser context
41
+ initialize_native(@sax_parser, file_name)
42
+ end
43
+
44
+ ###
45
+ # Write a +chunk+ of XML to the PushParser. Any callback methods
46
+ # that can be called will be called immediately.
47
+ def write(chunk, last_chunk = false)
48
+ native_write(chunk, last_chunk)
49
+ end
50
+ alias_method :<<, :write
51
+
52
+ ###
53
+ # Finish the parsing. This method is only necessary for
54
+ # Nokogiri::XML::SAX::Document#end_document to be called.
55
+ def finish
56
+ write("", true)
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "sax/document"
4
+ require_relative "sax/parser_context"
5
+ require_relative "sax/parser"
6
+ require_relative "sax/push_parser"
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class << self
6
+ ###
7
+ # Create a new Nokogiri::XML::Schema object using a +string_or_io+
8
+ # object.
9
+ def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
10
+ Schema.new(string_or_io, options)
11
+ end
12
+ end
13
+
14
+ ###
15
+ # Nokogiri::XML::Schema is used for validating XML against a schema
16
+ # (usually from an xsd file).
17
+ #
18
+ # == Synopsis
19
+ #
20
+ # Validate an XML document against a Schema. Loop over the errors that
21
+ # are returned and print them out:
22
+ #
23
+ # xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
24
+ # doc = Nokogiri::XML(File.read(PO_XML_FILE))
25
+ #
26
+ # xsd.validate(doc).each do |error|
27
+ # puts error.message
28
+ # end
29
+ #
30
+ # The errors in the list are Nokogiri::XML::SyntaxError objects.
31
+ #
32
+ # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
33
+ # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
34
+ # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
35
+ # security policy. If a document is trusted, then the caller may turn off the NONET option via
36
+ # the ParseOptions to re-enable external entity resolution over a network connection.
37
+ class Schema
38
+ # Errors while parsing the schema file
39
+ attr_accessor :errors
40
+ # The Nokogiri::XML::ParseOptions used to parse the schema
41
+ attr_accessor :parse_options
42
+
43
+ ###
44
+ # Create a new Nokogiri::XML::Schema object using a +string_or_io+
45
+ # object.
46
+ def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
47
+ from_document(Nokogiri::XML(string_or_io), options)
48
+ end
49
+
50
+ ###
51
+ # Validate +thing+ against this schema. +thing+ can be a
52
+ # Nokogiri::XML::Document object, or a filename. An Array of
53
+ # Nokogiri::XML::SyntaxError objects found while validating the
54
+ # +thing+ is returned.
55
+ def validate(thing)
56
+ if thing.is_a?(Nokogiri::XML::Document)
57
+ validate_document(thing)
58
+ elsif File.file?(thing)
59
+ validate_file(thing)
60
+ else
61
+ raise ArgumentError, "Must provide Nokogiri::Xml::Document or the name of an existing file"
62
+ end
63
+ end
64
+
65
+ ###
66
+ # Returns true if +thing+ is a valid Nokogiri::XML::Document or
67
+ # file.
68
+ def valid?(thing)
69
+ validate(thing).empty?
70
+ end
71
+ end
72
+ end
73
+ end