nokogiri 1.11.0.rc1-x86-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (145) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE-DEPENDENCIES.md +1614 -0
  3. data/LICENSE.md +9 -0
  4. data/README.md +200 -0
  5. data/bin/nokogiri +118 -0
  6. data/dependencies.yml +74 -0
  7. data/ext/nokogiri/depend +358 -0
  8. data/ext/nokogiri/extconf.rb +695 -0
  9. data/ext/nokogiri/html_document.c +170 -0
  10. data/ext/nokogiri/html_document.h +10 -0
  11. data/ext/nokogiri/html_element_description.c +279 -0
  12. data/ext/nokogiri/html_element_description.h +10 -0
  13. data/ext/nokogiri/html_entity_lookup.c +32 -0
  14. data/ext/nokogiri/html_entity_lookup.h +8 -0
  15. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  16. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  17. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  18. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  19. data/ext/nokogiri/nokogiri.c +147 -0
  20. data/ext/nokogiri/nokogiri.h +122 -0
  21. data/ext/nokogiri/xml_attr.c +103 -0
  22. data/ext/nokogiri/xml_attr.h +9 -0
  23. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  24. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  25. data/ext/nokogiri/xml_cdata.c +62 -0
  26. data/ext/nokogiri/xml_cdata.h +9 -0
  27. data/ext/nokogiri/xml_comment.c +69 -0
  28. data/ext/nokogiri/xml_comment.h +9 -0
  29. data/ext/nokogiri/xml_document.c +617 -0
  30. data/ext/nokogiri/xml_document.h +23 -0
  31. data/ext/nokogiri/xml_document_fragment.c +48 -0
  32. data/ext/nokogiri/xml_document_fragment.h +10 -0
  33. data/ext/nokogiri/xml_dtd.c +202 -0
  34. data/ext/nokogiri/xml_dtd.h +10 -0
  35. data/ext/nokogiri/xml_element_content.c +123 -0
  36. data/ext/nokogiri/xml_element_content.h +10 -0
  37. data/ext/nokogiri/xml_element_decl.c +69 -0
  38. data/ext/nokogiri/xml_element_decl.h +9 -0
  39. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  40. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  41. data/ext/nokogiri/xml_entity_decl.c +110 -0
  42. data/ext/nokogiri/xml_entity_decl.h +10 -0
  43. data/ext/nokogiri/xml_entity_reference.c +52 -0
  44. data/ext/nokogiri/xml_entity_reference.h +9 -0
  45. data/ext/nokogiri/xml_io.c +61 -0
  46. data/ext/nokogiri/xml_io.h +11 -0
  47. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  48. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  49. data/ext/nokogiri/xml_namespace.c +111 -0
  50. data/ext/nokogiri/xml_namespace.h +14 -0
  51. data/ext/nokogiri/xml_node.c +1773 -0
  52. data/ext/nokogiri/xml_node.h +13 -0
  53. data/ext/nokogiri/xml_node_set.c +486 -0
  54. data/ext/nokogiri/xml_node_set.h +12 -0
  55. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  56. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  57. data/ext/nokogiri/xml_reader.c +668 -0
  58. data/ext/nokogiri/xml_reader.h +10 -0
  59. data/ext/nokogiri/xml_relax_ng.c +161 -0
  60. data/ext/nokogiri/xml_relax_ng.h +9 -0
  61. data/ext/nokogiri/xml_sax_parser.c +310 -0
  62. data/ext/nokogiri/xml_sax_parser.h +39 -0
  63. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  64. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  65. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  66. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  67. data/ext/nokogiri/xml_schema.c +205 -0
  68. data/ext/nokogiri/xml_schema.h +9 -0
  69. data/ext/nokogiri/xml_syntax_error.c +64 -0
  70. data/ext/nokogiri/xml_syntax_error.h +13 -0
  71. data/ext/nokogiri/xml_text.c +52 -0
  72. data/ext/nokogiri/xml_text.h +9 -0
  73. data/ext/nokogiri/xml_xpath_context.c +298 -0
  74. data/ext/nokogiri/xml_xpath_context.h +10 -0
  75. data/ext/nokogiri/xslt_stylesheet.c +266 -0
  76. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  77. data/lib/nokogiri.rb +127 -0
  78. data/lib/nokogiri/2.4/nokogiri.so +0 -0
  79. data/lib/nokogiri/2.5/nokogiri.so +0 -0
  80. data/lib/nokogiri/2.6/nokogiri.so +0 -0
  81. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  82. data/lib/nokogiri/css.rb +28 -0
  83. data/lib/nokogiri/css/node.rb +53 -0
  84. data/lib/nokogiri/css/parser.rb +751 -0
  85. data/lib/nokogiri/css/parser.y +272 -0
  86. data/lib/nokogiri/css/parser_extras.rb +92 -0
  87. data/lib/nokogiri/css/syntax_error.rb +8 -0
  88. data/lib/nokogiri/css/tokenizer.rb +154 -0
  89. data/lib/nokogiri/css/tokenizer.rex +55 -0
  90. data/lib/nokogiri/css/xpath_visitor.rb +232 -0
  91. data/lib/nokogiri/decorators/slop.rb +43 -0
  92. data/lib/nokogiri/html.rb +38 -0
  93. data/lib/nokogiri/html/builder.rb +36 -0
  94. data/lib/nokogiri/html/document.rb +336 -0
  95. data/lib/nokogiri/html/document_fragment.rb +50 -0
  96. data/lib/nokogiri/html/element_description.rb +24 -0
  97. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  98. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  99. data/lib/nokogiri/html/sax/parser.rb +63 -0
  100. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  101. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  102. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  103. data/lib/nokogiri/syntax_error.rb +5 -0
  104. data/lib/nokogiri/version.rb +149 -0
  105. data/lib/nokogiri/xml.rb +76 -0
  106. data/lib/nokogiri/xml/attr.rb +15 -0
  107. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  108. data/lib/nokogiri/xml/builder.rb +447 -0
  109. data/lib/nokogiri/xml/cdata.rb +12 -0
  110. data/lib/nokogiri/xml/character_data.rb +8 -0
  111. data/lib/nokogiri/xml/document.rb +280 -0
  112. data/lib/nokogiri/xml/document_fragment.rb +161 -0
  113. data/lib/nokogiri/xml/dtd.rb +33 -0
  114. data/lib/nokogiri/xml/element_content.rb +37 -0
  115. data/lib/nokogiri/xml/element_decl.rb +14 -0
  116. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  117. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  118. data/lib/nokogiri/xml/namespace.rb +14 -0
  119. data/lib/nokogiri/xml/node.rb +916 -0
  120. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  121. data/lib/nokogiri/xml/node_set.rb +372 -0
  122. data/lib/nokogiri/xml/notation.rb +7 -0
  123. data/lib/nokogiri/xml/parse_options.rb +121 -0
  124. data/lib/nokogiri/xml/pp.rb +3 -0
  125. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  126. data/lib/nokogiri/xml/pp/node.rb +57 -0
  127. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  128. data/lib/nokogiri/xml/reader.rb +116 -0
  129. data/lib/nokogiri/xml/relax_ng.rb +33 -0
  130. data/lib/nokogiri/xml/sax.rb +5 -0
  131. data/lib/nokogiri/xml/sax/document.rb +172 -0
  132. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  133. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  134. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  135. data/lib/nokogiri/xml/schema.rb +64 -0
  136. data/lib/nokogiri/xml/searchable.rb +231 -0
  137. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  138. data/lib/nokogiri/xml/text.rb +10 -0
  139. data/lib/nokogiri/xml/xpath.rb +11 -0
  140. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  141. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  142. data/lib/nokogiri/xslt.rb +57 -0
  143. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  144. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  145. metadata +482 -0
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+ require 'nokogiri/xml/pp/node'
3
+ require 'nokogiri/xml/pp/character_data'
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ module PP
5
+ module CharacterData
6
+ def pretty_print pp # :nodoc:
7
+ nice_name = self.class.name.split('::').last
8
+ pp.group(2, "#(#{nice_name} ", ')') do
9
+ pp.pp text
10
+ end
11
+ end
12
+
13
+ def inspect # :nodoc:
14
+ "#<#{self.class.name}:#{sprintf("0x%x",object_id)} #{text.inspect}>"
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ module PP
5
+ module Node
6
+ def inspect # :nodoc:
7
+ attributes = inspect_attributes.reject { |x|
8
+ begin
9
+ attribute = send x
10
+ !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
11
+ rescue NoMethodError
12
+ true
13
+ end
14
+ }.map { |attribute|
15
+ "#{attribute.to_s.sub(/_\w+/, 's')}=#{send(attribute).inspect}"
16
+ }.join ' '
17
+ "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{attributes}>"
18
+ end
19
+
20
+ def pretty_print pp # :nodoc:
21
+ nice_name = self.class.name.split('::').last
22
+ pp.group(2, "#(#{nice_name}:#{sprintf("0x%x", object_id)} {", '})') do
23
+
24
+ pp.breakable
25
+ attrs = inspect_attributes.map { |t|
26
+ [t, send(t)] if respond_to?(t)
27
+ }.compact.find_all { |x|
28
+ if x.last
29
+ if [:attribute_nodes, :children].include? x.first
30
+ !x.last.empty?
31
+ else
32
+ true
33
+ end
34
+ end
35
+ }
36
+
37
+ pp.seplist(attrs) do |v|
38
+ if [:attribute_nodes, :children].include? v.first
39
+ pp.group(2, "#{v.first.to_s.sub(/_\w+$/, 's')} = [", "]") do
40
+ pp.breakable
41
+ pp.seplist(v.last) do |item|
42
+ pp.pp item
43
+ end
44
+ end
45
+ else
46
+ pp.text "#{v.first} = "
47
+ pp.pp v.last
48
+ end
49
+ end
50
+ pp.breakable
51
+
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ class ProcessingInstruction < Node
5
+ def initialize document, name, content
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ ###
5
+ # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
6
+ # would move. The Reader is given an XML document, and yields nodes
7
+ # to an each block.
8
+ #
9
+ # Here is an example of usage:
10
+ #
11
+ # reader = Nokogiri::XML::Reader(<<-eoxml)
12
+ # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
13
+ # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
14
+ # </x>
15
+ # eoxml
16
+ #
17
+ # reader.each do |node|
18
+ #
19
+ # # node is an instance of Nokogiri::XML::Reader
20
+ # puts node.name
21
+ #
22
+ # end
23
+ #
24
+ # Note that Nokogiri::XML::Reader#each can only be called once!! Once
25
+ # the cursor moves through the entire document, you must parse the
26
+ # document again. So make sure that you capture any information you
27
+ # need during the first iteration.
28
+ #
29
+ # The Reader parser is good for when you need the speed of a SAX parser,
30
+ # but do not want to write a Document handler.
31
+ class Reader
32
+ include Enumerable
33
+
34
+ TYPE_NONE = 0
35
+ # Element node type
36
+ TYPE_ELEMENT = 1
37
+ # Attribute node type
38
+ TYPE_ATTRIBUTE = 2
39
+ # Text node type
40
+ TYPE_TEXT = 3
41
+ # CDATA node type
42
+ TYPE_CDATA = 4
43
+ # Entity Reference node type
44
+ TYPE_ENTITY_REFERENCE = 5
45
+ # Entity node type
46
+ TYPE_ENTITY = 6
47
+ # PI node type
48
+ TYPE_PROCESSING_INSTRUCTION = 7
49
+ # Comment node type
50
+ TYPE_COMMENT = 8
51
+ # Document node type
52
+ TYPE_DOCUMENT = 9
53
+ # Document Type node type
54
+ TYPE_DOCUMENT_TYPE = 10
55
+ # Document Fragment node type
56
+ TYPE_DOCUMENT_FRAGMENT = 11
57
+ # Notation node type
58
+ TYPE_NOTATION = 12
59
+ # Whitespace node type
60
+ TYPE_WHITESPACE = 13
61
+ # Significant Whitespace node type
62
+ TYPE_SIGNIFICANT_WHITESPACE = 14
63
+ # Element end node type
64
+ TYPE_END_ELEMENT = 15
65
+ # Entity end node type
66
+ TYPE_END_ENTITY = 16
67
+ # XML Declaration node type
68
+ TYPE_XML_DECLARATION = 17
69
+
70
+ # A list of errors encountered while parsing
71
+ attr_accessor :errors
72
+
73
+ # The encoding for the document
74
+ attr_reader :encoding
75
+
76
+ # The XML source
77
+ attr_reader :source
78
+
79
+ alias :self_closing? :empty_element?
80
+
81
+ def initialize source, url = nil, encoding = nil # :nodoc:
82
+ @source = source
83
+ @errors = []
84
+ @encoding = encoding
85
+ end
86
+ private :initialize
87
+
88
+ ###
89
+ # Get a hash of attribute names to values for the current node, merged with its namespaces (if any).
90
+ def attributes
91
+ attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
92
+ hash[node.name] = node.to_s
93
+ end
94
+ ns = namespaces
95
+ attrs_hash.merge!(ns) if ns
96
+ attrs_hash
97
+ end
98
+
99
+ ###
100
+ # Get a list of attributes for the current node
101
+ def attribute_nodes
102
+ nodes = attr_nodes
103
+ nodes.each { |v| v.instance_variable_set(:@_r, self) }
104
+ nodes
105
+ end
106
+
107
+ ###
108
+ # Move the cursor through the document yielding the cursor to the block
109
+ def each
110
+ while cursor = self.read
111
+ yield cursor
112
+ end
113
+ end
114
+ end
115
+ end
116
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ class << self
5
+ ###
6
+ # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
7
+ # See Nokogiri::XML::RelaxNG for an example.
8
+ def RelaxNG string_or_io
9
+ RelaxNG.new(string_or_io)
10
+ end
11
+ end
12
+
13
+ ###
14
+ # Nokogiri::XML::RelaxNG is used for validating XML against a
15
+ # RelaxNG schema.
16
+ #
17
+ # == Synopsis
18
+ #
19
+ # Validate an XML document against a RelaxNG schema. Loop over the errors
20
+ # that are returned and print them out:
21
+ #
22
+ # schema = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE))
23
+ # doc = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
24
+ #
25
+ # schema.validate(doc).each do |error|
26
+ # puts error.message
27
+ # end
28
+ #
29
+ # The list of errors consists of Nokogiri::XML::SyntaxError objects.
30
+ class RelaxNG < Nokogiri::XML::Schema
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ require 'nokogiri/xml/sax/document'
3
+ require 'nokogiri/xml/sax/parser_context'
4
+ require 'nokogiri/xml/sax/parser'
5
+ require 'nokogiri/xml/sax/push_parser'
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ ###
5
+ # SAX Parsers are event driven parsers. Nokogiri provides two different
6
+ # event based parsers when dealing with XML. If you want to do SAX style
7
+ # parsing using HTML, check out Nokogiri::HTML::SAX.
8
+ #
9
+ # The basic way a SAX style parser works is by creating a parser,
10
+ # telling the parser about the events we're interested in, then giving
11
+ # the parser some XML to process. The parser will notify you when
12
+ # it encounters events you said you would like to know about.
13
+ #
14
+ # To register for events, you simply subclass Nokogiri::XML::SAX::Document,
15
+ # and implement the methods for which you would like notification.
16
+ #
17
+ # For example, if I want to be notified when a document ends, and when an
18
+ # element starts, I would write a class like this:
19
+ #
20
+ # class MyDocument < Nokogiri::XML::SAX::Document
21
+ # def end_document
22
+ # puts "the document has ended"
23
+ # end
24
+ #
25
+ # def start_element name, attributes = []
26
+ # puts "#{name} started"
27
+ # end
28
+ # end
29
+ #
30
+ # Then I would instantiate a SAX parser with this document, and feed the
31
+ # parser some XML
32
+ #
33
+ # # Create a new parser
34
+ # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
35
+ #
36
+ # # Feed the parser some XML
37
+ # parser.parse(File.open(ARGV[0]))
38
+ #
39
+ # Now my document handler will be called when each node starts, and when
40
+ # the document ends. To see what kinds of events are available, take
41
+ # a look at Nokogiri::XML::SAX::Document.
42
+ #
43
+ # Two SAX parsers for XML are available, a parser that reads from a string
44
+ # or IO object as it feels necessary, and a parser that lets you spoon
45
+ # feed it XML. If you want to let Nokogiri deal with reading your XML,
46
+ # use the Nokogiri::XML::SAX::Parser. If you want to have fine-grained
47
+ # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
48
+ module SAX
49
+ ###
50
+ # This class is used for registering types of events you are interested
51
+ # in handling. All of the methods on this class are available as
52
+ # possible events while parsing an XML document. To register for any
53
+ # particular event, just subclass this class and implement the methods
54
+ # you are interested in knowing about.
55
+ #
56
+ # To only be notified about start and end element events, write a class
57
+ # like this:
58
+ #
59
+ # class MyDocument < Nokogiri::XML::SAX::Document
60
+ # def start_element name, attrs = []
61
+ # puts "#{name} started!"
62
+ # end
63
+ #
64
+ # def end_element name
65
+ # puts "#{name} ended"
66
+ # end
67
+ # end
68
+ #
69
+ # You can use this event handler for any SAX style parser included with
70
+ # Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
71
+ class Document
72
+ ###
73
+ # Called when an XML declaration is parsed
74
+ def xmldecl version, encoding, standalone
75
+ end
76
+
77
+ ###
78
+ # Called when document starts parsing
79
+ def start_document
80
+ end
81
+
82
+ ###
83
+ # Called when document ends parsing
84
+ def end_document
85
+ end
86
+
87
+ ###
88
+ # Called at the beginning of an element
89
+ # * +name+ is the name of the tag
90
+ # * +attrs+ are an assoc list of namespaces and attributes, e.g.:
91
+ # [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
92
+ def start_element name, attrs = []
93
+ end
94
+
95
+ ###
96
+ # Called at the end of an element
97
+ # +name+ is the tag name
98
+ def end_element name
99
+ end
100
+
101
+ ###
102
+ # Called at the beginning of an element
103
+ # +name+ is the element name
104
+ # +attrs+ is a list of attributes
105
+ # +prefix+ is the namespace prefix for the element
106
+ # +uri+ is the associated namespace URI
107
+ # +ns+ is a hash of namespace prefix:urls associated with the element
108
+ def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
109
+ ###
110
+ # Deal with SAX v1 interface
111
+ name = [prefix, name].compact.join(':')
112
+ attributes = ns.map { |ns_prefix,ns_uri|
113
+ [['xmlns', ns_prefix].compact.join(':'), ns_uri]
114
+ } + attrs.map { |attr|
115
+ [[attr.prefix, attr.localname].compact.join(':'), attr.value]
116
+ }
117
+ start_element name, attributes
118
+ end
119
+
120
+ ###
121
+ # Called at the end of an element
122
+ # +name+ is the element's name
123
+ # +prefix+ is the namespace prefix associated with the element
124
+ # +uri+ is the associated namespace URI
125
+ def end_element_namespace name, prefix = nil, uri = nil
126
+ ###
127
+ # Deal with SAX v1 interface
128
+ end_element [prefix, name].compact.join(':')
129
+ end
130
+
131
+ ###
132
+ # Characters read between tags. This method might be called multiple
133
+ # times given one contiguous string of characters.
134
+ #
135
+ # +string+ contains the character data
136
+ def characters string
137
+ end
138
+
139
+ ###
140
+ # Called when comments are encountered
141
+ # +string+ contains the comment data
142
+ def comment string
143
+ end
144
+
145
+ ###
146
+ # Called on document warnings
147
+ # +string+ contains the warning
148
+ def warning string
149
+ end
150
+
151
+ ###
152
+ # Called on document errors
153
+ # +string+ contains the error
154
+ def error string
155
+ end
156
+
157
+ ###
158
+ # Called when cdata blocks are found
159
+ # +string+ contains the cdata content
160
+ def cdata_block string
161
+ end
162
+
163
+ ###
164
+ # Called when processing instructions are found
165
+ # +name+ is the target of the instruction
166
+ # +content+ is the value of the instruction
167
+ def processing_instruction name, content
168
+ end
169
+ end
170
+ end
171
+ end
172
+ end
@@ -0,0 +1,123 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ module SAX
5
+ ###
6
+ # This parser is a SAX style parser that reads its input as it
7
+ # deems necessary. The parser takes a Nokogiri::XML::SAX::Document,
8
+ # an optional encoding, then given an XML input, sends messages to
9
+ # the Nokogiri::XML::SAX::Document.
10
+ #
11
+ # Here is an example of using this parser:
12
+ #
13
+ # # Create a subclass of Nokogiri::XML::SAX::Document and implement
14
+ # # the events we care about:
15
+ # class MyDoc < Nokogiri::XML::SAX::Document
16
+ # def start_element name, attrs = []
17
+ # puts "starting: #{name}"
18
+ # end
19
+ #
20
+ # def end_element name
21
+ # puts "ending: #{name}"
22
+ # end
23
+ # end
24
+ #
25
+ # # Create our parser
26
+ # parser = Nokogiri::XML::SAX::Parser.new(MyDoc.new)
27
+ #
28
+ # # Send some XML to the parser
29
+ # parser.parse(File.open(ARGV[0]))
30
+ #
31
+ # For more information about SAX parsers, see Nokogiri::XML::SAX. Also
32
+ # see Nokogiri::XML::SAX::Document for the available events.
33
+ class Parser
34
+ class Attribute < Struct.new(:localname, :prefix, :uri, :value)
35
+ end
36
+
37
+ # Encodings this parser supports
38
+ ENCODINGS = {
39
+ 'NONE' => 0, # No char encoding detected
40
+ 'UTF-8' => 1, # UTF-8
41
+ 'UTF16LE' => 2, # UTF-16 little endian
42
+ 'UTF16BE' => 3, # UTF-16 big endian
43
+ 'UCS4LE' => 4, # UCS-4 little endian
44
+ 'UCS4BE' => 5, # UCS-4 big endian
45
+ 'EBCDIC' => 6, # EBCDIC uh!
46
+ 'UCS4-2143' => 7, # UCS-4 unusual ordering
47
+ 'UCS4-3412' => 8, # UCS-4 unusual ordering
48
+ 'UCS2' => 9, # UCS-2
49
+ 'ISO-8859-1' => 10, # ISO-8859-1 ISO Latin 1
50
+ 'ISO-8859-2' => 11, # ISO-8859-2 ISO Latin 2
51
+ 'ISO-8859-3' => 12, # ISO-8859-3
52
+ 'ISO-8859-4' => 13, # ISO-8859-4
53
+ 'ISO-8859-5' => 14, # ISO-8859-5
54
+ 'ISO-8859-6' => 15, # ISO-8859-6
55
+ 'ISO-8859-7' => 16, # ISO-8859-7
56
+ 'ISO-8859-8' => 17, # ISO-8859-8
57
+ 'ISO-8859-9' => 18, # ISO-8859-9
58
+ 'ISO-2022-JP' => 19, # ISO-2022-JP
59
+ 'SHIFT-JIS' => 20, # Shift_JIS
60
+ 'EUC-JP' => 21, # EUC-JP
61
+ 'ASCII' => 22, # pure ASCII
62
+ }
63
+
64
+ # The Nokogiri::XML::SAX::Document where events will be sent.
65
+ attr_accessor :document
66
+
67
+ # The encoding being used for this document.
68
+ attr_accessor :encoding
69
+
70
+ # Create a new Parser with +doc+ and +encoding+
71
+ def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
72
+ @encoding = check_encoding(encoding)
73
+ @document = doc
74
+ @warned = false
75
+ end
76
+
77
+ ###
78
+ # Parse given +thing+ which may be a string containing xml, or an
79
+ # IO object.
80
+ def parse thing, &block
81
+ if thing.respond_to?(:read) && thing.respond_to?(:close)
82
+ parse_io(thing, &block)
83
+ else
84
+ parse_memory(thing, &block)
85
+ end
86
+ end
87
+
88
+ ###
89
+ # Parse given +io+
90
+ def parse_io io, encoding = 'ASCII'
91
+ @encoding = check_encoding(encoding)
92
+ ctx = ParserContext.io(io, ENCODINGS[@encoding])
93
+ yield ctx if block_given?
94
+ ctx.parse_with self
95
+ end
96
+
97
+ ###
98
+ # Parse a file with +filename+
99
+ def parse_file filename
100
+ raise ArgumentError unless filename
101
+ raise Errno::ENOENT unless File.exist?(filename)
102
+ raise Errno::EISDIR if File.directory?(filename)
103
+ ctx = ParserContext.file filename
104
+ yield ctx if block_given?
105
+ ctx.parse_with self
106
+ end
107
+
108
+ def parse_memory data
109
+ ctx = ParserContext.memory data
110
+ yield ctx if block_given?
111
+ ctx.parse_with self
112
+ end
113
+
114
+ private
115
+ def check_encoding(encoding)
116
+ encoding.upcase.tap do |enc|
117
+ raise ArgumentError.new("'#{enc}' is not a valid encoding") unless ENCODINGS[enc]
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
123
+ end