nokogiri 1.16.3 → 1.18.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +14 -22
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +8 -5
  5. data/dependencies.yml +9 -9
  6. data/ext/nokogiri/extconf.rb +188 -142
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +18 -33
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +2 -10
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +163 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -6
  25. data/ext/nokogiri/xml_node.c +141 -104
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +54 -58
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +219 -131
  31. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  32. data/ext/nokogiri/xml_schema.c +50 -85
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +2 -4
  35. data/ext/nokogiri/xml_xpath_context.c +103 -100
  36. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  37. data/gumbo-parser/src/ascii.c +2 -2
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +63 -25
  42. data/gumbo-parser/src/tokenizer.c +6 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +44 -23
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -72
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/xml/builder.rb +8 -1
  68. data/lib/nokogiri/xml/document.rb +70 -26
  69. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  70. data/lib/nokogiri/xml/node.rb +82 -11
  71. data/lib/nokogiri/xml/node_set.rb +9 -7
  72. data/lib/nokogiri/xml/parse_options.rb +1 -1
  73. data/lib/nokogiri/xml/pp/node.rb +6 -1
  74. data/lib/nokogiri/xml/reader.rb +46 -13
  75. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  76. data/lib/nokogiri/xml/sax/document.rb +174 -83
  77. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  78. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  79. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  80. data/lib/nokogiri/xml/sax.rb +48 -0
  81. data/lib/nokogiri/xml/schema.rb +112 -45
  82. data/lib/nokogiri/xml/searchable.rb +38 -42
  83. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  84. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  85. data/lib/nokogiri/xml.rb +13 -24
  86. data/lib/nokogiri/xslt.rb +3 -9
  87. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  88. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  89. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  90. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  91. metadata +13 -14
  92. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  93. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  94. data/ports/archives/libxml2-2.12.6.tar.xz +0 -0
  95. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -2,106 +2,168 @@
2
2
 
3
3
  module Nokogiri
4
4
  module XML
5
- ###
6
- # SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
7
- # dealing with XML. If you want to do SAX style parsing using HTML, check out
8
- # Nokogiri::HTML4::SAX.
9
- #
10
- # The basic way a SAX style parser works is by creating a parser, telling the parser about the
11
- # events we're interested in, then giving the parser some XML to process. The parser will notify
12
- # you when it encounters events you said you would like to know about.
13
- #
14
- # To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
15
- # methods for which you would like notification.
16
- #
17
- # For example, if I want to be notified when a document ends, and when an element starts, I
18
- # would write a class like this:
19
- #
20
- # class MyDocument < Nokogiri::XML::SAX::Document
21
- # def end_document
22
- # puts "the document has ended"
23
- # end
24
- #
25
- # def start_element name, attributes = []
26
- # puts "#{name} started"
27
- # end
28
- # end
29
- #
30
- # Then I would instantiate a SAX parser with this document, and feed the parser some XML
31
- #
32
- # # Create a new parser
33
- # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
34
- #
35
- # # Feed the parser some XML
36
- # parser.parse(File.open(ARGV[0]))
37
- #
38
- # Now my document handler will be called when each node starts, and when then document ends. To
39
- # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
40
- #
41
- # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
42
- # feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
43
- # deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
44
- # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
45
5
  module SAX
46
- ###
47
- # This class is used for registering types of events you are interested in handling. All of
48
- # the methods on this class are available as possible events while parsing an XML document. To
49
- # register for any particular event, just subclass this class and implement the methods you
50
- # are interested in knowing about.
6
+ # :markup: markdown
7
+ #
8
+ # The SAX::Document class is used for registering types of events you are interested in
9
+ # handling. All of the methods on this class are available as possible events while parsing an
10
+ # \XML document. To register for any particular event, subclass this class and implement the
11
+ # methods you are interested in knowing about.
51
12
  #
52
13
  # To only be notified about start and end element events, write a class like this:
53
14
  #
54
- # class MyDocument < Nokogiri::XML::SAX::Document
55
- # def start_element name, attrs = []
56
- # puts "#{name} started!"
57
- # end
15
+ # class MyHandler < Nokogiri::XML::SAX::Document
16
+ # def start_element name, attrs = []
17
+ # puts "#{name} started!"
18
+ # end
58
19
  #
59
- # def end_element name
60
- # puts "#{name} ended"
20
+ # def end_element name
21
+ # puts "#{name} ended"
22
+ # end
61
23
  # end
62
- # end
63
24
  #
64
- # You can use this event handler for any SAX style parser included with Nokogiri. See
65
- # Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
25
+ # You can use this event handler for any SAX-style parser included with Nokogiri.
26
+ #
27
+ # See also:
28
+ #
29
+ # - Nokogiri::XML::SAX
30
+ # - Nokogiri::HTML4::SAX
31
+ #
32
+ # ### Entity Handling
33
+ #
34
+ # ⚠ Entity handling is complicated in a SAX parser! Please read this section carefully if
35
+ # you're not getting the behavior you expect.
36
+ #
37
+ # Entities will be reported to the user via callbacks to #characters, to #reference, or
38
+ # possibly to both. The behavior is determined by a combination of _entity type_ and the value
39
+ # of ParserContext#replace_entities. (Recall that the default value of
40
+ # ParserContext#replace_entities is `false`.)
41
+ #
42
+ # ⚠ <b>It is UNSAFE to set ParserContext#replace_entities to `true`</b> when parsing untrusted
43
+ # documents.
44
+ #
45
+ # 💡 For more information on entity types, see [Wikipedia's page on
46
+ # DTDs](https://en.wikipedia.org/wiki/Document_type_definition#Entity_declarations).
47
+ #
48
+ # | Entity type | #characters | #reference |
49
+ # |--------------------------------------|------------------------------------|-------------------------------------|
50
+ # | Char ref (e.g., <tt>&#146;</tt>) | always | never |
51
+ # | Predefined (e.g., <tt>&amp;</tt>) | always | never |
52
+ # | Undeclared † | never | <tt>#replace_entities == false</tt> |
53
+ # | Internal | always | <tt>#replace_entities == false</tt> |
54
+ # | External † | <tt>#replace_entities == true</tt> | <tt>#replace_entities == false</tt> |
55
+ #
56
+ # &nbsp;
57
+ #
58
+ # † In the case where the replacement text for the entity is unknown (e.g., an undeclared entity
59
+ # or an external entity that could not be resolved because of network issues), then the
60
+ # replacement text will not be reported. If ParserContext#replace_entities is `true`, this
61
+ # means the #characters callback will not be invoked. If ParserContext#replace_entities is
62
+ # `false`, then the #reference callback will be invoked, but with `nil` for the `content`
63
+ # argument.
64
+ #
66
65
  class Document
67
66
  ###
68
- # Called when an XML declaration is parsed
67
+ # Called when an \XML declaration is parsed.
68
+ #
69
+ # [Parameters]
70
+ # - +version+ (String) the version attribute
71
+ # - +encoding+ (String, nil) the encoding of the document if present, else +nil+
72
+ # - +standalone+ ("yes", "no", nil) the standalone attribute if present, else +nil+
69
73
  def xmldecl(version, encoding, standalone)
70
74
  end
71
75
 
72
76
  ###
73
- # Called when document starts parsing
77
+ # Called when document starts parsing.
74
78
  def start_document
75
79
  end
76
80
 
77
81
  ###
78
- # Called when document ends parsing
82
+ # Called when document ends parsing.
79
83
  def end_document
80
84
  end
81
85
 
82
86
  ###
83
- # Called at the beginning of an element
84
- # * +name+ is the name of the tag
85
- # * +attrs+ are an assoc list of namespaces and attributes, e.g.:
87
+ # Called at the beginning of an element.
88
+ #
89
+ # [Parameters]
90
+ # - +name+ (String) the name of the element
91
+ # - +attrs+ (Array<Array<String>>) an assoc list of namespace declarations and attributes, e.g.:
86
92
  # [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
93
+ #
94
+ # 💡 If you're dealing with XML and need to handle namespaces, use the
95
+ # #start_element_namespace method instead.
96
+ #
97
+ # Note that the element namespace and any attribute namespaces are not provided, and so any
98
+ # namespaced elements or attributes will be returned as strings including the prefix:
99
+ #
100
+ # parser.parse(<<~XML)
101
+ # <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
102
+ # <foo:bar foo:quux="xxx">hello world</foo:bar>
103
+ # </root>
104
+ # XML
105
+ #
106
+ # assert_pattern do
107
+ # parser.document.start_elements => [
108
+ # ["root", [["xmlns:foo", "http://foo.example.com/"], ["xmlns", "http://example.com/"]]],
109
+ # ["foo:bar", [["foo:quux", "xxx"]]],
110
+ # ]
111
+ # end
112
+ #
87
113
  def start_element(name, attrs = [])
88
114
  end
89
115
 
90
116
  ###
91
- # Called at the end of an element
92
- # +name+ is the tag name
117
+ # Called at the end of an element.
118
+ #
119
+ # [Parameters]
120
+ # - +name+ (String) the name of the element being closed
121
+ #
93
122
  def end_element(name)
94
123
  end
95
124
 
96
125
  ###
97
- # Called at the beginning of an element
98
- # +name+ is the element name
99
- # +attrs+ is a list of attributes
100
- # +prefix+ is the namespace prefix for the element
101
- # +uri+ is the associated namespace URI
102
- # +ns+ is a hash of namespace prefix:urls associated with the element
126
+ # Called at the beginning of an element.
127
+ #
128
+ # [Parameters]
129
+ # - +name+ (String) is the name of the element
130
+ # - +attrs+ (Array<Attribute>) is an array of structs with the following properties:
131
+ # - +localname+ (String) the local name of the attribute
132
+ # - +value+ (String) the value of the attribute
133
+ # - +prefix+ (String, nil) the namespace prefix of the attribute
134
+ # - +uri+ (String, nil) the namespace URI of the attribute
135
+ # - +prefix+ (String, nil) is the namespace prefix for the element
136
+ # - +uri+ (String, nil) is the associated URI for the element's namespace
137
+ # - +ns+ (Array<Array<String, String>>) is an assoc list of namespace declarations on the element
138
+ #
139
+ # 💡 If you're dealing with HTML or don't care about namespaces, try #start_element instead.
140
+ #
141
+ # [Example]
142
+ # it "start_elements_namespace is called with namespaced attributes" do
143
+ # parser.parse(<<~XML)
144
+ # <root xmlns:foo='http://foo.example.com/'>
145
+ # <foo:a foo:bar='hello' />
146
+ # </root>
147
+ # XML
148
+ #
149
+ # assert_pattern do
150
+ # parser.document.start_elements_namespace => [
151
+ # [
152
+ # "root",
153
+ # [],
154
+ # nil, nil,
155
+ # [["foo", "http://foo.example.com/"]], # namespace declarations
156
+ # ], [
157
+ # "a",
158
+ # [Nokogiri::XML::SAX::Parser::Attribute(localname: "bar", prefix: "foo", uri: "http://foo.example.com/", value: "hello")], # prefixed attribute
159
+ # "foo", "http://foo.example.com/", # prefix and uri for the "a" element
160
+ # [],
161
+ # ]
162
+ # ]
163
+ # end
164
+ # end
165
+ #
103
166
  def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
104
- ###
105
167
  # Deal with SAX v1 interface
106
168
  name = [prefix, name].compact.join(":")
107
169
  attributes = ns.map do |ns_prefix, ns_uri|
@@ -113,52 +175,81 @@ module Nokogiri
113
175
  end
114
176
 
115
177
  ###
116
- # Called at the end of an element
117
- # +name+ is the element's name
118
- # +prefix+ is the namespace prefix associated with the element
119
- # +uri+ is the associated namespace URI
178
+ # Called at the end of an element.
179
+ #
180
+ # [Parameters]
181
+ # - +name+ (String) is the name of the element
182
+ # - +prefix+ (String, nil) is the namespace prefix for the element
183
+ # - +uri+ (String, nil) is the associated URI for the element's namespace
184
+ #
120
185
  def end_element_namespace(name, prefix = nil, uri = nil)
121
- ###
122
186
  # Deal with SAX v1 interface
123
187
  end_element([prefix, name].compact.join(":"))
124
188
  end
125
189
 
126
190
  ###
127
- # Characters read between a tag. This method might be called multiple
128
- # times given one contiguous string of characters.
191
+ # Called when character data is parsed, and for parsed entities when
192
+ # ParserContext#replace_entities is +true+.
193
+ #
194
+ # [Parameters]
195
+ # - +string+ contains the character data or entity replacement text
196
+ #
197
+ # ⚠ Please see Document@Entity+Handling for important information about how entities are handled.
198
+ #
199
+ # ⚠ This method might be called multiple times for a contiguous string of characters.
129
200
  #
130
- # +string+ contains the character data
131
201
  def characters(string)
132
202
  end
133
203
 
204
+ ###
205
+ # Called when a parsed entity is referenced and not replaced.
206
+ #
207
+ # [Parameters]
208
+ # - +name+ (String) is the name of the entity
209
+ # - +content+ (String, nil) is the replacement text for the entity, if known
210
+ #
211
+ # ⚠ Please see Document@Entity+Handling for important information about how entities are handled.
212
+ #
213
+ # ⚠ An internal entity may result in a call to both #characters and #reference.
214
+ #
215
+ # Since v1.17.0
216
+ #
217
+ def reference(name, content)
218
+ end
219
+
134
220
  ###
135
221
  # Called when comments are encountered
136
- # +string+ contains the comment data
222
+ # [Parameters]
223
+ # - +string+ contains the comment data
137
224
  def comment(string)
138
225
  end
139
226
 
140
227
  ###
141
228
  # Called on document warnings
142
- # +string+ contains the warning
229
+ # [Parameters]
230
+ # - +string+ contains the warning
143
231
  def warning(string)
144
232
  end
145
233
 
146
234
  ###
147
235
  # Called on document errors
148
- # +string+ contains the error
236
+ # [Parameters]
237
+ # - +string+ contains the error
149
238
  def error(string)
150
239
  end
151
240
 
152
241
  ###
153
242
  # Called when cdata blocks are found
154
- # +string+ contains the cdata content
243
+ # [Parameters]
244
+ # - +string+ contains the cdata content
155
245
  def cdata_block(string)
156
246
  end
157
247
 
158
248
  ###
159
249
  # Called when processing instructions are found
160
- # +name+ is the target of the instruction
161
- # +content+ is the value of the instruction
250
+ # [Parameters]
251
+ # - +name+ is the target of the instruction
252
+ # - +content+ is the value of the instruction
162
253
  def processing_instruction(name, content)
163
254
  end
164
255
  end
@@ -4,16 +4,15 @@ module Nokogiri
4
4
  module XML
5
5
  module SAX
6
6
  ###
7
- # This parser is a SAX style parser that reads it's input as it
8
- # deems necessary. The parser takes a Nokogiri::XML::SAX::Document,
9
- # an optional encoding, then given an XML input, sends messages to
10
- # the Nokogiri::XML::SAX::Document.
7
+ # This parser is a SAX style parser that reads its input as it deems necessary. The parser
8
+ # takes a Nokogiri::XML::SAX::Document, an optional encoding, then given an XML input, sends
9
+ # messages to the Nokogiri::XML::SAX::Document.
11
10
  #
12
11
  # Here is an example of using this parser:
13
12
  #
14
13
  # # Create a subclass of Nokogiri::XML::SAX::Document and implement
15
14
  # # the events we care about:
16
- # class MyDoc < Nokogiri::XML::SAX::Document
15
+ # class MyHandler < Nokogiri::XML::SAX::Document
17
16
  # def start_element name, attrs = []
18
17
  # puts "starting: #{name}"
19
18
  # end
@@ -23,20 +22,28 @@ module Nokogiri
23
22
  # end
24
23
  # end
25
24
  #
26
- # # Create our parser
27
- # parser = Nokogiri::XML::SAX::Parser.new(MyDoc.new)
25
+ # parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)
28
26
  #
29
- # # Send some XML to the parser
30
- # parser.parse(File.open(ARGV[0]))
27
+ # # Hand an IO object to the parser, which will read the XML from the IO.
28
+ # File.open(path_to_xml) do |f|
29
+ # parser.parse(f)
30
+ # end
31
+ #
32
+ # For more information about \SAX parsers, see Nokogiri::XML::SAX.
33
+ #
34
+ # Also see Nokogiri::XML::SAX::Document for the available events.
35
+ #
36
+ # For \HTML documents, use the subclass Nokogiri::HTML4::SAX::Parser.
31
37
  #
32
- # For more information about SAX parsers, see Nokogiri::XML::SAX. Also
33
- # see Nokogiri::XML::SAX::Document for the available events.
34
38
  class Parser
39
+ # to dynamically resolve ParserContext in inherited methods
40
+ include Nokogiri::ClassResolver
41
+
42
+ # Structure used for marshalling attributes for some callbacks in XML::SAX::Document.
35
43
  class Attribute < Struct.new(:localname, :prefix, :uri, :value)
36
44
  end
37
45
 
38
- # Encodinds this parser supports
39
- ENCODINGS = {
46
+ ENCODINGS = { # :nodoc:
40
47
  "NONE" => 0, # No char encoding detected
41
48
  "UTF-8" => 1, # UTF-8
42
49
  "UTF16LE" => 2, # UTF-16 little endian
@@ -61,6 +68,8 @@ module Nokogiri
61
68
  "EUC-JP" => 21, # EUC-JP
62
69
  "ASCII" => 22, # pure ASCII
63
70
  }
71
+ REVERSE_ENCODINGS = ENCODINGS.invert # :nodoc:
72
+ deprecate_constant :ENCODINGS
64
73
 
65
74
  # The Nokogiri::XML::SAX::Document where events will be sent.
66
75
  attr_accessor :document
@@ -68,57 +77,122 @@ module Nokogiri
68
77
  # The encoding being used for this document.
69
78
  attr_accessor :encoding
70
79
 
71
- # Create a new Parser with +doc+ and +encoding+
72
- def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
73
- @encoding = check_encoding(encoding)
80
+ ###
81
+ # :call-seq:
82
+ # new ⇒ SAX::Parser
83
+ # new(handler) ⇒ SAX::Parser
84
+ # new(handler, encoding) ⇒ SAX::Parser
85
+ #
86
+ # Create a new Parser.
87
+ #
88
+ # [Parameters]
89
+ # - +handler+ (optional Nokogiri::XML::SAX::Document) The document that will receive
90
+ # events. Will create a new Nokogiri::XML::SAX::Document if not given, which is accessible
91
+ # through the #document attribute.
92
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
93
+ # parsing the input. (default +nil+ for auto-detection)
94
+ #
95
+ def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = nil)
96
+ @encoding = encoding
74
97
  @document = doc
75
98
  @warned = false
99
+
100
+ initialize_native unless Nokogiri.jruby?
76
101
  end
77
102
 
78
103
  ###
79
- # Parse given +thing+ which may be a string containing xml, or an
80
- # IO object.
81
- def parse(thing, &block)
82
- if thing.respond_to?(:read) && thing.respond_to?(:close)
83
- parse_io(thing, &block)
104
+ # :call-seq:
105
+ # parse(input) { |parser_context| ... }
106
+ #
107
+ # Parse the input, sending events to the SAX::Document at #document.
108
+ #
109
+ # [Parameters]
110
+ # - +input+ (String, IO) The input to parse.
111
+ #
112
+ # If +input+ quacks like a readable IO object, this method forwards to Parser#parse_io,
113
+ # otherwise it forwards to Parser#parse_memory.
114
+ #
115
+ # [Yields]
116
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
117
+ # to set options on the parser context before parsing begins.
118
+ #
119
+ def parse(input, &block)
120
+ if input.respond_to?(:read) && input.respond_to?(:close)
121
+ parse_io(input, &block)
84
122
  else
85
- parse_memory(thing, &block)
123
+ parse_memory(input, &block)
86
124
  end
87
125
  end
88
126
 
89
127
  ###
90
- # Parse given +io+
128
+ # :call-seq:
129
+ # parse_io(io) { |parser_context| ... }
130
+ # parse_io(io, encoding) { |parser_context| ... }
131
+ #
132
+ # Parse an input stream.
133
+ #
134
+ # [Parameters]
135
+ # - +io+ (IO) The readable IO object from which to read input
136
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
137
+ # parsing the input, or +nil+ for auto-detection. (default #encoding)
138
+ #
139
+ # [Yields]
140
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
141
+ # to set options on the parser context before parsing begins.
142
+ #
91
143
  def parse_io(io, encoding = @encoding)
92
- ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
144
+ ctx = related_class("ParserContext").io(io, encoding)
93
145
  yield ctx if block_given?
94
146
  ctx.parse_with(self)
95
147
  end
96
148
 
97
149
  ###
98
- # Parse a file with +filename+
99
- def parse_file(filename)
100
- raise ArgumentError unless filename
101
- raise Errno::ENOENT unless File.exist?(filename)
102
- raise Errno::EISDIR if File.directory?(filename)
103
-
104
- ctx = ParserContext.file(filename)
150
+ # :call-seq:
151
+ # parse_memory(input) { |parser_context| ... }
152
+ # parse_memory(input, encoding) { |parser_context| ... }
153
+ #
154
+ # Parse an input string.
155
+ #
156
+ # [Parameters]
157
+ # - +input+ (String) The input string to be parsed.
158
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
159
+ # parsing the input, or +nil+ for auto-detection. (default #encoding)
160
+ #
161
+ # [Yields]
162
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
163
+ # to set options on the parser context before parsing begins.
164
+ #
165
+ def parse_memory(input, encoding = @encoding)
166
+ ctx = related_class("ParserContext").memory(input, encoding)
105
167
  yield ctx if block_given?
106
168
  ctx.parse_with(self)
107
169
  end
108
170
 
109
- def parse_memory(data)
110
- ctx = ParserContext.memory(data)
171
+ ###
172
+ # :call-seq:
173
+ # parse_file(filename) { |parser_context| ... }
174
+ # parse_file(filename, encoding) { |parser_context| ... }
175
+ #
176
+ # Parse a file.
177
+ #
178
+ # [Parameters]
179
+ # - +filename+ (String) The path to the file to be parsed.
180
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
181
+ # parsing the input, or +nil+ for auto-detection. (default #encoding)
182
+ #
183
+ # [Yields]
184
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
185
+ # to set options on the parser context before parsing begins.
186
+ #
187
+ def parse_file(filename, encoding = @encoding)
188
+ raise ArgumentError, "no filename provided" unless filename
189
+ raise Errno::ENOENT unless File.exist?(filename)
190
+ raise Errno::EISDIR if File.directory?(filename)
191
+
192
+ ctx = related_class("ParserContext").file(filename, encoding)
111
193
  yield ctx if block_given?
112
194
  ctx.parse_with(self)
113
195
  end
114
-
115
- private
116
-
117
- def check_encoding(encoding)
118
- encoding.upcase.tap do |enc|
119
- raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
120
- end
121
- end
122
196
  end
123
197
  end
124
198
  end