rexml 3.2.5 → 3.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +667 -2
- data/README.md +10 -1
- data/doc/rexml/tasks/rdoc/element.rdoc +2 -2
- data/doc/rexml/tutorial.rdoc +1358 -0
- data/lib/rexml/attribute.rb +24 -19
- data/lib/rexml/cdata.rb +1 -1
- data/lib/rexml/child.rb +2 -3
- data/lib/rexml/comment.rb +1 -1
- data/lib/rexml/doctype.rb +3 -8
- data/lib/rexml/document.rb +27 -7
- data/lib/rexml/element.rb +66 -87
- data/lib/rexml/encoding.rb +3 -6
- data/lib/rexml/entity.rb +9 -38
- data/lib/rexml/formatters/pretty.rb +3 -3
- data/lib/rexml/functions.rb +4 -5
- data/lib/rexml/instruction.rb +1 -1
- data/lib/rexml/namespace.rb +12 -8
- data/lib/rexml/node.rb +10 -6
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +543 -288
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/parsers/xpathparser.rb +139 -89
- data/lib/rexml/quickpath.rb +19 -18
- data/lib/rexml/rexml.rb +3 -1
- data/lib/rexml/security.rb +2 -2
- data/lib/rexml/source.rb +190 -100
- data/lib/rexml/text.rb +74 -78
- data/lib/rexml/validation/relaxng.rb +27 -26
- data/lib/rexml/validation/validation.rb +8 -8
- data/lib/rexml/xpath.rb +2 -13
- data/lib/rexml/xpath_parser.rb +51 -45
- metadata +10 -52
|
@@ -1,12 +1,40 @@
|
|
|
1
|
-
# frozen_string_literal:
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
require_relative '../parseexception'
|
|
3
3
|
require_relative '../undefinednamespaceexception'
|
|
4
|
+
require_relative '../security'
|
|
4
5
|
require_relative '../source'
|
|
5
6
|
require 'set'
|
|
6
7
|
require "strscan"
|
|
7
8
|
|
|
8
9
|
module REXML
|
|
9
10
|
module Parsers
|
|
11
|
+
unless [].respond_to?(:tally)
|
|
12
|
+
module EnumerableTally
|
|
13
|
+
refine Enumerable do
|
|
14
|
+
def tally
|
|
15
|
+
counts = {}
|
|
16
|
+
each do |item|
|
|
17
|
+
counts[item] ||= 0
|
|
18
|
+
counts[item] += 1
|
|
19
|
+
end
|
|
20
|
+
counts
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
using EnumerableTally
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
if StringScanner::Version < "3.0.8"
|
|
28
|
+
module StringScannerCaptures
|
|
29
|
+
refine StringScanner do
|
|
30
|
+
def captures
|
|
31
|
+
values_at(*(1...size))
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
using StringScannerCaptures
|
|
36
|
+
end
|
|
37
|
+
|
|
10
38
|
# = Using the Pull Parser
|
|
11
39
|
# <em>This API is experimental, and subject to change.</em>
|
|
12
40
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
|
@@ -96,7 +124,7 @@ module REXML
|
|
|
96
124
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
|
97
125
|
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
98
126
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
99
|
-
ENTITYDECL = /\s*(?:#{GEDECL})
|
|
127
|
+
ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
|
|
100
128
|
|
|
101
129
|
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
|
102
130
|
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
|
|
@@ -112,9 +140,36 @@ module REXML
|
|
|
112
140
|
"apos" => [/'/, "'", "'", /'/]
|
|
113
141
|
}
|
|
114
142
|
|
|
143
|
+
module Private
|
|
144
|
+
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
|
145
|
+
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
|
146
|
+
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
|
147
|
+
EQUAL_PATTERN = /\s*=\s*/um
|
|
148
|
+
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
149
|
+
NAME_PATTERN = /#{NAME}/um
|
|
150
|
+
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
151
|
+
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
152
|
+
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
|
153
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
|
154
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
|
155
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
|
156
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
|
157
|
+
default_entities.each do |term|
|
|
158
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
|
159
|
+
end
|
|
160
|
+
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
|
161
|
+
end
|
|
162
|
+
private_constant :Private
|
|
163
|
+
|
|
115
164
|
def initialize( source )
|
|
116
165
|
self.stream = source
|
|
117
166
|
@listeners = []
|
|
167
|
+
@prefixes = Set.new
|
|
168
|
+
@entity_expansion_count = 0
|
|
169
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
|
170
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
|
171
|
+
@source.ensure_buffer
|
|
172
|
+
@version = nil
|
|
118
173
|
end
|
|
119
174
|
|
|
120
175
|
def add_listener( listener )
|
|
@@ -122,15 +177,24 @@ module REXML
|
|
|
122
177
|
end
|
|
123
178
|
|
|
124
179
|
attr_reader :source
|
|
180
|
+
attr_reader :entity_expansion_count
|
|
181
|
+
attr_writer :entity_expansion_limit
|
|
182
|
+
attr_writer :entity_expansion_text_limit
|
|
125
183
|
|
|
126
184
|
def stream=( source )
|
|
127
185
|
@source = SourceFactory.create_from( source )
|
|
186
|
+
reset
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
def reset
|
|
128
190
|
@closed = nil
|
|
191
|
+
@have_root = false
|
|
129
192
|
@document_status = nil
|
|
130
193
|
@tags = []
|
|
131
194
|
@stack = []
|
|
132
195
|
@entities = []
|
|
133
|
-
@
|
|
196
|
+
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
|
197
|
+
@namespaces_restore_stack = []
|
|
134
198
|
end
|
|
135
199
|
|
|
136
200
|
def position
|
|
@@ -144,12 +208,12 @@ module REXML
|
|
|
144
208
|
|
|
145
209
|
# Returns true if there are no more events
|
|
146
210
|
def empty?
|
|
147
|
-
|
|
211
|
+
(@source.empty? and @stack.empty?)
|
|
148
212
|
end
|
|
149
213
|
|
|
150
214
|
# Returns true if there are more events. Synonymous with !empty?
|
|
151
215
|
def has_next?
|
|
152
|
-
|
|
216
|
+
!(@source.empty? and @stack.empty?)
|
|
153
217
|
end
|
|
154
218
|
|
|
155
219
|
# Push an event back on the head of the stream. This method
|
|
@@ -180,6 +244,8 @@ module REXML
|
|
|
180
244
|
|
|
181
245
|
# Returns the next event. This is a +PullEvent+ object.
|
|
182
246
|
def pull
|
|
247
|
+
@source.drop_parsed_content
|
|
248
|
+
|
|
183
249
|
pull_event.tap do |event|
|
|
184
250
|
@listeners.each do |listener|
|
|
185
251
|
listener.receive event
|
|
@@ -192,236 +258,268 @@ module REXML
|
|
|
192
258
|
x, @closed = @closed, nil
|
|
193
259
|
return [ :end_element, x ]
|
|
194
260
|
end
|
|
195
|
-
|
|
261
|
+
if empty?
|
|
262
|
+
if @document_status == :in_doctype
|
|
263
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
|
264
|
+
end
|
|
265
|
+
unless @tags.empty?
|
|
266
|
+
path = "/" + @tags.join("/")
|
|
267
|
+
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
unless @document_status == :in_element
|
|
271
|
+
raise ParseException.new("Malformed XML: No root element", @source)
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
return [ :end_document ]
|
|
275
|
+
end
|
|
196
276
|
return @stack.shift if @stack.size > 0
|
|
197
277
|
#STDERR.puts @source.encoding
|
|
198
278
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
|
279
|
+
|
|
280
|
+
@source.ensure_buffer
|
|
199
281
|
if @document_status == nil
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
#STDERR.puts "WORD = #{word.inspect}"
|
|
203
|
-
case word
|
|
204
|
-
when COMMENT_START
|
|
205
|
-
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
|
|
206
|
-
when XMLDECL_START
|
|
207
|
-
#STDERR.puts "XMLDECL"
|
|
208
|
-
results = @source.match( XMLDECL_PATTERN, true )[1]
|
|
209
|
-
version = VERSION.match( results )
|
|
210
|
-
version = version[1] unless version.nil?
|
|
211
|
-
encoding = ENCODING.match(results)
|
|
212
|
-
encoding = encoding[1] unless encoding.nil?
|
|
213
|
-
if need_source_encoding_update?(encoding)
|
|
214
|
-
@source.encoding = encoding
|
|
215
|
-
end
|
|
216
|
-
if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
|
|
217
|
-
encoding = "UTF-16"
|
|
218
|
-
end
|
|
219
|
-
standalone = STANDALONE.match(results)
|
|
220
|
-
standalone = standalone[1] unless standalone.nil?
|
|
221
|
-
return [ :xmldecl, version, encoding, standalone ]
|
|
222
|
-
when INSTRUCTION_START
|
|
282
|
+
start_position = @source.position
|
|
283
|
+
if @source.match?("<?", true)
|
|
223
284
|
return process_instruction
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
@
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
accept_external_id: true,
|
|
238
|
-
accept_public_id: false)
|
|
239
|
-
if id[0] == "SYSTEM"
|
|
240
|
-
# For backward compatibility
|
|
241
|
-
id[1], id[2] = id[2], nil
|
|
285
|
+
elsif @source.match?("<!", true)
|
|
286
|
+
if @source.match?("--", true)
|
|
287
|
+
return [ :comment, process_comment ]
|
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
|
289
|
+
base_error_message = "Malformed DOCTYPE"
|
|
290
|
+
unless @source.skip_spaces
|
|
291
|
+
if @source.match?(">")
|
|
292
|
+
message = "#{base_error_message}: name is missing"
|
|
293
|
+
else
|
|
294
|
+
message = "#{base_error_message}: invalid name"
|
|
295
|
+
end
|
|
296
|
+
@source.position = start_position
|
|
297
|
+
raise REXML::ParseException.new(message, @source)
|
|
242
298
|
end
|
|
243
|
-
|
|
299
|
+
name = parse_name(base_error_message)
|
|
300
|
+
@source.skip_spaces
|
|
301
|
+
if @source.match?("[", true)
|
|
302
|
+
id = [nil, nil, nil]
|
|
244
303
|
@document_status = :in_doctype
|
|
245
|
-
elsif @source.match(
|
|
304
|
+
elsif @source.match?(">", true)
|
|
305
|
+
id = [nil, nil, nil]
|
|
246
306
|
@document_status = :after_doctype
|
|
307
|
+
@source.ensure_buffer
|
|
247
308
|
else
|
|
248
|
-
|
|
249
|
-
|
|
309
|
+
id = parse_id(base_error_message,
|
|
310
|
+
accept_external_id: true,
|
|
311
|
+
accept_public_id: false)
|
|
312
|
+
if id[0] == "SYSTEM"
|
|
313
|
+
# For backward compatibility
|
|
314
|
+
id[1], id[2] = id[2], nil
|
|
315
|
+
end
|
|
316
|
+
@source.skip_spaces
|
|
317
|
+
if @source.match?("[", true)
|
|
318
|
+
@document_status = :in_doctype
|
|
319
|
+
elsif @source.match?(">", true)
|
|
320
|
+
@document_status = :after_doctype
|
|
321
|
+
@source.ensure_buffer
|
|
322
|
+
else
|
|
323
|
+
message = "#{base_error_message}: garbage after external ID"
|
|
324
|
+
raise REXML::ParseException.new(message, @source)
|
|
325
|
+
end
|
|
250
326
|
end
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
@document_status = :after_doctype
|
|
261
|
-
if @source.encoding == "UTF-8"
|
|
262
|
-
@source.buffer.force_encoding(::Encoding::UTF_8)
|
|
327
|
+
args = [:start_doctype, name, *id]
|
|
328
|
+
if @document_status == :after_doctype
|
|
329
|
+
@source.skip_spaces
|
|
330
|
+
@stack << [ :end_doctype ]
|
|
331
|
+
end
|
|
332
|
+
return args
|
|
333
|
+
else
|
|
334
|
+
message = "Invalid XML"
|
|
335
|
+
raise REXML::ParseException.new(message, @source)
|
|
263
336
|
end
|
|
264
337
|
end
|
|
265
338
|
end
|
|
266
339
|
if @document_status == :in_doctype
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
match[0] = :entitydecl
|
|
279
|
-
ref = false
|
|
280
|
-
if match[1] == '%'
|
|
281
|
-
ref = true
|
|
282
|
-
match.delete_at 1
|
|
283
|
-
end
|
|
284
|
-
# Now we have to sort out what kind of entity reference this is
|
|
285
|
-
if match[2] == 'SYSTEM'
|
|
286
|
-
# External reference
|
|
287
|
-
match[3] = match[3][1..-2] # PUBID
|
|
288
|
-
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
|
289
|
-
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
|
290
|
-
elsif match[2] == 'PUBLIC'
|
|
291
|
-
# External reference
|
|
292
|
-
match[3] = match[3][1..-2] # PUBID
|
|
293
|
-
match[4] = match[4][1..-2] # HREF
|
|
294
|
-
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
|
295
|
-
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
|
296
|
-
else
|
|
297
|
-
match[2] = match[2][1..-2]
|
|
298
|
-
match.pop if match.size == 4
|
|
299
|
-
# match is [ :entity, name, value ]
|
|
300
|
-
end
|
|
301
|
-
match << '%' if ref
|
|
302
|
-
return match
|
|
303
|
-
when ATTLISTDECL_START
|
|
304
|
-
md = @source.match( ATTLISTDECL_PATTERN, true )
|
|
305
|
-
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
|
306
|
-
element = md[1]
|
|
307
|
-
contents = md[0]
|
|
308
|
-
|
|
309
|
-
pairs = {}
|
|
310
|
-
values = md[0].scan( ATTDEF_RE )
|
|
311
|
-
values.each do |attdef|
|
|
312
|
-
unless attdef[3] == "#IMPLIED"
|
|
313
|
-
attdef.compact!
|
|
314
|
-
val = attdef[3]
|
|
315
|
-
val = attdef[4] if val == "#FIXED "
|
|
316
|
-
pairs[attdef[0]] = val
|
|
317
|
-
if attdef[0] =~ /^xmlns:(.*)/
|
|
318
|
-
@nsstack[0] << $1
|
|
319
|
-
end
|
|
340
|
+
@source.skip_spaces
|
|
341
|
+
start_position = @source.position
|
|
342
|
+
if @source.match?("<!", true)
|
|
343
|
+
if @source.match?("ELEMENT", true)
|
|
344
|
+
md = @source.match(/(.*?)>/um, true)
|
|
345
|
+
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
|
346
|
+
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
|
347
|
+
elsif @source.match?("ENTITY", true)
|
|
348
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
|
349
|
+
unless match_data
|
|
350
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
|
320
351
|
end
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
352
|
+
match = [:entitydecl, *match_data.captures.compact]
|
|
353
|
+
ref = false
|
|
354
|
+
if match[1] == '%'
|
|
355
|
+
ref = true
|
|
356
|
+
match.delete_at 1
|
|
357
|
+
end
|
|
358
|
+
# Now we have to sort out what kind of entity reference this is
|
|
359
|
+
if match[2] == 'SYSTEM'
|
|
360
|
+
# External reference
|
|
361
|
+
match[3] = match[3][1..-2] # PUBID
|
|
362
|
+
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
|
363
|
+
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
|
364
|
+
elsif match[2] == 'PUBLIC'
|
|
365
|
+
# External reference
|
|
366
|
+
match[3] = match[3][1..-2] # PUBID
|
|
367
|
+
match[4] = match[4][1..-2] # HREF
|
|
368
|
+
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
|
369
|
+
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
|
370
|
+
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
|
|
371
|
+
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
|
|
328
372
|
else
|
|
329
|
-
|
|
373
|
+
match[2] = match[2][1..-2]
|
|
374
|
+
match.pop if match.size == 4
|
|
375
|
+
# match is [ :entity, name, value ]
|
|
330
376
|
end
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
377
|
+
match << '%' if ref
|
|
378
|
+
return match
|
|
379
|
+
elsif @source.match?("ATTLIST", true)
|
|
380
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
|
381
|
+
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
|
382
|
+
element = md[1]
|
|
383
|
+
contents = "<!ATTLIST" + md[0]
|
|
384
|
+
|
|
385
|
+
pairs = {}
|
|
386
|
+
values = md[0].strip.scan( ATTDEF_RE )
|
|
387
|
+
values.each do |attdef|
|
|
388
|
+
unless attdef[3] == "#IMPLIED"
|
|
389
|
+
attdef.compact!
|
|
390
|
+
val = attdef[3]
|
|
391
|
+
val = attdef[4] if val == "#FIXED "
|
|
392
|
+
pairs[attdef[0]] = val
|
|
393
|
+
if attdef[0] =~ /^xmlns:(.*)/
|
|
394
|
+
@namespaces[$1] = val
|
|
395
|
+
end
|
|
396
|
+
end
|
|
397
|
+
end
|
|
398
|
+
return [ :attlistdecl, element, pairs, contents ]
|
|
399
|
+
elsif @source.match?("NOTATION", true)
|
|
400
|
+
base_error_message = "Malformed notation declaration"
|
|
401
|
+
unless @source.skip_spaces
|
|
402
|
+
if @source.match?(">")
|
|
403
|
+
message = "#{base_error_message}: name is missing"
|
|
404
|
+
else
|
|
405
|
+
message = "#{base_error_message}: invalid name"
|
|
406
|
+
end
|
|
407
|
+
@source.position = start_position
|
|
408
|
+
raise REXML::ParseException.new(message, @source)
|
|
409
|
+
end
|
|
410
|
+
name = parse_name(base_error_message)
|
|
411
|
+
id = parse_id(base_error_message,
|
|
412
|
+
accept_external_id: true,
|
|
413
|
+
accept_public_id: true)
|
|
414
|
+
@source.skip_spaces
|
|
415
|
+
unless @source.match?(">", true)
|
|
416
|
+
message = "#{base_error_message}: garbage before end >"
|
|
417
|
+
raise REXML::ParseException.new(message, @source)
|
|
418
|
+
end
|
|
419
|
+
return [:notationdecl, name, *id]
|
|
420
|
+
elsif @source.match?("--", true)
|
|
421
|
+
return [ :comment, process_comment ]
|
|
422
|
+
else
|
|
423
|
+
raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source)
|
|
340
424
|
end
|
|
341
|
-
|
|
342
|
-
|
|
425
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
|
426
|
+
return [ :externalentity, match[1] ]
|
|
427
|
+
elsif @source.match?(/\]\s*>/um, true)
|
|
343
428
|
@document_status = :after_doctype
|
|
344
|
-
@source.match( DOCTYPE_END, true )
|
|
345
429
|
return [ :end_doctype ]
|
|
430
|
+
else
|
|
431
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
|
346
432
|
end
|
|
347
433
|
end
|
|
348
434
|
if @document_status == :after_doctype
|
|
349
|
-
@source.
|
|
435
|
+
@source.skip_spaces
|
|
350
436
|
end
|
|
351
437
|
begin
|
|
352
|
-
|
|
353
|
-
if @source.
|
|
354
|
-
|
|
355
|
-
|
|
438
|
+
start_position = @source.position
|
|
439
|
+
if @source.match?("<", true)
|
|
440
|
+
# :text's read_until may remain only "<" in buffer. In the
|
|
441
|
+
# case, buffer is empty here. So we need to fill buffer
|
|
442
|
+
# here explicitly.
|
|
443
|
+
@source.ensure_buffer
|
|
444
|
+
if @source.match?("/", true)
|
|
445
|
+
@namespaces_restore_stack.pop
|
|
356
446
|
last_tag = @tags.pop
|
|
357
|
-
md = @source.match(
|
|
447
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
|
358
448
|
if md and !last_tag
|
|
359
449
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
|
360
450
|
raise REXML::ParseException.new(message, @source)
|
|
361
451
|
end
|
|
362
452
|
if md.nil? or last_tag != md[1]
|
|
363
453
|
message = "Missing end tag for '#{last_tag}'"
|
|
364
|
-
message
|
|
454
|
+
message += " (got '#{md[1]}')" if md
|
|
455
|
+
@source.position = start_position if md.nil?
|
|
365
456
|
raise REXML::ParseException.new(message, @source)
|
|
366
457
|
end
|
|
367
458
|
return [ :end_element, last_tag ]
|
|
368
|
-
elsif @source.
|
|
369
|
-
md = @source.match(/\A(\s*[^>]*>)/um)
|
|
459
|
+
elsif @source.match?("!", true)
|
|
370
460
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
461
|
+
if @source.match?("--", true)
|
|
462
|
+
return [ :comment, process_comment ]
|
|
463
|
+
elsif @source.match?("[CDATA[", true)
|
|
464
|
+
text = @source.read_until("]]>")
|
|
465
|
+
if text.chomp!("]]>")
|
|
466
|
+
return [ :cdata, text ]
|
|
467
|
+
else
|
|
468
|
+
raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
|
|
378
469
|
end
|
|
379
|
-
|
|
380
|
-
return [ :comment, md[1] ] if md
|
|
381
470
|
else
|
|
382
|
-
|
|
383
|
-
return [ :cdata, md[1] ] if md
|
|
471
|
+
raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor CDATA", @source)
|
|
384
472
|
end
|
|
385
|
-
|
|
386
|
-
"in the doctype declaration.", @source)
|
|
387
|
-
elsif @source.buffer[1] == ??
|
|
473
|
+
elsif @source.match?("?", true)
|
|
388
474
|
return process_instruction
|
|
389
475
|
else
|
|
390
476
|
# Get the next tag
|
|
391
|
-
md = @source.match(
|
|
477
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
|
392
478
|
unless md
|
|
479
|
+
@source.position = start_position
|
|
393
480
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
|
394
481
|
end
|
|
482
|
+
tag = md[1]
|
|
395
483
|
@document_status = :in_element
|
|
396
|
-
prefixes
|
|
397
|
-
prefixes << md[2] if md[2]
|
|
398
|
-
|
|
399
|
-
attributes, closed = parse_attributes(prefixes
|
|
484
|
+
@prefixes.clear
|
|
485
|
+
@prefixes << md[2] if md[2]
|
|
486
|
+
push_namespaces_restore
|
|
487
|
+
attributes, closed = parse_attributes(@prefixes)
|
|
400
488
|
# Verify that all of the prefixes have been defined
|
|
401
|
-
for prefix in prefixes
|
|
402
|
-
unless @
|
|
489
|
+
for prefix in @prefixes
|
|
490
|
+
unless @namespaces.key?(prefix)
|
|
403
491
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
|
404
492
|
end
|
|
405
493
|
end
|
|
406
494
|
|
|
407
495
|
if closed
|
|
408
|
-
@closed =
|
|
409
|
-
|
|
496
|
+
@closed = tag
|
|
497
|
+
pop_namespaces_restore
|
|
410
498
|
else
|
|
411
|
-
@tags.
|
|
499
|
+
if @tags.empty? and @have_root
|
|
500
|
+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
|
501
|
+
end
|
|
502
|
+
@tags.push( tag )
|
|
412
503
|
end
|
|
413
|
-
|
|
504
|
+
@have_root = true
|
|
505
|
+
return [ :start_element, tag, attributes ]
|
|
414
506
|
end
|
|
415
507
|
else
|
|
416
|
-
|
|
417
|
-
if
|
|
418
|
-
@source.
|
|
508
|
+
text = @source.read_until("<")
|
|
509
|
+
if text.chomp!("<")
|
|
510
|
+
@source.position -= "<".bytesize
|
|
419
511
|
end
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
512
|
+
if @tags.empty?
|
|
513
|
+
unless /\A\s*\z/.match?(text)
|
|
514
|
+
if @have_root
|
|
515
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
|
516
|
+
else
|
|
517
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
|
518
|
+
end
|
|
519
|
+
end
|
|
520
|
+
return pull_event if @have_root
|
|
521
|
+
end
|
|
522
|
+
return [ :text, text ]
|
|
425
523
|
end
|
|
426
524
|
rescue REXML::UndefinedNamespaceException
|
|
427
525
|
raise
|
|
@@ -431,18 +529,19 @@ module REXML
|
|
|
431
529
|
raise REXML::ParseException.new( "Exception parsing",
|
|
432
530
|
@source, self, (error ? error : $!) )
|
|
433
531
|
end
|
|
434
|
-
|
|
532
|
+
# NOTE: The end of the method never runs, because it is unreachable.
|
|
533
|
+
# All branches of code above have explicit unconditional return or raise statements.
|
|
435
534
|
end
|
|
436
535
|
private :pull_event
|
|
437
536
|
|
|
438
537
|
def entity( reference, entities )
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
unnormalize( value, entities )
|
|
538
|
+
return unless entities
|
|
539
|
+
|
|
540
|
+
value = entities[ reference ]
|
|
541
|
+
return if value.nil?
|
|
542
|
+
|
|
543
|
+
record_entity_expansion
|
|
544
|
+
unnormalize( value, entities )
|
|
446
545
|
end
|
|
447
546
|
|
|
448
547
|
# Escapes all possible entities
|
|
@@ -463,52 +562,108 @@ module REXML
|
|
|
463
562
|
|
|
464
563
|
# Unescapes all possible entities
|
|
465
564
|
def unnormalize( string, entities=nil, filter=nil )
|
|
466
|
-
|
|
467
|
-
|
|
565
|
+
if string.include?("\r")
|
|
566
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
|
567
|
+
else
|
|
568
|
+
rv = string.dup
|
|
569
|
+
end
|
|
468
570
|
matches = rv.scan( REFERENCE_RE )
|
|
469
571
|
return rv if matches.size == 0
|
|
470
|
-
rv.gsub!(
|
|
572
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
|
471
573
|
m=$1
|
|
472
|
-
|
|
473
|
-
|
|
574
|
+
if m.start_with?("x")
|
|
575
|
+
code_point = Integer(m[1..-1], 16)
|
|
576
|
+
else
|
|
577
|
+
code_point = Integer(m, 10)
|
|
578
|
+
end
|
|
579
|
+
[code_point].pack('U*')
|
|
474
580
|
}
|
|
475
581
|
matches.collect!{|x|x[0]}.compact!
|
|
582
|
+
if filter
|
|
583
|
+
matches.reject! do |entity_reference|
|
|
584
|
+
filter.include?(entity_reference)
|
|
585
|
+
end
|
|
586
|
+
end
|
|
476
587
|
if matches.size > 0
|
|
477
|
-
matches.each do |entity_reference|
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
588
|
+
matches.tally.each do |entity_reference, n|
|
|
589
|
+
entity_expansion_count_before = @entity_expansion_count
|
|
590
|
+
entity_value = entity( entity_reference, entities )
|
|
591
|
+
if entity_value
|
|
592
|
+
if n > 1
|
|
593
|
+
entity_expansion_count_delta =
|
|
594
|
+
@entity_expansion_count - entity_expansion_count_before
|
|
595
|
+
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
|
596
|
+
end
|
|
597
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
|
598
|
+
rv.gsub!( re, entity_value )
|
|
599
|
+
if rv.bytesize > @entity_expansion_text_limit
|
|
600
|
+
raise "entity expansion has grown too large"
|
|
486
601
|
end
|
|
602
|
+
else
|
|
603
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
|
604
|
+
rv.gsub!( er[0], er[2] ) if er
|
|
487
605
|
end
|
|
488
606
|
end
|
|
489
|
-
rv.gsub!(
|
|
607
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
|
490
608
|
end
|
|
491
609
|
rv
|
|
492
610
|
end
|
|
493
611
|
|
|
494
612
|
private
|
|
613
|
+
def add_namespace(prefix, uri)
|
|
614
|
+
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
|
615
|
+
if uri.nil?
|
|
616
|
+
@namespaces.delete(prefix)
|
|
617
|
+
else
|
|
618
|
+
@namespaces[prefix] = uri
|
|
619
|
+
end
|
|
620
|
+
end
|
|
621
|
+
|
|
622
|
+
def push_namespaces_restore
|
|
623
|
+
namespaces_restore = {}
|
|
624
|
+
@namespaces_restore_stack.push(namespaces_restore)
|
|
625
|
+
namespaces_restore
|
|
626
|
+
end
|
|
627
|
+
|
|
628
|
+
def pop_namespaces_restore
|
|
629
|
+
namespaces_restore = @namespaces_restore_stack.pop
|
|
630
|
+
namespaces_restore.each do |prefix, uri|
|
|
631
|
+
if uri.nil?
|
|
632
|
+
@namespaces.delete(prefix)
|
|
633
|
+
else
|
|
634
|
+
@namespaces[prefix] = uri
|
|
635
|
+
end
|
|
636
|
+
end
|
|
637
|
+
end
|
|
638
|
+
|
|
639
|
+
def record_entity_expansion(delta=1)
|
|
640
|
+
@entity_expansion_count += delta
|
|
641
|
+
if @entity_expansion_count > @entity_expansion_limit
|
|
642
|
+
raise "number of entity expansions exceeded, processing aborted."
|
|
643
|
+
end
|
|
644
|
+
end
|
|
645
|
+
|
|
495
646
|
def need_source_encoding_update?(xml_declaration_encoding)
|
|
496
647
|
return false if xml_declaration_encoding.nil?
|
|
497
648
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
|
498
649
|
true
|
|
499
650
|
end
|
|
500
651
|
|
|
652
|
+
def normalize_xml_declaration_encoding(xml_declaration_encoding)
|
|
653
|
+
/\AUTF-16(?:BE|LE)\z/i.match?(xml_declaration_encoding) ? "UTF-16" : nil
|
|
654
|
+
end
|
|
655
|
+
|
|
501
656
|
def parse_name(base_error_message)
|
|
502
|
-
md = @source.match(
|
|
657
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
|
503
658
|
unless md
|
|
504
|
-
if @source.match(/\
|
|
659
|
+
if @source.match?(/\S/um)
|
|
505
660
|
message = "#{base_error_message}: invalid name"
|
|
506
661
|
else
|
|
507
662
|
message = "#{base_error_message}: name is missing"
|
|
508
663
|
end
|
|
509
664
|
raise REXML::ParseException.new(message, @source)
|
|
510
665
|
end
|
|
511
|
-
md[
|
|
666
|
+
md[0]
|
|
512
667
|
end
|
|
513
668
|
|
|
514
669
|
def parse_id(base_error_message,
|
|
@@ -543,131 +698,231 @@ module REXML
|
|
|
543
698
|
accept_public_id:)
|
|
544
699
|
public = /\A\s*PUBLIC/um
|
|
545
700
|
system = /\A\s*SYSTEM/um
|
|
546
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
|
547
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
701
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
|
702
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
548
703
|
return "public ID literal is missing"
|
|
549
704
|
end
|
|
550
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
705
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
551
706
|
return "invalid public ID literal"
|
|
552
707
|
end
|
|
553
708
|
if accept_public_id
|
|
554
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
709
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
555
710
|
return "system ID literal is missing"
|
|
556
711
|
end
|
|
557
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
712
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
558
713
|
return "invalid system literal"
|
|
559
714
|
end
|
|
560
715
|
"garbage after system literal"
|
|
561
716
|
else
|
|
562
717
|
"garbage after public ID literal"
|
|
563
718
|
end
|
|
564
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
|
565
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
719
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
|
720
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
566
721
|
return "system literal is missing"
|
|
567
722
|
end
|
|
568
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
723
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
569
724
|
return "invalid system literal"
|
|
570
725
|
end
|
|
571
726
|
"garbage after system literal"
|
|
572
727
|
else
|
|
573
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
728
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
574
729
|
return "invalid ID type"
|
|
575
730
|
end
|
|
576
731
|
"ID type is missing"
|
|
577
732
|
end
|
|
578
733
|
end
|
|
579
734
|
|
|
580
|
-
def
|
|
581
|
-
|
|
582
|
-
unless
|
|
583
|
-
|
|
584
|
-
raise REXML::ParseException.new(message, @source)
|
|
735
|
+
def process_comment
|
|
736
|
+
text = @source.read_until("-->")
|
|
737
|
+
unless text.chomp!("-->")
|
|
738
|
+
raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
|
|
585
739
|
end
|
|
586
|
-
[:processing_instruction, match_data[1], match_data[2]]
|
|
587
|
-
end
|
|
588
740
|
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
closed = false
|
|
592
|
-
match_data = @source.match(/^(.*?)(\/)?>/um, true)
|
|
593
|
-
if match_data.nil?
|
|
594
|
-
message = "Start tag isn't ended"
|
|
595
|
-
raise REXML::ParseException.new(message, @source)
|
|
741
|
+
if text.include? "--" or text.end_with?("-")
|
|
742
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
|
596
743
|
end
|
|
744
|
+
text
|
|
745
|
+
end
|
|
597
746
|
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
747
|
+
def process_instruction
|
|
748
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
|
749
|
+
if name == "xml"
|
|
750
|
+
xml_declaration
|
|
751
|
+
else # PITarget
|
|
752
|
+
if @source.skip_spaces # e.g. <?name content?>
|
|
753
|
+
start_position = @source.position
|
|
754
|
+
content = @source.read_until("?>")
|
|
755
|
+
unless content.chomp!("?>")
|
|
756
|
+
@source.position = start_position
|
|
757
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
|
|
758
|
+
end
|
|
759
|
+
else # e.g. <?name?>
|
|
760
|
+
content = nil
|
|
761
|
+
unless @source.match?("?>", true)
|
|
762
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
|
|
763
|
+
end
|
|
764
|
+
end
|
|
765
|
+
[:processing_instruction, name, content]
|
|
766
|
+
end
|
|
767
|
+
end
|
|
602
768
|
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
769
|
+
def xml_declaration
|
|
770
|
+
unless @version.nil?
|
|
771
|
+
raise ParseException.new("Malformed XML: XML declaration is duplicated", @source)
|
|
772
|
+
end
|
|
773
|
+
if @document_status
|
|
774
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
|
775
|
+
end
|
|
776
|
+
unless @source.skip_spaces
|
|
777
|
+
raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source)
|
|
778
|
+
end
|
|
779
|
+
unless @source.match?("version", true)
|
|
780
|
+
raise ParseException.new("Malformed XML: XML declaration misses version", @source)
|
|
781
|
+
end
|
|
782
|
+
@version = parse_attribute_value_with_equal("xml")
|
|
783
|
+
unless @source.skip_spaces
|
|
784
|
+
unless @source.match?("?>", true)
|
|
785
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
607
786
|
end
|
|
787
|
+
encoding = normalize_xml_declaration_encoding(@source.encoding)
|
|
788
|
+
return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.0"?>
|
|
789
|
+
end
|
|
608
790
|
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
unless
|
|
613
|
-
|
|
614
|
-
raise REXML::ParseException.new(message, @source)
|
|
615
|
-
end
|
|
616
|
-
name = scanner[0]
|
|
617
|
-
unless scanner.scan(/\s*=\s*/um)
|
|
618
|
-
message = "Missing attribute equal: <#{name}>"
|
|
619
|
-
raise REXML::ParseException.new(message, @source)
|
|
620
|
-
end
|
|
621
|
-
quote = scanner.scan(/['"]/)
|
|
622
|
-
unless quote
|
|
623
|
-
message = "Missing attribute value start quote: <#{name}>"
|
|
624
|
-
raise REXML::ParseException.new(message, @source)
|
|
791
|
+
if @source.match?("encoding", true)
|
|
792
|
+
encoding = parse_attribute_value_with_equal("xml")
|
|
793
|
+
unless @source.skip_spaces
|
|
794
|
+
unless @source.match?("?>", true)
|
|
795
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
625
796
|
end
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
if match_data
|
|
629
|
-
scanner << "/" if closed
|
|
630
|
-
scanner << ">"
|
|
631
|
-
scanner << match_data[1]
|
|
632
|
-
scanner.pos = pos
|
|
633
|
-
closed = !match_data[2].nil?
|
|
634
|
-
next
|
|
635
|
-
end
|
|
636
|
-
message =
|
|
637
|
-
"Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
638
|
-
raise REXML::ParseException.new(message, @source)
|
|
797
|
+
if need_source_encoding_update?(encoding)
|
|
798
|
+
@source.encoding = encoding
|
|
639
799
|
end
|
|
800
|
+
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
801
|
+
return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.1" encoding="UTF-8"?>
|
|
802
|
+
end
|
|
803
|
+
end
|
|
804
|
+
|
|
805
|
+
if @source.match?("standalone", true)
|
|
806
|
+
standalone = parse_attribute_value_with_equal("xml")
|
|
807
|
+
case standalone
|
|
808
|
+
when "yes", "no"
|
|
809
|
+
else
|
|
810
|
+
raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source)
|
|
811
|
+
end
|
|
812
|
+
end
|
|
813
|
+
@source.skip_spaces
|
|
814
|
+
unless @source.match?("?>", true)
|
|
815
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
816
|
+
end
|
|
817
|
+
|
|
818
|
+
if need_source_encoding_update?(encoding)
|
|
819
|
+
@source.encoding = encoding
|
|
820
|
+
end
|
|
821
|
+
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
822
|
+
|
|
823
|
+
# e.g. <?xml version="1.0" ?>
|
|
824
|
+
# <?xml version="1.1" encoding="UTF-8" ?>
|
|
825
|
+
# <?xml version="1.1" standalone="yes"?>
|
|
826
|
+
# <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
|
|
827
|
+
[ :xmldecl, @version, encoding, standalone ]
|
|
828
|
+
end
|
|
829
|
+
|
|
830
|
+
if StringScanner::Version < "3.1.1"
|
|
831
|
+
def scan_quote
|
|
832
|
+
@source.match(/(['"])/, true)&.[](1)
|
|
833
|
+
end
|
|
834
|
+
else
|
|
835
|
+
def scan_quote
|
|
836
|
+
case @source.peek_byte
|
|
837
|
+
when 34 # '"'.ord
|
|
838
|
+
@source.scan_byte
|
|
839
|
+
'"'
|
|
840
|
+
when 39 # "'".ord
|
|
841
|
+
@source.scan_byte
|
|
842
|
+
"'"
|
|
843
|
+
else
|
|
844
|
+
nil
|
|
640
845
|
end
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
846
|
+
end
|
|
847
|
+
end
|
|
848
|
+
|
|
849
|
+
def parse_attribute_value_with_equal(name)
|
|
850
|
+
unless @source.match?(Private::EQUAL_PATTERN, true)
|
|
851
|
+
message = "Missing attribute equal: <#{name}>"
|
|
852
|
+
raise REXML::ParseException.new(message, @source)
|
|
853
|
+
end
|
|
854
|
+
unless quote = scan_quote
|
|
855
|
+
message = "Missing attribute value start quote: <#{name}>"
|
|
856
|
+
raise REXML::ParseException.new(message, @source)
|
|
857
|
+
end
|
|
858
|
+
start_position = @source.position
|
|
859
|
+
value = @source.read_until(quote)
|
|
860
|
+
unless value.chomp!(quote)
|
|
861
|
+
@source.position = start_position
|
|
862
|
+
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
863
|
+
raise REXML::ParseException.new(message, @source)
|
|
864
|
+
end
|
|
865
|
+
value
|
|
866
|
+
end
|
|
867
|
+
|
|
868
|
+
def parse_attributes(prefixes)
|
|
869
|
+
attributes = {}
|
|
870
|
+
expanded_names = {}
|
|
871
|
+
closed = false
|
|
872
|
+
while true
|
|
873
|
+
if @source.match?(">", true)
|
|
874
|
+
return attributes, closed
|
|
875
|
+
elsif @source.match?("/>", true)
|
|
876
|
+
closed = true
|
|
877
|
+
return attributes, closed
|
|
878
|
+
elsif match = @source.match(QNAME, true)
|
|
879
|
+
name = match[1]
|
|
880
|
+
prefix = match[2]
|
|
881
|
+
local_part = match[3]
|
|
882
|
+
value = parse_attribute_value_with_equal(name)
|
|
883
|
+
@source.skip_spaces
|
|
884
|
+
if prefix == "xmlns"
|
|
885
|
+
if local_part == "xml"
|
|
886
|
+
if value != Private::XML_PREFIXED_NAMESPACE
|
|
887
|
+
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
|
888
|
+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
|
889
|
+
raise REXML::ParseException.new( msg, @source, self )
|
|
890
|
+
end
|
|
891
|
+
elsif local_part == "xmlns"
|
|
892
|
+
msg = "The 'xmlns' prefix must not be declared "+
|
|
650
893
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
|
651
|
-
raise REXML::ParseException.new( msg, @source, self
|
|
894
|
+
raise REXML::ParseException.new( msg, @source, self)
|
|
652
895
|
end
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
raise REXML::ParseException.new( msg, @source, self)
|
|
896
|
+
add_namespace(local_part, value)
|
|
897
|
+
elsif prefix
|
|
898
|
+
prefixes << prefix unless prefix == "xml"
|
|
657
899
|
end
|
|
658
|
-
curr_ns << local_part
|
|
659
|
-
elsif prefix
|
|
660
|
-
prefixes << prefix unless prefix == "xml"
|
|
661
|
-
end
|
|
662
900
|
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
901
|
+
if attributes[name]
|
|
902
|
+
msg = "Duplicate attribute #{name.inspect}"
|
|
903
|
+
raise REXML::ParseException.new(msg, @source, self)
|
|
904
|
+
end
|
|
667
905
|
|
|
668
|
-
|
|
906
|
+
unless prefix == "xmlns"
|
|
907
|
+
uri = @namespaces[prefix]
|
|
908
|
+
expanded_name = [uri, local_part]
|
|
909
|
+
existing_prefix = expanded_names[expanded_name]
|
|
910
|
+
if existing_prefix
|
|
911
|
+
message = "Namespace conflict in adding attribute " +
|
|
912
|
+
"\"#{local_part}\": " +
|
|
913
|
+
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
|
914
|
+
"prefix \"#{prefix}\" = \"#{uri}\""
|
|
915
|
+
raise REXML::ParseException.new(message, @source, self)
|
|
916
|
+
end
|
|
917
|
+
expanded_names[expanded_name] = prefix
|
|
918
|
+
end
|
|
919
|
+
|
|
920
|
+
attributes[name] = value
|
|
921
|
+
else
|
|
922
|
+
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
|
923
|
+
raise REXML::ParseException.new(message, @source)
|
|
924
|
+
end
|
|
669
925
|
end
|
|
670
|
-
return attributes, closed
|
|
671
926
|
end
|
|
672
927
|
end
|
|
673
928
|
end
|