rexml 3.2.5 → 3.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +204 -2
- data/README.md +10 -1
- data/doc/rexml/tasks/rdoc/element.rdoc +2 -2
- data/doc/rexml/tutorial.rdoc +1358 -0
- data/lib/rexml/attribute.rb +14 -9
- data/lib/rexml/document.rb +1 -1
- data/lib/rexml/element.rb +3 -3
- data/lib/rexml/entity.rb +25 -15
- data/lib/rexml/formatters/pretty.rb +2 -2
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/namespace.rb +8 -4
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +247 -229
- data/lib/rexml/parsers/xpathparser.rb +136 -86
- data/lib/rexml/rexml.rb +3 -1
- data/lib/rexml/source.rb +114 -100
- data/lib/rexml/text.rb +6 -4
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +12 -38
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal:
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
4
|
require_relative '../source'
|
@@ -7,6 +7,17 @@ require "strscan"
|
|
7
7
|
|
8
8
|
module REXML
|
9
9
|
module Parsers
|
10
|
+
if StringScanner::Version < "3.0.8"
|
11
|
+
module StringScannerCaptures
|
12
|
+
refine StringScanner do
|
13
|
+
def captures
|
14
|
+
values_at(*(1...size))
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
using StringScannerCaptures
|
19
|
+
end
|
20
|
+
|
10
21
|
# = Using the Pull Parser
|
11
22
|
# <em>This API is experimental, and subject to change.</em>
|
12
23
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
@@ -96,7 +107,7 @@ module REXML
|
|
96
107
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
97
108
|
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
98
109
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
99
|
-
ENTITYDECL = /\s*(?:#{GEDECL})
|
110
|
+
ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
|
100
111
|
|
101
112
|
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
102
113
|
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
|
@@ -112,6 +123,19 @@ module REXML
|
|
112
123
|
"apos" => [/'/, "'", "'", /'/]
|
113
124
|
}
|
114
125
|
|
126
|
+
module Private
|
127
|
+
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
128
|
+
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
129
|
+
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
130
|
+
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
131
|
+
NAME_PATTERN = /\s*#{NAME}/um
|
132
|
+
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
133
|
+
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
134
|
+
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
135
|
+
end
|
136
|
+
private_constant :Private
|
137
|
+
include Private
|
138
|
+
|
115
139
|
def initialize( source )
|
116
140
|
self.stream = source
|
117
141
|
@listeners = []
|
@@ -196,181 +220,184 @@ module REXML
|
|
196
220
|
return @stack.shift if @stack.size > 0
|
197
221
|
#STDERR.puts @source.encoding
|
198
222
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
223
|
+
|
224
|
+
@source.ensure_buffer
|
199
225
|
if @document_status == nil
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
|
217
|
-
encoding = "UTF-16"
|
218
|
-
end
|
219
|
-
standalone = STANDALONE.match(results)
|
220
|
-
standalone = standalone[1] unless standalone.nil?
|
221
|
-
return [ :xmldecl, version, encoding, standalone ]
|
222
|
-
when INSTRUCTION_START
|
223
|
-
return process_instruction
|
224
|
-
when DOCTYPE_START
|
225
|
-
base_error_message = "Malformed DOCTYPE"
|
226
|
-
@source.match(DOCTYPE_START, true)
|
227
|
-
@nsstack.unshift(curr_ns=Set.new)
|
228
|
-
name = parse_name(base_error_message)
|
229
|
-
if @source.match(/\A\s*\[/um, true)
|
230
|
-
id = [nil, nil, nil]
|
231
|
-
@document_status = :in_doctype
|
232
|
-
elsif @source.match(/\A\s*>/um, true)
|
233
|
-
id = [nil, nil, nil]
|
234
|
-
@document_status = :after_doctype
|
235
|
-
else
|
236
|
-
id = parse_id(base_error_message,
|
237
|
-
accept_external_id: true,
|
238
|
-
accept_public_id: false)
|
239
|
-
if id[0] == "SYSTEM"
|
240
|
-
# For backward compatibility
|
241
|
-
id[1], id[2] = id[2], nil
|
226
|
+
start_position = @source.position
|
227
|
+
if @source.match("<?", true)
|
228
|
+
return process_instruction(start_position)
|
229
|
+
elsif @source.match("<!", true)
|
230
|
+
if @source.match("--", true)
|
231
|
+
return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
|
232
|
+
elsif @source.match("DOCTYPE", true)
|
233
|
+
base_error_message = "Malformed DOCTYPE"
|
234
|
+
unless @source.match(/\s+/um, true)
|
235
|
+
if @source.match(">")
|
236
|
+
message = "#{base_error_message}: name is missing"
|
237
|
+
else
|
238
|
+
message = "#{base_error_message}: invalid name"
|
239
|
+
end
|
240
|
+
@source.position = start_position
|
241
|
+
raise REXML::ParseException.new(message, @source)
|
242
242
|
end
|
243
|
-
|
243
|
+
@nsstack.unshift(curr_ns=Set.new)
|
244
|
+
name = parse_name(base_error_message)
|
245
|
+
if @source.match(/\s*\[/um, true)
|
246
|
+
id = [nil, nil, nil]
|
244
247
|
@document_status = :in_doctype
|
245
|
-
elsif @source.match(/\
|
248
|
+
elsif @source.match(/\s*>/um, true)
|
249
|
+
id = [nil, nil, nil]
|
246
250
|
@document_status = :after_doctype
|
251
|
+
@source.ensure_buffer
|
247
252
|
else
|
248
|
-
|
249
|
-
|
253
|
+
id = parse_id(base_error_message,
|
254
|
+
accept_external_id: true,
|
255
|
+
accept_public_id: false)
|
256
|
+
if id[0] == "SYSTEM"
|
257
|
+
# For backward compatibility
|
258
|
+
id[1], id[2] = id[2], nil
|
259
|
+
end
|
260
|
+
if @source.match(/\s*\[/um, true)
|
261
|
+
@document_status = :in_doctype
|
262
|
+
elsif @source.match(/\s*>/um, true)
|
263
|
+
@document_status = :after_doctype
|
264
|
+
@source.ensure_buffer
|
265
|
+
else
|
266
|
+
message = "#{base_error_message}: garbage after external ID"
|
267
|
+
raise REXML::ParseException.new(message, @source)
|
268
|
+
end
|
250
269
|
end
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
@document_status = :after_doctype
|
261
|
-
if @source.encoding == "UTF-8"
|
262
|
-
@source.buffer.force_encoding(::Encoding::UTF_8)
|
270
|
+
args = [:start_doctype, name, *id]
|
271
|
+
if @document_status == :after_doctype
|
272
|
+
@source.match(/\s*/um, true)
|
273
|
+
@stack << [ :end_doctype ]
|
274
|
+
end
|
275
|
+
return args
|
276
|
+
else
|
277
|
+
message = "Invalid XML"
|
278
|
+
raise REXML::ParseException.new(message, @source)
|
263
279
|
end
|
264
280
|
end
|
265
281
|
end
|
266
282
|
if @document_status == :in_doctype
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
if match[1] == '%'
|
281
|
-
ref = true
|
282
|
-
match.delete_at 1
|
283
|
-
end
|
284
|
-
# Now we have to sort out what kind of entity reference this is
|
285
|
-
if match[2] == 'SYSTEM'
|
286
|
-
# External reference
|
287
|
-
match[3] = match[3][1..-2] # PUBID
|
288
|
-
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
289
|
-
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
290
|
-
elsif match[2] == 'PUBLIC'
|
291
|
-
# External reference
|
292
|
-
match[3] = match[3][1..-2] # PUBID
|
293
|
-
match[4] = match[4][1..-2] # HREF
|
294
|
-
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
295
|
-
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
296
|
-
else
|
297
|
-
match[2] = match[2][1..-2]
|
298
|
-
match.pop if match.size == 4
|
299
|
-
# match is [ :entity, name, value ]
|
300
|
-
end
|
301
|
-
match << '%' if ref
|
302
|
-
return match
|
303
|
-
when ATTLISTDECL_START
|
304
|
-
md = @source.match( ATTLISTDECL_PATTERN, true )
|
305
|
-
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
306
|
-
element = md[1]
|
307
|
-
contents = md[0]
|
308
|
-
|
309
|
-
pairs = {}
|
310
|
-
values = md[0].scan( ATTDEF_RE )
|
311
|
-
values.each do |attdef|
|
312
|
-
unless attdef[3] == "#IMPLIED"
|
313
|
-
attdef.compact!
|
314
|
-
val = attdef[3]
|
315
|
-
val = attdef[4] if val == "#FIXED "
|
316
|
-
pairs[attdef[0]] = val
|
317
|
-
if attdef[0] =~ /^xmlns:(.*)/
|
318
|
-
@nsstack[0] << $1
|
319
|
-
end
|
283
|
+
@source.match(/\s*/um, true) # skip spaces
|
284
|
+
start_position = @source.position
|
285
|
+
if @source.match("<!", true)
|
286
|
+
if @source.match("ELEMENT", true)
|
287
|
+
md = @source.match(/(.*?)>/um, true)
|
288
|
+
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
289
|
+
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
290
|
+
elsif @source.match("ENTITY", true)
|
291
|
+
match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
|
292
|
+
ref = false
|
293
|
+
if match[1] == '%'
|
294
|
+
ref = true
|
295
|
+
match.delete_at 1
|
320
296
|
end
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
297
|
+
# Now we have to sort out what kind of entity reference this is
|
298
|
+
if match[2] == 'SYSTEM'
|
299
|
+
# External reference
|
300
|
+
match[3] = match[3][1..-2] # PUBID
|
301
|
+
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
302
|
+
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
303
|
+
elsif match[2] == 'PUBLIC'
|
304
|
+
# External reference
|
305
|
+
match[3] = match[3][1..-2] # PUBID
|
306
|
+
match[4] = match[4][1..-2] # HREF
|
307
|
+
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
308
|
+
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
328
309
|
else
|
329
|
-
|
310
|
+
match[2] = match[2][1..-2]
|
311
|
+
match.pop if match.size == 4
|
312
|
+
# match is [ :entity, name, value ]
|
330
313
|
end
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
314
|
+
match << '%' if ref
|
315
|
+
return match
|
316
|
+
elsif @source.match("ATTLIST", true)
|
317
|
+
md = @source.match(ATTLISTDECL_END, true)
|
318
|
+
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
319
|
+
element = md[1]
|
320
|
+
contents = md[0]
|
321
|
+
|
322
|
+
pairs = {}
|
323
|
+
values = md[0].scan( ATTDEF_RE )
|
324
|
+
values.each do |attdef|
|
325
|
+
unless attdef[3] == "#IMPLIED"
|
326
|
+
attdef.compact!
|
327
|
+
val = attdef[3]
|
328
|
+
val = attdef[4] if val == "#FIXED "
|
329
|
+
pairs[attdef[0]] = val
|
330
|
+
if attdef[0] =~ /^xmlns:(.*)/
|
331
|
+
@nsstack[0] << $1
|
332
|
+
end
|
333
|
+
end
|
334
|
+
end
|
335
|
+
return [ :attlistdecl, element, pairs, contents ]
|
336
|
+
elsif @source.match("NOTATION", true)
|
337
|
+
base_error_message = "Malformed notation declaration"
|
338
|
+
unless @source.match(/\s+/um, true)
|
339
|
+
if @source.match(">")
|
340
|
+
message = "#{base_error_message}: name is missing"
|
341
|
+
else
|
342
|
+
message = "#{base_error_message}: invalid name"
|
343
|
+
end
|
344
|
+
@source.position = start_position
|
345
|
+
raise REXML::ParseException.new(message, @source)
|
346
|
+
end
|
347
|
+
name = parse_name(base_error_message)
|
348
|
+
id = parse_id(base_error_message,
|
349
|
+
accept_external_id: true,
|
350
|
+
accept_public_id: true)
|
351
|
+
unless @source.match(/\s*>/um, true)
|
352
|
+
message = "#{base_error_message}: garbage before end >"
|
353
|
+
raise REXML::ParseException.new(message, @source)
|
354
|
+
end
|
355
|
+
return [:notationdecl, name, *id]
|
356
|
+
elsif md = @source.match(/--(.*?)-->/um, true)
|
357
|
+
case md[1]
|
358
|
+
when /--/, /-\z/
|
359
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
360
|
+
end
|
361
|
+
return [ :comment, md[1] ] if md
|
340
362
|
end
|
341
|
-
|
342
|
-
|
363
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
364
|
+
return [ :externalentity, match[1] ]
|
365
|
+
elsif @source.match(/\]\s*>/um, true)
|
343
366
|
@document_status = :after_doctype
|
344
|
-
@source.match( DOCTYPE_END, true )
|
345
367
|
return [ :end_doctype ]
|
346
368
|
end
|
347
369
|
end
|
348
370
|
if @document_status == :after_doctype
|
349
|
-
@source.match(/\
|
371
|
+
@source.match(/\s*/um, true)
|
350
372
|
end
|
351
373
|
begin
|
352
|
-
|
353
|
-
if @source.
|
354
|
-
|
374
|
+
start_position = @source.position
|
375
|
+
if @source.match("<", true)
|
376
|
+
# :text's read_until may remain only "<" in buffer. In the
|
377
|
+
# case, buffer is empty here. So we need to fill buffer
|
378
|
+
# here explicitly.
|
379
|
+
@source.ensure_buffer
|
380
|
+
if @source.match("/", true)
|
355
381
|
@nsstack.shift
|
356
382
|
last_tag = @tags.pop
|
357
|
-
md = @source.match(
|
383
|
+
md = @source.match(CLOSE_PATTERN, true)
|
358
384
|
if md and !last_tag
|
359
385
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
360
386
|
raise REXML::ParseException.new(message, @source)
|
361
387
|
end
|
362
388
|
if md.nil? or last_tag != md[1]
|
363
389
|
message = "Missing end tag for '#{last_tag}'"
|
364
|
-
message
|
390
|
+
message += " (got '#{md[1]}')" if md
|
391
|
+
@source.position = start_position if md.nil?
|
365
392
|
raise REXML::ParseException.new(message, @source)
|
366
393
|
end
|
367
394
|
return [ :end_element, last_tag ]
|
368
|
-
elsif @source.
|
369
|
-
md = @source.match(
|
395
|
+
elsif @source.match("!", true)
|
396
|
+
md = @source.match(/([^>]*>)/um)
|
370
397
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
371
398
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
372
|
-
if md[0][
|
373
|
-
md = @source.match(
|
399
|
+
if md[0][0] == ?-
|
400
|
+
md = @source.match(/--(.*?)-->/um, true)
|
374
401
|
|
375
402
|
case md[1]
|
376
403
|
when /--/, /-\z/
|
@@ -379,19 +406,21 @@ module REXML
|
|
379
406
|
|
380
407
|
return [ :comment, md[1] ] if md
|
381
408
|
else
|
382
|
-
md = @source.match(
|
409
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
383
410
|
return [ :cdata, md[1] ] if md
|
384
411
|
end
|
385
412
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
386
413
|
"in the doctype declaration.", @source)
|
387
|
-
elsif @source.
|
388
|
-
return process_instruction
|
414
|
+
elsif @source.match("?", true)
|
415
|
+
return process_instruction(start_position)
|
389
416
|
else
|
390
417
|
# Get the next tag
|
391
|
-
md = @source.match(
|
418
|
+
md = @source.match(TAG_PATTERN, true)
|
392
419
|
unless md
|
420
|
+
@source.position = start_position
|
393
421
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
394
422
|
end
|
423
|
+
tag = md[1]
|
395
424
|
@document_status = :in_element
|
396
425
|
prefixes = Set.new
|
397
426
|
prefixes << md[2] if md[2]
|
@@ -405,23 +434,19 @@ module REXML
|
|
405
434
|
end
|
406
435
|
|
407
436
|
if closed
|
408
|
-
@closed =
|
437
|
+
@closed = tag
|
409
438
|
@nsstack.shift
|
410
439
|
else
|
411
|
-
@tags.push(
|
440
|
+
@tags.push( tag )
|
412
441
|
end
|
413
|
-
return [ :start_element,
|
442
|
+
return [ :start_element, tag, attributes ]
|
414
443
|
end
|
415
444
|
else
|
416
|
-
|
417
|
-
if
|
418
|
-
@source.
|
445
|
+
text = @source.read_until("<")
|
446
|
+
if text.chomp!("<")
|
447
|
+
@source.position -= "<".bytesize
|
419
448
|
end
|
420
|
-
|
421
|
-
#return [ :text, "" ] if md[0].length == 0
|
422
|
-
# unnormalized = Text::unnormalize( md[1], self )
|
423
|
-
# return PullEvent.new( :text, md[1], unnormalized )
|
424
|
-
return [ :text, md[1] ]
|
449
|
+
return [ :text, text ]
|
425
450
|
end
|
426
451
|
rescue REXML::UndefinedNamespaceException
|
427
452
|
raise
|
@@ -463,8 +488,7 @@ module REXML
|
|
463
488
|
|
464
489
|
# Unescapes all possible entities
|
465
490
|
def unnormalize( string, entities=nil, filter=nil )
|
466
|
-
rv = string.
|
467
|
-
rv.gsub!( /\r\n?/, "\n" )
|
491
|
+
rv = string.gsub( /\r\n?/, "\n" )
|
468
492
|
matches = rv.scan( REFERENCE_RE )
|
469
493
|
return rv if matches.size == 0
|
470
494
|
rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
|
@@ -499,9 +523,9 @@ module REXML
|
|
499
523
|
end
|
500
524
|
|
501
525
|
def parse_name(base_error_message)
|
502
|
-
md = @source.match(
|
526
|
+
md = @source.match(NAME_PATTERN, true)
|
503
527
|
unless md
|
504
|
-
if @source.match(/\
|
528
|
+
if @source.match(/\s*\S/um)
|
505
529
|
message = "#{base_error_message}: invalid name"
|
506
530
|
else
|
507
531
|
message = "#{base_error_message}: name is missing"
|
@@ -577,97 +601,91 @@ module REXML
|
|
577
601
|
end
|
578
602
|
end
|
579
603
|
|
580
|
-
def process_instruction
|
581
|
-
match_data = @source.match(
|
604
|
+
def process_instruction(start_position)
|
605
|
+
match_data = @source.match(INSTRUCTION_END, true)
|
582
606
|
unless match_data
|
583
607
|
message = "Invalid processing instruction node"
|
608
|
+
@source.position = start_position
|
584
609
|
raise REXML::ParseException.new(message, @source)
|
585
610
|
end
|
611
|
+
if @document_status.nil? and match_data[1] == "xml"
|
612
|
+
content = match_data[2]
|
613
|
+
version = VERSION.match(content)
|
614
|
+
version = version[1] unless version.nil?
|
615
|
+
encoding = ENCODING.match(content)
|
616
|
+
encoding = encoding[1] unless encoding.nil?
|
617
|
+
if need_source_encoding_update?(encoding)
|
618
|
+
@source.encoding = encoding
|
619
|
+
end
|
620
|
+
if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
|
621
|
+
encoding = "UTF-16"
|
622
|
+
end
|
623
|
+
standalone = STANDALONE.match(content)
|
624
|
+
standalone = standalone[1] unless standalone.nil?
|
625
|
+
return [ :xmldecl, version, encoding, standalone ]
|
626
|
+
end
|
586
627
|
[:processing_instruction, match_data[1], match_data[2]]
|
587
628
|
end
|
588
629
|
|
589
630
|
def parse_attributes(prefixes, curr_ns)
|
590
631
|
attributes = {}
|
591
632
|
closed = false
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
until scanner.eos?
|
605
|
-
if scanner.scan(/\s+/)
|
606
|
-
break if scanner.eos?
|
607
|
-
end
|
608
|
-
|
609
|
-
pos = scanner.pos
|
610
|
-
loop do
|
611
|
-
break if scanner.scan(ATTRIBUTE_PATTERN)
|
612
|
-
unless scanner.scan(QNAME)
|
613
|
-
message = "Invalid attribute name: <#{scanner.rest}>"
|
614
|
-
raise REXML::ParseException.new(message, @source)
|
615
|
-
end
|
616
|
-
name = scanner[0]
|
617
|
-
unless scanner.scan(/\s*=\s*/um)
|
633
|
+
while true
|
634
|
+
if @source.match(">", true)
|
635
|
+
return attributes, closed
|
636
|
+
elsif @source.match("/>", true)
|
637
|
+
closed = true
|
638
|
+
return attributes, closed
|
639
|
+
elsif match = @source.match(QNAME, true)
|
640
|
+
name = match[1]
|
641
|
+
prefix = match[2]
|
642
|
+
local_part = match[3]
|
643
|
+
|
644
|
+
unless @source.match(/\s*=\s*/um, true)
|
618
645
|
message = "Missing attribute equal: <#{name}>"
|
619
646
|
raise REXML::ParseException.new(message, @source)
|
620
647
|
end
|
621
|
-
|
622
|
-
unless quote
|
648
|
+
unless match = @source.match(/(['"])/, true)
|
623
649
|
message = "Missing attribute value start quote: <#{name}>"
|
624
650
|
raise REXML::ParseException.new(message, @source)
|
625
651
|
end
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
scanner.pos = pos
|
633
|
-
closed = !match_data[2].nil?
|
634
|
-
next
|
635
|
-
end
|
636
|
-
message =
|
637
|
-
"Missing attribute value end quote: <#{name}>: <#{quote}>"
|
652
|
+
quote = match[1]
|
653
|
+
start_position = @source.position
|
654
|
+
value = @source.read_until(quote)
|
655
|
+
unless value.chomp!(quote)
|
656
|
+
@source.position = start_position
|
657
|
+
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
638
658
|
raise REXML::ParseException.new(message, @source)
|
639
659
|
end
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
msg = "The '
|
660
|
+
@source.match(/\s*/um, true)
|
661
|
+
if prefix == "xmlns"
|
662
|
+
if local_part == "xml"
|
663
|
+
if value != "http://www.w3.org/XML/1998/namespace"
|
664
|
+
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
665
|
+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
666
|
+
raise REXML::ParseException.new( msg, @source, self )
|
667
|
+
end
|
668
|
+
elsif local_part == "xmlns"
|
669
|
+
msg = "The 'xmlns' prefix must not be declared "+
|
650
670
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
651
|
-
raise REXML::ParseException.new( msg, @source, self
|
671
|
+
raise REXML::ParseException.new( msg, @source, self)
|
652
672
|
end
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
raise REXML::ParseException.new( msg, @source, self)
|
673
|
+
curr_ns << local_part
|
674
|
+
elsif prefix
|
675
|
+
prefixes << prefix unless prefix == "xml"
|
657
676
|
end
|
658
|
-
curr_ns << local_part
|
659
|
-
elsif prefix
|
660
|
-
prefixes << prefix unless prefix == "xml"
|
661
|
-
end
|
662
677
|
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
678
|
+
if attributes[name]
|
679
|
+
msg = "Duplicate attribute #{name.inspect}"
|
680
|
+
raise REXML::ParseException.new(msg, @source, self)
|
681
|
+
end
|
667
682
|
|
668
|
-
|
683
|
+
attributes[name] = value
|
684
|
+
else
|
685
|
+
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
686
|
+
raise REXML::ParseException.new(message, @source)
|
687
|
+
end
|
669
688
|
end
|
670
|
-
return attributes, closed
|
671
689
|
end
|
672
690
|
end
|
673
691
|
end
|