rexml 3.3.2 → 3.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +200 -0
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +14 -16
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/parsers/baseparser.rb +206 -101
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +14 -0
- data/lib/rexml/parsers/streamparser.rb +15 -9
- data/lib/rexml/parsers/treeparser.rb +0 -7
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +63 -12
- data/lib/rexml/text.rb +20 -43
- metadata +8 -19
@@ -1,12 +1,29 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
7
8
|
|
8
9
|
module REXML
|
9
10
|
module Parsers
|
11
|
+
unless [].respond_to?(:tally)
|
12
|
+
module EnumerableTally
|
13
|
+
refine Enumerable do
|
14
|
+
def tally
|
15
|
+
counts = {}
|
16
|
+
each do |item|
|
17
|
+
counts[item] ||= 0
|
18
|
+
counts[item] += 1
|
19
|
+
end
|
20
|
+
counts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
using EnumerableTally
|
25
|
+
end
|
26
|
+
|
10
27
|
if StringScanner::Version < "3.0.8"
|
11
28
|
module StringScannerCaptures
|
12
29
|
refine StringScanner do
|
@@ -124,29 +141,22 @@ module REXML
|
|
124
141
|
}
|
125
142
|
|
126
143
|
module Private
|
127
|
-
|
128
|
-
INSTRUCTION_TERM = "?>"
|
129
|
-
COMMENT_TERM = "-->"
|
130
|
-
CDATA_TERM = "]]>"
|
131
|
-
DOCTYPE_TERM = "]>"
|
132
|
-
# Read to the end of DOCTYPE because there is no proper ENTITY termination
|
133
|
-
ENTITY_TERM = DOCTYPE_TERM
|
134
|
-
|
135
|
-
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
144
|
+
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
136
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
137
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
138
147
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
139
|
-
NAME_PATTERN =
|
148
|
+
NAME_PATTERN = /#{NAME}/um
|
140
149
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
141
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
142
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
143
152
|
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
144
|
-
CHARACTER_REFERENCES = /&#
|
153
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
145
154
|
DEFAULT_ENTITIES_PATTERNS = {}
|
146
155
|
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
147
156
|
default_entities.each do |term|
|
148
157
|
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
149
158
|
end
|
159
|
+
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
150
160
|
end
|
151
161
|
private_constant :Private
|
152
162
|
|
@@ -154,6 +164,10 @@ module REXML
|
|
154
164
|
self.stream = source
|
155
165
|
@listeners = []
|
156
166
|
@prefixes = Set.new
|
167
|
+
@entity_expansion_count = 0
|
168
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
169
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
170
|
+
@source.ensure_buffer
|
157
171
|
end
|
158
172
|
|
159
173
|
def add_listener( listener )
|
@@ -161,16 +175,24 @@ module REXML
|
|
161
175
|
end
|
162
176
|
|
163
177
|
attr_reader :source
|
178
|
+
attr_reader :entity_expansion_count
|
179
|
+
attr_writer :entity_expansion_limit
|
180
|
+
attr_writer :entity_expansion_text_limit
|
164
181
|
|
165
182
|
def stream=( source )
|
166
183
|
@source = SourceFactory.create_from( source )
|
184
|
+
reset
|
185
|
+
end
|
186
|
+
|
187
|
+
def reset
|
167
188
|
@closed = nil
|
168
189
|
@have_root = false
|
169
190
|
@document_status = nil
|
170
191
|
@tags = []
|
171
192
|
@stack = []
|
172
193
|
@entities = []
|
173
|
-
@
|
194
|
+
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
195
|
+
@namespaces_restore_stack = []
|
174
196
|
end
|
175
197
|
|
176
198
|
def position
|
@@ -238,6 +260,10 @@ module REXML
|
|
238
260
|
if @document_status == :in_doctype
|
239
261
|
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
240
262
|
end
|
263
|
+
unless @tags.empty?
|
264
|
+
path = "/" + @tags.join("/")
|
265
|
+
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
266
|
+
end
|
241
267
|
return [ :end_document ]
|
242
268
|
end
|
243
269
|
return @stack.shift if @stack.size > 0
|
@@ -247,11 +273,11 @@ module REXML
|
|
247
273
|
@source.ensure_buffer
|
248
274
|
if @document_status == nil
|
249
275
|
start_position = @source.position
|
250
|
-
if @source.match("<?", true)
|
251
|
-
return process_instruction
|
252
|
-
elsif @source.match("<!", true)
|
253
|
-
if @source.match("--", true)
|
254
|
-
md = @source.match(/(.*?)-->/um, true
|
276
|
+
if @source.match?("<?", true)
|
277
|
+
return process_instruction
|
278
|
+
elsif @source.match?("<!", true)
|
279
|
+
if @source.match?("--", true)
|
280
|
+
md = @source.match(/(.*?)-->/um, true)
|
255
281
|
if md.nil?
|
256
282
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
257
283
|
end
|
@@ -259,10 +285,10 @@ module REXML
|
|
259
285
|
raise REXML::ParseException.new("Malformed comment", @source)
|
260
286
|
end
|
261
287
|
return [ :comment, md[1] ]
|
262
|
-
elsif @source.match("DOCTYPE", true)
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
263
289
|
base_error_message = "Malformed DOCTYPE"
|
264
|
-
unless @source.match(/\s+/um, true)
|
265
|
-
if @source.match(">")
|
290
|
+
unless @source.match?(/\s+/um, true)
|
291
|
+
if @source.match?(">")
|
266
292
|
message = "#{base_error_message}: name is missing"
|
267
293
|
else
|
268
294
|
message = "#{base_error_message}: invalid name"
|
@@ -270,12 +296,11 @@ module REXML
|
|
270
296
|
@source.position = start_position
|
271
297
|
raise REXML::ParseException.new(message, @source)
|
272
298
|
end
|
273
|
-
@nsstack.unshift(Set.new)
|
274
299
|
name = parse_name(base_error_message)
|
275
|
-
if @source.match(/\s*\[/um, true)
|
300
|
+
if @source.match?(/\s*\[/um, true)
|
276
301
|
id = [nil, nil, nil]
|
277
302
|
@document_status = :in_doctype
|
278
|
-
elsif @source.match(/\s*>/um, true)
|
303
|
+
elsif @source.match?(/\s*>/um, true)
|
279
304
|
id = [nil, nil, nil]
|
280
305
|
@document_status = :after_doctype
|
281
306
|
@source.ensure_buffer
|
@@ -287,9 +312,9 @@ module REXML
|
|
287
312
|
# For backward compatibility
|
288
313
|
id[1], id[2] = id[2], nil
|
289
314
|
end
|
290
|
-
if @source.match(/\s*\[/um, true)
|
315
|
+
if @source.match?(/\s*\[/um, true)
|
291
316
|
@document_status = :in_doctype
|
292
|
-
elsif @source.match(/\s*>/um, true)
|
317
|
+
elsif @source.match?(/\s*>/um, true)
|
293
318
|
@document_status = :after_doctype
|
294
319
|
@source.ensure_buffer
|
295
320
|
else
|
@@ -299,7 +324,7 @@ module REXML
|
|
299
324
|
end
|
300
325
|
args = [:start_doctype, name, *id]
|
301
326
|
if @document_status == :after_doctype
|
302
|
-
@source.match(/\s*/um, true)
|
327
|
+
@source.match?(/\s*/um, true)
|
303
328
|
@stack << [ :end_doctype ]
|
304
329
|
end
|
305
330
|
return args
|
@@ -310,15 +335,19 @@ module REXML
|
|
310
335
|
end
|
311
336
|
end
|
312
337
|
if @document_status == :in_doctype
|
313
|
-
@source.match(/\s*/um, true) # skip spaces
|
338
|
+
@source.match?(/\s*/um, true) # skip spaces
|
314
339
|
start_position = @source.position
|
315
|
-
if @source.match("<!", true)
|
316
|
-
if @source.match("ELEMENT", true)
|
340
|
+
if @source.match?("<!", true)
|
341
|
+
if @source.match?("ELEMENT", true)
|
317
342
|
md = @source.match(/(.*?)>/um, true)
|
318
343
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
319
344
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
320
|
-
elsif @source.match("ENTITY", true)
|
321
|
-
|
345
|
+
elsif @source.match?("ENTITY", true)
|
346
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
347
|
+
unless match_data
|
348
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
349
|
+
end
|
350
|
+
match = [:entitydecl, *match_data.captures.compact]
|
322
351
|
ref = false
|
323
352
|
if match[1] == '%'
|
324
353
|
ref = true
|
@@ -336,6 +365,8 @@ module REXML
|
|
336
365
|
match[4] = match[4][1..-2] # HREF
|
337
366
|
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
338
367
|
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
368
|
+
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
|
369
|
+
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
|
339
370
|
else
|
340
371
|
match[2] = match[2][1..-2]
|
341
372
|
match.pop if match.size == 4
|
@@ -343,7 +374,7 @@ module REXML
|
|
343
374
|
end
|
344
375
|
match << '%' if ref
|
345
376
|
return match
|
346
|
-
elsif @source.match("ATTLIST", true)
|
377
|
+
elsif @source.match?("ATTLIST", true)
|
347
378
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
348
379
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
349
380
|
element = md[1]
|
@@ -358,15 +389,15 @@ module REXML
|
|
358
389
|
val = attdef[4] if val == "#FIXED "
|
359
390
|
pairs[attdef[0]] = val
|
360
391
|
if attdef[0] =~ /^xmlns:(.*)/
|
361
|
-
@
|
392
|
+
@namespaces[$1] = val
|
362
393
|
end
|
363
394
|
end
|
364
395
|
end
|
365
396
|
return [ :attlistdecl, element, pairs, contents ]
|
366
|
-
elsif @source.match("NOTATION", true)
|
397
|
+
elsif @source.match?("NOTATION", true)
|
367
398
|
base_error_message = "Malformed notation declaration"
|
368
|
-
unless @source.match(/\s+/um, true)
|
369
|
-
if @source.match(">")
|
399
|
+
unless @source.match?(/\s+/um, true)
|
400
|
+
if @source.match?(">")
|
370
401
|
message = "#{base_error_message}: name is missing"
|
371
402
|
else
|
372
403
|
message = "#{base_error_message}: invalid name"
|
@@ -378,21 +409,21 @@ module REXML
|
|
378
409
|
id = parse_id(base_error_message,
|
379
410
|
accept_external_id: true,
|
380
411
|
accept_public_id: true)
|
381
|
-
unless @source.match(/\s*>/um, true)
|
412
|
+
unless @source.match?(/\s*>/um, true)
|
382
413
|
message = "#{base_error_message}: garbage before end >"
|
383
414
|
raise REXML::ParseException.new(message, @source)
|
384
415
|
end
|
385
416
|
return [:notationdecl, name, *id]
|
386
|
-
elsif md = @source.match(/--(.*?)-->/um, true
|
417
|
+
elsif md = @source.match(/--(.*?)-->/um, true)
|
387
418
|
case md[1]
|
388
419
|
when /--/, /-\z/
|
389
420
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
421
|
end
|
391
422
|
return [ :comment, md[1] ] if md
|
392
423
|
end
|
393
|
-
elsif match = @source.match(/(%.*?;)\s*/um, true
|
424
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
394
425
|
return [ :externalentity, match[1] ]
|
395
|
-
elsif @source.match(/\]\s*>/um, true)
|
426
|
+
elsif @source.match?(/\]\s*>/um, true)
|
396
427
|
@document_status = :after_doctype
|
397
428
|
return [ :end_doctype ]
|
398
429
|
end
|
@@ -401,17 +432,17 @@ module REXML
|
|
401
432
|
end
|
402
433
|
end
|
403
434
|
if @document_status == :after_doctype
|
404
|
-
@source.match(/\s*/um, true)
|
435
|
+
@source.match?(/\s*/um, true)
|
405
436
|
end
|
406
437
|
begin
|
407
438
|
start_position = @source.position
|
408
|
-
if @source.match("<", true)
|
439
|
+
if @source.match?("<", true)
|
409
440
|
# :text's read_until may remain only "<" in buffer. In the
|
410
441
|
# case, buffer is empty here. So we need to fill buffer
|
411
442
|
# here explicitly.
|
412
443
|
@source.ensure_buffer
|
413
|
-
if @source.match("/", true)
|
414
|
-
@
|
444
|
+
if @source.match?("/", true)
|
445
|
+
@namespaces_restore_stack.pop
|
415
446
|
last_tag = @tags.pop
|
416
447
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
417
448
|
if md and !last_tag
|
@@ -425,12 +456,12 @@ module REXML
|
|
425
456
|
raise REXML::ParseException.new(message, @source)
|
426
457
|
end
|
427
458
|
return [ :end_element, last_tag ]
|
428
|
-
elsif @source.match("!", true)
|
459
|
+
elsif @source.match?("!", true)
|
429
460
|
md = @source.match(/([^>]*>)/um)
|
430
461
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
431
462
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
432
463
|
if md[0][0] == ?-
|
433
|
-
md = @source.match(/--(.*?)-->/um, true
|
464
|
+
md = @source.match(/--(.*?)-->/um, true)
|
434
465
|
|
435
466
|
if md.nil? || /--|-\z/.match?(md[1])
|
436
467
|
raise REXML::ParseException.new("Malformed comment", @source)
|
@@ -438,13 +469,13 @@ module REXML
|
|
438
469
|
|
439
470
|
return [ :comment, md[1] ]
|
440
471
|
else
|
441
|
-
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true
|
472
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
442
473
|
return [ :cdata, md[1] ] if md
|
443
474
|
end
|
444
475
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
445
476
|
"in the doctype declaration.", @source)
|
446
|
-
elsif @source.match("?", true)
|
447
|
-
return process_instruction
|
477
|
+
elsif @source.match?("?", true)
|
478
|
+
return process_instruction
|
448
479
|
else
|
449
480
|
# Get the next tag
|
450
481
|
md = @source.match(Private::TAG_PATTERN, true)
|
@@ -456,18 +487,18 @@ module REXML
|
|
456
487
|
@document_status = :in_element
|
457
488
|
@prefixes.clear
|
458
489
|
@prefixes << md[2] if md[2]
|
459
|
-
|
460
|
-
attributes, closed = parse_attributes(@prefixes
|
490
|
+
push_namespaces_restore
|
491
|
+
attributes, closed = parse_attributes(@prefixes)
|
461
492
|
# Verify that all of the prefixes have been defined
|
462
493
|
for prefix in @prefixes
|
463
|
-
unless @
|
494
|
+
unless @namespaces.key?(prefix)
|
464
495
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
465
496
|
end
|
466
497
|
end
|
467
498
|
|
468
499
|
if closed
|
469
500
|
@closed = tag
|
470
|
-
|
501
|
+
pop_namespaces_restore
|
471
502
|
else
|
472
503
|
if @tags.empty? and @have_root
|
473
504
|
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
@@ -482,11 +513,15 @@ module REXML
|
|
482
513
|
if text.chomp!("<")
|
483
514
|
@source.position -= "<".bytesize
|
484
515
|
end
|
485
|
-
if @tags.empty?
|
516
|
+
if @tags.empty?
|
486
517
|
unless /\A\s*\z/.match?(text)
|
487
|
-
|
518
|
+
if @have_root
|
519
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
520
|
+
else
|
521
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
522
|
+
end
|
488
523
|
end
|
489
|
-
return pull_event
|
524
|
+
return pull_event if @have_root
|
490
525
|
end
|
491
526
|
return [ :text, text ]
|
492
527
|
end
|
@@ -503,13 +538,13 @@ module REXML
|
|
503
538
|
private :pull_event
|
504
539
|
|
505
540
|
def entity( reference, entities )
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
unnormalize( value, entities )
|
541
|
+
return unless entities
|
542
|
+
|
543
|
+
value = entities[ reference ]
|
544
|
+
return if value.nil?
|
545
|
+
|
546
|
+
record_entity_expansion
|
547
|
+
unnormalize( value, entities )
|
513
548
|
end
|
514
549
|
|
515
550
|
# Escapes all possible entities
|
@@ -539,21 +574,37 @@ module REXML
|
|
539
574
|
return rv if matches.size == 0
|
540
575
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
541
576
|
m=$1
|
542
|
-
|
543
|
-
|
577
|
+
if m.start_with?("x")
|
578
|
+
code_point = Integer(m[1..-1], 16)
|
579
|
+
else
|
580
|
+
code_point = Integer(m, 10)
|
581
|
+
end
|
582
|
+
[code_point].pack('U*')
|
544
583
|
}
|
545
584
|
matches.collect!{|x|x[0]}.compact!
|
585
|
+
if filter
|
586
|
+
matches.reject! do |entity_reference|
|
587
|
+
filter.include?(entity_reference)
|
588
|
+
end
|
589
|
+
end
|
546
590
|
if matches.size > 0
|
547
|
-
matches.each do |entity_reference|
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
591
|
+
matches.tally.each do |entity_reference, n|
|
592
|
+
entity_expansion_count_before = @entity_expansion_count
|
593
|
+
entity_value = entity( entity_reference, entities )
|
594
|
+
if entity_value
|
595
|
+
if n > 1
|
596
|
+
entity_expansion_count_delta =
|
597
|
+
@entity_expansion_count - entity_expansion_count_before
|
598
|
+
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
599
|
+
end
|
600
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
601
|
+
rv.gsub!( re, entity_value )
|
602
|
+
if rv.bytesize > @entity_expansion_text_limit
|
603
|
+
raise "entity expansion has grown too large"
|
556
604
|
end
|
605
|
+
else
|
606
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
607
|
+
rv.gsub!( er[0], er[2] ) if er
|
557
608
|
end
|
558
609
|
end
|
559
610
|
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
@@ -562,6 +613,39 @@ module REXML
|
|
562
613
|
end
|
563
614
|
|
564
615
|
private
|
616
|
+
def add_namespace(prefix, uri)
|
617
|
+
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
618
|
+
if uri.nil?
|
619
|
+
@namespaces.delete(prefix)
|
620
|
+
else
|
621
|
+
@namespaces[prefix] = uri
|
622
|
+
end
|
623
|
+
end
|
624
|
+
|
625
|
+
def push_namespaces_restore
|
626
|
+
namespaces_restore = {}
|
627
|
+
@namespaces_restore_stack.push(namespaces_restore)
|
628
|
+
namespaces_restore
|
629
|
+
end
|
630
|
+
|
631
|
+
def pop_namespaces_restore
|
632
|
+
namespaces_restore = @namespaces_restore_stack.pop
|
633
|
+
namespaces_restore.each do |prefix, uri|
|
634
|
+
if uri.nil?
|
635
|
+
@namespaces.delete(prefix)
|
636
|
+
else
|
637
|
+
@namespaces[prefix] = uri
|
638
|
+
end
|
639
|
+
end
|
640
|
+
end
|
641
|
+
|
642
|
+
def record_entity_expansion(delta=1)
|
643
|
+
@entity_expansion_count += delta
|
644
|
+
if @entity_expansion_count > @entity_expansion_limit
|
645
|
+
raise "number of entity expansions exceeded, processing aborted."
|
646
|
+
end
|
647
|
+
end
|
648
|
+
|
565
649
|
def need_source_encoding_update?(xml_declaration_encoding)
|
566
650
|
return false if xml_declaration_encoding.nil?
|
567
651
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -571,14 +655,14 @@ module REXML
|
|
571
655
|
def parse_name(base_error_message)
|
572
656
|
md = @source.match(Private::NAME_PATTERN, true)
|
573
657
|
unless md
|
574
|
-
if @source.match(/\
|
658
|
+
if @source.match?(/\S/um)
|
575
659
|
message = "#{base_error_message}: invalid name"
|
576
660
|
else
|
577
661
|
message = "#{base_error_message}: name is missing"
|
578
662
|
end
|
579
663
|
raise REXML::ParseException.new(message, @source)
|
580
664
|
end
|
581
|
-
md[
|
665
|
+
md[0]
|
582
666
|
end
|
583
667
|
|
584
668
|
def parse_id(base_error_message,
|
@@ -613,52 +697,58 @@ module REXML
|
|
613
697
|
accept_public_id:)
|
614
698
|
public = /\A\s*PUBLIC/um
|
615
699
|
system = /\A\s*SYSTEM/um
|
616
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
617
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
700
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
701
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
618
702
|
return "public ID literal is missing"
|
619
703
|
end
|
620
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
704
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
621
705
|
return "invalid public ID literal"
|
622
706
|
end
|
623
707
|
if accept_public_id
|
624
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
708
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
625
709
|
return "system ID literal is missing"
|
626
710
|
end
|
627
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
711
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
628
712
|
return "invalid system literal"
|
629
713
|
end
|
630
714
|
"garbage after system literal"
|
631
715
|
else
|
632
716
|
"garbage after public ID literal"
|
633
717
|
end
|
634
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
635
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
718
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
719
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
636
720
|
return "system literal is missing"
|
637
721
|
end
|
638
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
722
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
639
723
|
return "invalid system literal"
|
640
724
|
end
|
641
725
|
"garbage after system literal"
|
642
726
|
else
|
643
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
727
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
644
728
|
return "invalid ID type"
|
645
729
|
end
|
646
730
|
"ID type is missing"
|
647
731
|
end
|
648
732
|
end
|
649
733
|
|
650
|
-
def process_instruction
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
734
|
+
def process_instruction
|
735
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
736
|
+
if @source.match?(/\s+/um, true)
|
737
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
738
|
+
unless match_data
|
739
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
740
|
+
end
|
741
|
+
content = match_data[1]
|
742
|
+
else
|
743
|
+
content = nil
|
744
|
+
unless @source.match?("?>", true)
|
745
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
746
|
+
end
|
656
747
|
end
|
657
|
-
if
|
748
|
+
if name == "xml"
|
658
749
|
if @document_status
|
659
750
|
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
660
751
|
end
|
661
|
-
content = match_data[2]
|
662
752
|
version = VERSION.match(content)
|
663
753
|
version = version[1] unless version.nil?
|
664
754
|
encoding = ENCODING.match(content)
|
@@ -673,16 +763,17 @@ module REXML
|
|
673
763
|
standalone = standalone[1] unless standalone.nil?
|
674
764
|
return [ :xmldecl, version, encoding, standalone ]
|
675
765
|
end
|
676
|
-
[:processing_instruction,
|
766
|
+
[:processing_instruction, name, content]
|
677
767
|
end
|
678
768
|
|
679
|
-
def parse_attributes(prefixes
|
769
|
+
def parse_attributes(prefixes)
|
680
770
|
attributes = {}
|
771
|
+
expanded_names = {}
|
681
772
|
closed = false
|
682
773
|
while true
|
683
|
-
if @source.match(">", true)
|
774
|
+
if @source.match?(">", true)
|
684
775
|
return attributes, closed
|
685
|
-
elsif @source.match("/>", true)
|
776
|
+
elsif @source.match?("/>", true)
|
686
777
|
closed = true
|
687
778
|
return attributes, closed
|
688
779
|
elsif match = @source.match(QNAME, true)
|
@@ -690,7 +781,7 @@ module REXML
|
|
690
781
|
prefix = match[2]
|
691
782
|
local_part = match[3]
|
692
783
|
|
693
|
-
unless @source.match(/\s*=\s*/um, true)
|
784
|
+
unless @source.match?(/\s*=\s*/um, true)
|
694
785
|
message = "Missing attribute equal: <#{name}>"
|
695
786
|
raise REXML::ParseException.new(message, @source)
|
696
787
|
end
|
@@ -706,10 +797,10 @@ module REXML
|
|
706
797
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
707
798
|
raise REXML::ParseException.new(message, @source)
|
708
799
|
end
|
709
|
-
@source.match(/\s*/um, true)
|
800
|
+
@source.match?(/\s*/um, true)
|
710
801
|
if prefix == "xmlns"
|
711
802
|
if local_part == "xml"
|
712
|
-
if value !=
|
803
|
+
if value != Private::XML_PREFIXED_NAMESPACE
|
713
804
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
714
805
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
715
806
|
raise REXML::ParseException.new( msg, @source, self )
|
@@ -719,7 +810,7 @@ module REXML
|
|
719
810
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
720
811
|
raise REXML::ParseException.new( msg, @source, self)
|
721
812
|
end
|
722
|
-
|
813
|
+
add_namespace(local_part, value)
|
723
814
|
elsif prefix
|
724
815
|
prefixes << prefix unless prefix == "xml"
|
725
816
|
end
|
@@ -729,6 +820,20 @@ module REXML
|
|
729
820
|
raise REXML::ParseException.new(msg, @source, self)
|
730
821
|
end
|
731
822
|
|
823
|
+
unless prefix == "xmlns"
|
824
|
+
uri = @namespaces[prefix]
|
825
|
+
expanded_name = [uri, local_part]
|
826
|
+
existing_prefix = expanded_names[expanded_name]
|
827
|
+
if existing_prefix
|
828
|
+
message = "Namespace conflict in adding attribute " +
|
829
|
+
"\"#{local_part}\": " +
|
830
|
+
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
831
|
+
"prefix \"#{prefix}\" = \"#{uri}\""
|
832
|
+
raise REXML::ParseException.new(message, @source, self)
|
833
|
+
end
|
834
|
+
expanded_names[expanded_name] = prefix
|
835
|
+
end
|
836
|
+
|
732
837
|
attributes[name] = value
|
733
838
|
else
|
734
839
|
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
@@ -47,6 +47,18 @@ module REXML
|
|
47
47
|
@listeners << listener
|
48
48
|
end
|
49
49
|
|
50
|
+
def entity_expansion_count
|
51
|
+
@parser.entity_expansion_count
|
52
|
+
end
|
53
|
+
|
54
|
+
def entity_expansion_limit=( limit )
|
55
|
+
@parser.entity_expansion_limit = limit
|
56
|
+
end
|
57
|
+
|
58
|
+
def entity_expansion_text_limit=( limit )
|
59
|
+
@parser.entity_expansion_text_limit = limit
|
60
|
+
end
|
61
|
+
|
50
62
|
def each
|
51
63
|
while has_next?
|
52
64
|
yield self.pull
|
@@ -81,6 +93,10 @@ module REXML
|
|
81
93
|
def unshift token
|
82
94
|
@my_stack.unshift token
|
83
95
|
end
|
96
|
+
|
97
|
+
def reset
|
98
|
+
@parser.reset
|
99
|
+
end
|
84
100
|
end
|
85
101
|
|
86
102
|
# A parsing event. The contents of the event are accessed as an +Array?,
|