rexml 3.3.2 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +200 -0
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +14 -16
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/parsers/baseparser.rb +206 -101
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +14 -0
- data/lib/rexml/parsers/streamparser.rb +15 -9
- data/lib/rexml/parsers/treeparser.rb +0 -7
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +63 -12
- data/lib/rexml/text.rb +20 -43
- metadata +8 -19
@@ -1,12 +1,29 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
7
8
|
|
8
9
|
module REXML
|
9
10
|
module Parsers
|
11
|
+
unless [].respond_to?(:tally)
|
12
|
+
module EnumerableTally
|
13
|
+
refine Enumerable do
|
14
|
+
def tally
|
15
|
+
counts = {}
|
16
|
+
each do |item|
|
17
|
+
counts[item] ||= 0
|
18
|
+
counts[item] += 1
|
19
|
+
end
|
20
|
+
counts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
using EnumerableTally
|
25
|
+
end
|
26
|
+
|
10
27
|
if StringScanner::Version < "3.0.8"
|
11
28
|
module StringScannerCaptures
|
12
29
|
refine StringScanner do
|
@@ -124,29 +141,22 @@ module REXML
|
|
124
141
|
}
|
125
142
|
|
126
143
|
module Private
|
127
|
-
|
128
|
-
INSTRUCTION_TERM = "?>"
|
129
|
-
COMMENT_TERM = "-->"
|
130
|
-
CDATA_TERM = "]]>"
|
131
|
-
DOCTYPE_TERM = "]>"
|
132
|
-
# Read to the end of DOCTYPE because there is no proper ENTITY termination
|
133
|
-
ENTITY_TERM = DOCTYPE_TERM
|
134
|
-
|
135
|
-
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
144
|
+
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
136
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
137
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
138
147
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
139
|
-
NAME_PATTERN =
|
148
|
+
NAME_PATTERN = /#{NAME}/um
|
140
149
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
141
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
142
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
143
152
|
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
144
|
-
CHARACTER_REFERENCES = /&#
|
153
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
145
154
|
DEFAULT_ENTITIES_PATTERNS = {}
|
146
155
|
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
147
156
|
default_entities.each do |term|
|
148
157
|
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
149
158
|
end
|
159
|
+
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
150
160
|
end
|
151
161
|
private_constant :Private
|
152
162
|
|
@@ -154,6 +164,10 @@ module REXML
|
|
154
164
|
self.stream = source
|
155
165
|
@listeners = []
|
156
166
|
@prefixes = Set.new
|
167
|
+
@entity_expansion_count = 0
|
168
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
169
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
170
|
+
@source.ensure_buffer
|
157
171
|
end
|
158
172
|
|
159
173
|
def add_listener( listener )
|
@@ -161,16 +175,24 @@ module REXML
|
|
161
175
|
end
|
162
176
|
|
163
177
|
attr_reader :source
|
178
|
+
attr_reader :entity_expansion_count
|
179
|
+
attr_writer :entity_expansion_limit
|
180
|
+
attr_writer :entity_expansion_text_limit
|
164
181
|
|
165
182
|
def stream=( source )
|
166
183
|
@source = SourceFactory.create_from( source )
|
184
|
+
reset
|
185
|
+
end
|
186
|
+
|
187
|
+
def reset
|
167
188
|
@closed = nil
|
168
189
|
@have_root = false
|
169
190
|
@document_status = nil
|
170
191
|
@tags = []
|
171
192
|
@stack = []
|
172
193
|
@entities = []
|
173
|
-
@
|
194
|
+
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
195
|
+
@namespaces_restore_stack = []
|
174
196
|
end
|
175
197
|
|
176
198
|
def position
|
@@ -238,6 +260,10 @@ module REXML
|
|
238
260
|
if @document_status == :in_doctype
|
239
261
|
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
240
262
|
end
|
263
|
+
unless @tags.empty?
|
264
|
+
path = "/" + @tags.join("/")
|
265
|
+
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
266
|
+
end
|
241
267
|
return [ :end_document ]
|
242
268
|
end
|
243
269
|
return @stack.shift if @stack.size > 0
|
@@ -247,11 +273,11 @@ module REXML
|
|
247
273
|
@source.ensure_buffer
|
248
274
|
if @document_status == nil
|
249
275
|
start_position = @source.position
|
250
|
-
if @source.match("<?", true)
|
251
|
-
return process_instruction
|
252
|
-
elsif @source.match("<!", true)
|
253
|
-
if @source.match("--", true)
|
254
|
-
md = @source.match(/(.*?)-->/um, true
|
276
|
+
if @source.match?("<?", true)
|
277
|
+
return process_instruction
|
278
|
+
elsif @source.match?("<!", true)
|
279
|
+
if @source.match?("--", true)
|
280
|
+
md = @source.match(/(.*?)-->/um, true)
|
255
281
|
if md.nil?
|
256
282
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
257
283
|
end
|
@@ -259,10 +285,10 @@ module REXML
|
|
259
285
|
raise REXML::ParseException.new("Malformed comment", @source)
|
260
286
|
end
|
261
287
|
return [ :comment, md[1] ]
|
262
|
-
elsif @source.match("DOCTYPE", true)
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
263
289
|
base_error_message = "Malformed DOCTYPE"
|
264
|
-
unless @source.match(/\s+/um, true)
|
265
|
-
if @source.match(">")
|
290
|
+
unless @source.match?(/\s+/um, true)
|
291
|
+
if @source.match?(">")
|
266
292
|
message = "#{base_error_message}: name is missing"
|
267
293
|
else
|
268
294
|
message = "#{base_error_message}: invalid name"
|
@@ -270,12 +296,11 @@ module REXML
|
|
270
296
|
@source.position = start_position
|
271
297
|
raise REXML::ParseException.new(message, @source)
|
272
298
|
end
|
273
|
-
@nsstack.unshift(Set.new)
|
274
299
|
name = parse_name(base_error_message)
|
275
|
-
if @source.match(/\s*\[/um, true)
|
300
|
+
if @source.match?(/\s*\[/um, true)
|
276
301
|
id = [nil, nil, nil]
|
277
302
|
@document_status = :in_doctype
|
278
|
-
elsif @source.match(/\s*>/um, true)
|
303
|
+
elsif @source.match?(/\s*>/um, true)
|
279
304
|
id = [nil, nil, nil]
|
280
305
|
@document_status = :after_doctype
|
281
306
|
@source.ensure_buffer
|
@@ -287,9 +312,9 @@ module REXML
|
|
287
312
|
# For backward compatibility
|
288
313
|
id[1], id[2] = id[2], nil
|
289
314
|
end
|
290
|
-
if @source.match(/\s*\[/um, true)
|
315
|
+
if @source.match?(/\s*\[/um, true)
|
291
316
|
@document_status = :in_doctype
|
292
|
-
elsif @source.match(/\s*>/um, true)
|
317
|
+
elsif @source.match?(/\s*>/um, true)
|
293
318
|
@document_status = :after_doctype
|
294
319
|
@source.ensure_buffer
|
295
320
|
else
|
@@ -299,7 +324,7 @@ module REXML
|
|
299
324
|
end
|
300
325
|
args = [:start_doctype, name, *id]
|
301
326
|
if @document_status == :after_doctype
|
302
|
-
@source.match(/\s*/um, true)
|
327
|
+
@source.match?(/\s*/um, true)
|
303
328
|
@stack << [ :end_doctype ]
|
304
329
|
end
|
305
330
|
return args
|
@@ -310,15 +335,19 @@ module REXML
|
|
310
335
|
end
|
311
336
|
end
|
312
337
|
if @document_status == :in_doctype
|
313
|
-
@source.match(/\s*/um, true) # skip spaces
|
338
|
+
@source.match?(/\s*/um, true) # skip spaces
|
314
339
|
start_position = @source.position
|
315
|
-
if @source.match("<!", true)
|
316
|
-
if @source.match("ELEMENT", true)
|
340
|
+
if @source.match?("<!", true)
|
341
|
+
if @source.match?("ELEMENT", true)
|
317
342
|
md = @source.match(/(.*?)>/um, true)
|
318
343
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
319
344
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
320
|
-
elsif @source.match("ENTITY", true)
|
321
|
-
|
345
|
+
elsif @source.match?("ENTITY", true)
|
346
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
347
|
+
unless match_data
|
348
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
349
|
+
end
|
350
|
+
match = [:entitydecl, *match_data.captures.compact]
|
322
351
|
ref = false
|
323
352
|
if match[1] == '%'
|
324
353
|
ref = true
|
@@ -336,6 +365,8 @@ module REXML
|
|
336
365
|
match[4] = match[4][1..-2] # HREF
|
337
366
|
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
338
367
|
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
368
|
+
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
|
369
|
+
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
|
339
370
|
else
|
340
371
|
match[2] = match[2][1..-2]
|
341
372
|
match.pop if match.size == 4
|
@@ -343,7 +374,7 @@ module REXML
|
|
343
374
|
end
|
344
375
|
match << '%' if ref
|
345
376
|
return match
|
346
|
-
elsif @source.match("ATTLIST", true)
|
377
|
+
elsif @source.match?("ATTLIST", true)
|
347
378
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
348
379
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
349
380
|
element = md[1]
|
@@ -358,15 +389,15 @@ module REXML
|
|
358
389
|
val = attdef[4] if val == "#FIXED "
|
359
390
|
pairs[attdef[0]] = val
|
360
391
|
if attdef[0] =~ /^xmlns:(.*)/
|
361
|
-
@
|
392
|
+
@namespaces[$1] = val
|
362
393
|
end
|
363
394
|
end
|
364
395
|
end
|
365
396
|
return [ :attlistdecl, element, pairs, contents ]
|
366
|
-
elsif @source.match("NOTATION", true)
|
397
|
+
elsif @source.match?("NOTATION", true)
|
367
398
|
base_error_message = "Malformed notation declaration"
|
368
|
-
unless @source.match(/\s+/um, true)
|
369
|
-
if @source.match(">")
|
399
|
+
unless @source.match?(/\s+/um, true)
|
400
|
+
if @source.match?(">")
|
370
401
|
message = "#{base_error_message}: name is missing"
|
371
402
|
else
|
372
403
|
message = "#{base_error_message}: invalid name"
|
@@ -378,21 +409,21 @@ module REXML
|
|
378
409
|
id = parse_id(base_error_message,
|
379
410
|
accept_external_id: true,
|
380
411
|
accept_public_id: true)
|
381
|
-
unless @source.match(/\s*>/um, true)
|
412
|
+
unless @source.match?(/\s*>/um, true)
|
382
413
|
message = "#{base_error_message}: garbage before end >"
|
383
414
|
raise REXML::ParseException.new(message, @source)
|
384
415
|
end
|
385
416
|
return [:notationdecl, name, *id]
|
386
|
-
elsif md = @source.match(/--(.*?)-->/um, true
|
417
|
+
elsif md = @source.match(/--(.*?)-->/um, true)
|
387
418
|
case md[1]
|
388
419
|
when /--/, /-\z/
|
389
420
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
421
|
end
|
391
422
|
return [ :comment, md[1] ] if md
|
392
423
|
end
|
393
|
-
elsif match = @source.match(/(%.*?;)\s*/um, true
|
424
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
394
425
|
return [ :externalentity, match[1] ]
|
395
|
-
elsif @source.match(/\]\s*>/um, true)
|
426
|
+
elsif @source.match?(/\]\s*>/um, true)
|
396
427
|
@document_status = :after_doctype
|
397
428
|
return [ :end_doctype ]
|
398
429
|
end
|
@@ -401,17 +432,17 @@ module REXML
|
|
401
432
|
end
|
402
433
|
end
|
403
434
|
if @document_status == :after_doctype
|
404
|
-
@source.match(/\s*/um, true)
|
435
|
+
@source.match?(/\s*/um, true)
|
405
436
|
end
|
406
437
|
begin
|
407
438
|
start_position = @source.position
|
408
|
-
if @source.match("<", true)
|
439
|
+
if @source.match?("<", true)
|
409
440
|
# :text's read_until may remain only "<" in buffer. In the
|
410
441
|
# case, buffer is empty here. So we need to fill buffer
|
411
442
|
# here explicitly.
|
412
443
|
@source.ensure_buffer
|
413
|
-
if @source.match("/", true)
|
414
|
-
@
|
444
|
+
if @source.match?("/", true)
|
445
|
+
@namespaces_restore_stack.pop
|
415
446
|
last_tag = @tags.pop
|
416
447
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
417
448
|
if md and !last_tag
|
@@ -425,12 +456,12 @@ module REXML
|
|
425
456
|
raise REXML::ParseException.new(message, @source)
|
426
457
|
end
|
427
458
|
return [ :end_element, last_tag ]
|
428
|
-
elsif @source.match("!", true)
|
459
|
+
elsif @source.match?("!", true)
|
429
460
|
md = @source.match(/([^>]*>)/um)
|
430
461
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
431
462
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
432
463
|
if md[0][0] == ?-
|
433
|
-
md = @source.match(/--(.*?)-->/um, true
|
464
|
+
md = @source.match(/--(.*?)-->/um, true)
|
434
465
|
|
435
466
|
if md.nil? || /--|-\z/.match?(md[1])
|
436
467
|
raise REXML::ParseException.new("Malformed comment", @source)
|
@@ -438,13 +469,13 @@ module REXML
|
|
438
469
|
|
439
470
|
return [ :comment, md[1] ]
|
440
471
|
else
|
441
|
-
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true
|
472
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
442
473
|
return [ :cdata, md[1] ] if md
|
443
474
|
end
|
444
475
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
445
476
|
"in the doctype declaration.", @source)
|
446
|
-
elsif @source.match("?", true)
|
447
|
-
return process_instruction
|
477
|
+
elsif @source.match?("?", true)
|
478
|
+
return process_instruction
|
448
479
|
else
|
449
480
|
# Get the next tag
|
450
481
|
md = @source.match(Private::TAG_PATTERN, true)
|
@@ -456,18 +487,18 @@ module REXML
|
|
456
487
|
@document_status = :in_element
|
457
488
|
@prefixes.clear
|
458
489
|
@prefixes << md[2] if md[2]
|
459
|
-
|
460
|
-
attributes, closed = parse_attributes(@prefixes
|
490
|
+
push_namespaces_restore
|
491
|
+
attributes, closed = parse_attributes(@prefixes)
|
461
492
|
# Verify that all of the prefixes have been defined
|
462
493
|
for prefix in @prefixes
|
463
|
-
unless @
|
494
|
+
unless @namespaces.key?(prefix)
|
464
495
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
465
496
|
end
|
466
497
|
end
|
467
498
|
|
468
499
|
if closed
|
469
500
|
@closed = tag
|
470
|
-
|
501
|
+
pop_namespaces_restore
|
471
502
|
else
|
472
503
|
if @tags.empty? and @have_root
|
473
504
|
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
@@ -482,11 +513,15 @@ module REXML
|
|
482
513
|
if text.chomp!("<")
|
483
514
|
@source.position -= "<".bytesize
|
484
515
|
end
|
485
|
-
if @tags.empty?
|
516
|
+
if @tags.empty?
|
486
517
|
unless /\A\s*\z/.match?(text)
|
487
|
-
|
518
|
+
if @have_root
|
519
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
520
|
+
else
|
521
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
522
|
+
end
|
488
523
|
end
|
489
|
-
return pull_event
|
524
|
+
return pull_event if @have_root
|
490
525
|
end
|
491
526
|
return [ :text, text ]
|
492
527
|
end
|
@@ -503,13 +538,13 @@ module REXML
|
|
503
538
|
private :pull_event
|
504
539
|
|
505
540
|
def entity( reference, entities )
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
unnormalize( value, entities )
|
541
|
+
return unless entities
|
542
|
+
|
543
|
+
value = entities[ reference ]
|
544
|
+
return if value.nil?
|
545
|
+
|
546
|
+
record_entity_expansion
|
547
|
+
unnormalize( value, entities )
|
513
548
|
end
|
514
549
|
|
515
550
|
# Escapes all possible entities
|
@@ -539,21 +574,37 @@ module REXML
|
|
539
574
|
return rv if matches.size == 0
|
540
575
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
541
576
|
m=$1
|
542
|
-
|
543
|
-
|
577
|
+
if m.start_with?("x")
|
578
|
+
code_point = Integer(m[1..-1], 16)
|
579
|
+
else
|
580
|
+
code_point = Integer(m, 10)
|
581
|
+
end
|
582
|
+
[code_point].pack('U*')
|
544
583
|
}
|
545
584
|
matches.collect!{|x|x[0]}.compact!
|
585
|
+
if filter
|
586
|
+
matches.reject! do |entity_reference|
|
587
|
+
filter.include?(entity_reference)
|
588
|
+
end
|
589
|
+
end
|
546
590
|
if matches.size > 0
|
547
|
-
matches.each do |entity_reference|
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
591
|
+
matches.tally.each do |entity_reference, n|
|
592
|
+
entity_expansion_count_before = @entity_expansion_count
|
593
|
+
entity_value = entity( entity_reference, entities )
|
594
|
+
if entity_value
|
595
|
+
if n > 1
|
596
|
+
entity_expansion_count_delta =
|
597
|
+
@entity_expansion_count - entity_expansion_count_before
|
598
|
+
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
599
|
+
end
|
600
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
601
|
+
rv.gsub!( re, entity_value )
|
602
|
+
if rv.bytesize > @entity_expansion_text_limit
|
603
|
+
raise "entity expansion has grown too large"
|
556
604
|
end
|
605
|
+
else
|
606
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
607
|
+
rv.gsub!( er[0], er[2] ) if er
|
557
608
|
end
|
558
609
|
end
|
559
610
|
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
@@ -562,6 +613,39 @@ module REXML
|
|
562
613
|
end
|
563
614
|
|
564
615
|
private
|
616
|
+
def add_namespace(prefix, uri)
|
617
|
+
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
618
|
+
if uri.nil?
|
619
|
+
@namespaces.delete(prefix)
|
620
|
+
else
|
621
|
+
@namespaces[prefix] = uri
|
622
|
+
end
|
623
|
+
end
|
624
|
+
|
625
|
+
def push_namespaces_restore
|
626
|
+
namespaces_restore = {}
|
627
|
+
@namespaces_restore_stack.push(namespaces_restore)
|
628
|
+
namespaces_restore
|
629
|
+
end
|
630
|
+
|
631
|
+
def pop_namespaces_restore
|
632
|
+
namespaces_restore = @namespaces_restore_stack.pop
|
633
|
+
namespaces_restore.each do |prefix, uri|
|
634
|
+
if uri.nil?
|
635
|
+
@namespaces.delete(prefix)
|
636
|
+
else
|
637
|
+
@namespaces[prefix] = uri
|
638
|
+
end
|
639
|
+
end
|
640
|
+
end
|
641
|
+
|
642
|
+
def record_entity_expansion(delta=1)
|
643
|
+
@entity_expansion_count += delta
|
644
|
+
if @entity_expansion_count > @entity_expansion_limit
|
645
|
+
raise "number of entity expansions exceeded, processing aborted."
|
646
|
+
end
|
647
|
+
end
|
648
|
+
|
565
649
|
def need_source_encoding_update?(xml_declaration_encoding)
|
566
650
|
return false if xml_declaration_encoding.nil?
|
567
651
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -571,14 +655,14 @@ module REXML
|
|
571
655
|
def parse_name(base_error_message)
|
572
656
|
md = @source.match(Private::NAME_PATTERN, true)
|
573
657
|
unless md
|
574
|
-
if @source.match(/\
|
658
|
+
if @source.match?(/\S/um)
|
575
659
|
message = "#{base_error_message}: invalid name"
|
576
660
|
else
|
577
661
|
message = "#{base_error_message}: name is missing"
|
578
662
|
end
|
579
663
|
raise REXML::ParseException.new(message, @source)
|
580
664
|
end
|
581
|
-
md[
|
665
|
+
md[0]
|
582
666
|
end
|
583
667
|
|
584
668
|
def parse_id(base_error_message,
|
@@ -613,52 +697,58 @@ module REXML
|
|
613
697
|
accept_public_id:)
|
614
698
|
public = /\A\s*PUBLIC/um
|
615
699
|
system = /\A\s*SYSTEM/um
|
616
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
617
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
700
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
701
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
618
702
|
return "public ID literal is missing"
|
619
703
|
end
|
620
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
704
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
621
705
|
return "invalid public ID literal"
|
622
706
|
end
|
623
707
|
if accept_public_id
|
624
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
708
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
625
709
|
return "system ID literal is missing"
|
626
710
|
end
|
627
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
711
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
628
712
|
return "invalid system literal"
|
629
713
|
end
|
630
714
|
"garbage after system literal"
|
631
715
|
else
|
632
716
|
"garbage after public ID literal"
|
633
717
|
end
|
634
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
635
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
718
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
719
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
636
720
|
return "system literal is missing"
|
637
721
|
end
|
638
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
722
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
639
723
|
return "invalid system literal"
|
640
724
|
end
|
641
725
|
"garbage after system literal"
|
642
726
|
else
|
643
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
727
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
644
728
|
return "invalid ID type"
|
645
729
|
end
|
646
730
|
"ID type is missing"
|
647
731
|
end
|
648
732
|
end
|
649
733
|
|
650
|
-
def process_instruction
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
734
|
+
def process_instruction
|
735
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
736
|
+
if @source.match?(/\s+/um, true)
|
737
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
738
|
+
unless match_data
|
739
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
740
|
+
end
|
741
|
+
content = match_data[1]
|
742
|
+
else
|
743
|
+
content = nil
|
744
|
+
unless @source.match?("?>", true)
|
745
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
746
|
+
end
|
656
747
|
end
|
657
|
-
if
|
748
|
+
if name == "xml"
|
658
749
|
if @document_status
|
659
750
|
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
660
751
|
end
|
661
|
-
content = match_data[2]
|
662
752
|
version = VERSION.match(content)
|
663
753
|
version = version[1] unless version.nil?
|
664
754
|
encoding = ENCODING.match(content)
|
@@ -673,16 +763,17 @@ module REXML
|
|
673
763
|
standalone = standalone[1] unless standalone.nil?
|
674
764
|
return [ :xmldecl, version, encoding, standalone ]
|
675
765
|
end
|
676
|
-
[:processing_instruction,
|
766
|
+
[:processing_instruction, name, content]
|
677
767
|
end
|
678
768
|
|
679
|
-
def parse_attributes(prefixes
|
769
|
+
def parse_attributes(prefixes)
|
680
770
|
attributes = {}
|
771
|
+
expanded_names = {}
|
681
772
|
closed = false
|
682
773
|
while true
|
683
|
-
if @source.match(">", true)
|
774
|
+
if @source.match?(">", true)
|
684
775
|
return attributes, closed
|
685
|
-
elsif @source.match("/>", true)
|
776
|
+
elsif @source.match?("/>", true)
|
686
777
|
closed = true
|
687
778
|
return attributes, closed
|
688
779
|
elsif match = @source.match(QNAME, true)
|
@@ -690,7 +781,7 @@ module REXML
|
|
690
781
|
prefix = match[2]
|
691
782
|
local_part = match[3]
|
692
783
|
|
693
|
-
unless @source.match(/\s*=\s*/um, true)
|
784
|
+
unless @source.match?(/\s*=\s*/um, true)
|
694
785
|
message = "Missing attribute equal: <#{name}>"
|
695
786
|
raise REXML::ParseException.new(message, @source)
|
696
787
|
end
|
@@ -706,10 +797,10 @@ module REXML
|
|
706
797
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
707
798
|
raise REXML::ParseException.new(message, @source)
|
708
799
|
end
|
709
|
-
@source.match(/\s*/um, true)
|
800
|
+
@source.match?(/\s*/um, true)
|
710
801
|
if prefix == "xmlns"
|
711
802
|
if local_part == "xml"
|
712
|
-
if value !=
|
803
|
+
if value != Private::XML_PREFIXED_NAMESPACE
|
713
804
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
714
805
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
715
806
|
raise REXML::ParseException.new( msg, @source, self )
|
@@ -719,7 +810,7 @@ module REXML
|
|
719
810
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
720
811
|
raise REXML::ParseException.new( msg, @source, self)
|
721
812
|
end
|
722
|
-
|
813
|
+
add_namespace(local_part, value)
|
723
814
|
elsif prefix
|
724
815
|
prefixes << prefix unless prefix == "xml"
|
725
816
|
end
|
@@ -729,6 +820,20 @@ module REXML
|
|
729
820
|
raise REXML::ParseException.new(msg, @source, self)
|
730
821
|
end
|
731
822
|
|
823
|
+
unless prefix == "xmlns"
|
824
|
+
uri = @namespaces[prefix]
|
825
|
+
expanded_name = [uri, local_part]
|
826
|
+
existing_prefix = expanded_names[expanded_name]
|
827
|
+
if existing_prefix
|
828
|
+
message = "Namespace conflict in adding attribute " +
|
829
|
+
"\"#{local_part}\": " +
|
830
|
+
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
831
|
+
"prefix \"#{prefix}\" = \"#{uri}\""
|
832
|
+
raise REXML::ParseException.new(message, @source, self)
|
833
|
+
end
|
834
|
+
expanded_names[expanded_name] = prefix
|
835
|
+
end
|
836
|
+
|
732
837
|
attributes[name] = value
|
733
838
|
else
|
734
839
|
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
@@ -47,6 +47,18 @@ module REXML
|
|
47
47
|
@listeners << listener
|
48
48
|
end
|
49
49
|
|
50
|
+
def entity_expansion_count
|
51
|
+
@parser.entity_expansion_count
|
52
|
+
end
|
53
|
+
|
54
|
+
def entity_expansion_limit=( limit )
|
55
|
+
@parser.entity_expansion_limit = limit
|
56
|
+
end
|
57
|
+
|
58
|
+
def entity_expansion_text_limit=( limit )
|
59
|
+
@parser.entity_expansion_text_limit = limit
|
60
|
+
end
|
61
|
+
|
50
62
|
def each
|
51
63
|
while has_next?
|
52
64
|
yield self.pull
|
@@ -81,6 +93,10 @@ module REXML
|
|
81
93
|
def unshift token
|
82
94
|
@my_stack.unshift token
|
83
95
|
end
|
96
|
+
|
97
|
+
def reset
|
98
|
+
@parser.reset
|
99
|
+
end
|
84
100
|
end
|
85
101
|
|
86
102
|
# A parsing event. The contents of the event are accessed as an +Array?,
|