rexml 3.3.2 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative '../parseexception'
3
3
  require_relative '../undefinednamespaceexception'
4
+ require_relative '../security'
4
5
  require_relative '../source'
5
6
  require 'set'
6
7
  require "strscan"
7
8
 
8
9
  module REXML
9
10
  module Parsers
11
+ unless [].respond_to?(:tally)
12
+ module EnumerableTally
13
+ refine Enumerable do
14
+ def tally
15
+ counts = {}
16
+ each do |item|
17
+ counts[item] ||= 0
18
+ counts[item] += 1
19
+ end
20
+ counts
21
+ end
22
+ end
23
+ end
24
+ using EnumerableTally
25
+ end
26
+
10
27
  if StringScanner::Version < "3.0.8"
11
28
  module StringScannerCaptures
12
29
  refine StringScanner do
@@ -124,29 +141,22 @@ module REXML
124
141
  }
125
142
 
126
143
  module Private
127
- # Terminal requires two or more letters.
128
- INSTRUCTION_TERM = "?>"
129
- COMMENT_TERM = "-->"
130
- CDATA_TERM = "]]>"
131
- DOCTYPE_TERM = "]>"
132
- # Read to the end of DOCTYPE because there is no proper ENTITY termination
133
- ENTITY_TERM = DOCTYPE_TERM
134
-
135
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
144
+ PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
136
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
137
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
138
147
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
139
- NAME_PATTERN = /\s*#{NAME}/um
148
+ NAME_PATTERN = /#{NAME}/um
140
149
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
141
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
142
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
143
152
  CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
144
- CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
153
+ CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
145
154
  DEFAULT_ENTITIES_PATTERNS = {}
146
155
  default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
147
156
  default_entities.each do |term|
148
157
  DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
149
158
  end
159
+ XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
150
160
  end
151
161
  private_constant :Private
152
162
 
@@ -154,6 +164,10 @@ module REXML
154
164
  self.stream = source
155
165
  @listeners = []
156
166
  @prefixes = Set.new
167
+ @entity_expansion_count = 0
168
+ @entity_expansion_limit = Security.entity_expansion_limit
169
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
170
+ @source.ensure_buffer
157
171
  end
158
172
 
159
173
  def add_listener( listener )
@@ -161,16 +175,24 @@ module REXML
161
175
  end
162
176
 
163
177
  attr_reader :source
178
+ attr_reader :entity_expansion_count
179
+ attr_writer :entity_expansion_limit
180
+ attr_writer :entity_expansion_text_limit
164
181
 
165
182
  def stream=( source )
166
183
  @source = SourceFactory.create_from( source )
184
+ reset
185
+ end
186
+
187
+ def reset
167
188
  @closed = nil
168
189
  @have_root = false
169
190
  @document_status = nil
170
191
  @tags = []
171
192
  @stack = []
172
193
  @entities = []
173
- @nsstack = []
194
+ @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
195
+ @namespaces_restore_stack = []
174
196
  end
175
197
 
176
198
  def position
@@ -238,6 +260,10 @@ module REXML
238
260
  if @document_status == :in_doctype
239
261
  raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
240
262
  end
263
+ unless @tags.empty?
264
+ path = "/" + @tags.join("/")
265
+ raise ParseException.new("Missing end tag for '#{path}'", @source)
266
+ end
241
267
  return [ :end_document ]
242
268
  end
243
269
  return @stack.shift if @stack.size > 0
@@ -247,11 +273,11 @@ module REXML
247
273
  @source.ensure_buffer
248
274
  if @document_status == nil
249
275
  start_position = @source.position
250
- if @source.match("<?", true)
251
- return process_instruction(start_position)
252
- elsif @source.match("<!", true)
253
- if @source.match("--", true)
254
- md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
276
+ if @source.match?("<?", true)
277
+ return process_instruction
278
+ elsif @source.match?("<!", true)
279
+ if @source.match?("--", true)
280
+ md = @source.match(/(.*?)-->/um, true)
255
281
  if md.nil?
256
282
  raise REXML::ParseException.new("Unclosed comment", @source)
257
283
  end
@@ -259,10 +285,10 @@ module REXML
259
285
  raise REXML::ParseException.new("Malformed comment", @source)
260
286
  end
261
287
  return [ :comment, md[1] ]
262
- elsif @source.match("DOCTYPE", true)
288
+ elsif @source.match?("DOCTYPE", true)
263
289
  base_error_message = "Malformed DOCTYPE"
264
- unless @source.match(/\s+/um, true)
265
- if @source.match(">")
290
+ unless @source.match?(/\s+/um, true)
291
+ if @source.match?(">")
266
292
  message = "#{base_error_message}: name is missing"
267
293
  else
268
294
  message = "#{base_error_message}: invalid name"
@@ -270,12 +296,11 @@ module REXML
270
296
  @source.position = start_position
271
297
  raise REXML::ParseException.new(message, @source)
272
298
  end
273
- @nsstack.unshift(Set.new)
274
299
  name = parse_name(base_error_message)
275
- if @source.match(/\s*\[/um, true)
300
+ if @source.match?(/\s*\[/um, true)
276
301
  id = [nil, nil, nil]
277
302
  @document_status = :in_doctype
278
- elsif @source.match(/\s*>/um, true)
303
+ elsif @source.match?(/\s*>/um, true)
279
304
  id = [nil, nil, nil]
280
305
  @document_status = :after_doctype
281
306
  @source.ensure_buffer
@@ -287,9 +312,9 @@ module REXML
287
312
  # For backward compatibility
288
313
  id[1], id[2] = id[2], nil
289
314
  end
290
- if @source.match(/\s*\[/um, true)
315
+ if @source.match?(/\s*\[/um, true)
291
316
  @document_status = :in_doctype
292
- elsif @source.match(/\s*>/um, true)
317
+ elsif @source.match?(/\s*>/um, true)
293
318
  @document_status = :after_doctype
294
319
  @source.ensure_buffer
295
320
  else
@@ -299,7 +324,7 @@ module REXML
299
324
  end
300
325
  args = [:start_doctype, name, *id]
301
326
  if @document_status == :after_doctype
302
- @source.match(/\s*/um, true)
327
+ @source.match?(/\s*/um, true)
303
328
  @stack << [ :end_doctype ]
304
329
  end
305
330
  return args
@@ -310,15 +335,19 @@ module REXML
310
335
  end
311
336
  end
312
337
  if @document_status == :in_doctype
313
- @source.match(/\s*/um, true) # skip spaces
338
+ @source.match?(/\s*/um, true) # skip spaces
314
339
  start_position = @source.position
315
- if @source.match("<!", true)
316
- if @source.match("ELEMENT", true)
340
+ if @source.match?("<!", true)
341
+ if @source.match?("ELEMENT", true)
317
342
  md = @source.match(/(.*?)>/um, true)
318
343
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
319
344
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
320
- elsif @source.match("ENTITY", true)
321
- match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
345
+ elsif @source.match?("ENTITY", true)
346
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
347
+ unless match_data
348
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
349
+ end
350
+ match = [:entitydecl, *match_data.captures.compact]
322
351
  ref = false
323
352
  if match[1] == '%'
324
353
  ref = true
@@ -336,6 +365,8 @@ module REXML
336
365
  match[4] = match[4][1..-2] # HREF
337
366
  match.delete_at(5) if match.size > 5 # Chop out NDATA decl
338
367
  # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
368
+ elsif Private::PEREFERENCE_PATTERN.match?(match[2])
369
+ raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
339
370
  else
340
371
  match[2] = match[2][1..-2]
341
372
  match.pop if match.size == 4
@@ -343,7 +374,7 @@ module REXML
343
374
  end
344
375
  match << '%' if ref
345
376
  return match
346
- elsif @source.match("ATTLIST", true)
377
+ elsif @source.match?("ATTLIST", true)
347
378
  md = @source.match(Private::ATTLISTDECL_END, true)
348
379
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
349
380
  element = md[1]
@@ -358,15 +389,15 @@ module REXML
358
389
  val = attdef[4] if val == "#FIXED "
359
390
  pairs[attdef[0]] = val
360
391
  if attdef[0] =~ /^xmlns:(.*)/
361
- @nsstack[0] << $1
392
+ @namespaces[$1] = val
362
393
  end
363
394
  end
364
395
  end
365
396
  return [ :attlistdecl, element, pairs, contents ]
366
- elsif @source.match("NOTATION", true)
397
+ elsif @source.match?("NOTATION", true)
367
398
  base_error_message = "Malformed notation declaration"
368
- unless @source.match(/\s+/um, true)
369
- if @source.match(">")
399
+ unless @source.match?(/\s+/um, true)
400
+ if @source.match?(">")
370
401
  message = "#{base_error_message}: name is missing"
371
402
  else
372
403
  message = "#{base_error_message}: invalid name"
@@ -378,21 +409,21 @@ module REXML
378
409
  id = parse_id(base_error_message,
379
410
  accept_external_id: true,
380
411
  accept_public_id: true)
381
- unless @source.match(/\s*>/um, true)
412
+ unless @source.match?(/\s*>/um, true)
382
413
  message = "#{base_error_message}: garbage before end >"
383
414
  raise REXML::ParseException.new(message, @source)
384
415
  end
385
416
  return [:notationdecl, name, *id]
386
- elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
417
+ elsif md = @source.match(/--(.*?)-->/um, true)
387
418
  case md[1]
388
419
  when /--/, /-\z/
389
420
  raise REXML::ParseException.new("Malformed comment", @source)
390
421
  end
391
422
  return [ :comment, md[1] ] if md
392
423
  end
393
- elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
424
+ elsif match = @source.match(/(%.*?;)\s*/um, true)
394
425
  return [ :externalentity, match[1] ]
395
- elsif @source.match(/\]\s*>/um, true)
426
+ elsif @source.match?(/\]\s*>/um, true)
396
427
  @document_status = :after_doctype
397
428
  return [ :end_doctype ]
398
429
  end
@@ -401,17 +432,17 @@ module REXML
401
432
  end
402
433
  end
403
434
  if @document_status == :after_doctype
404
- @source.match(/\s*/um, true)
435
+ @source.match?(/\s*/um, true)
405
436
  end
406
437
  begin
407
438
  start_position = @source.position
408
- if @source.match("<", true)
439
+ if @source.match?("<", true)
409
440
  # :text's read_until may remain only "<" in buffer. In the
410
441
  # case, buffer is empty here. So we need to fill buffer
411
442
  # here explicitly.
412
443
  @source.ensure_buffer
413
- if @source.match("/", true)
414
- @nsstack.shift
444
+ if @source.match?("/", true)
445
+ @namespaces_restore_stack.pop
415
446
  last_tag = @tags.pop
416
447
  md = @source.match(Private::CLOSE_PATTERN, true)
417
448
  if md and !last_tag
@@ -425,12 +456,12 @@ module REXML
425
456
  raise REXML::ParseException.new(message, @source)
426
457
  end
427
458
  return [ :end_element, last_tag ]
428
- elsif @source.match("!", true)
459
+ elsif @source.match?("!", true)
429
460
  md = @source.match(/([^>]*>)/um)
430
461
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
431
462
  raise REXML::ParseException.new("Malformed node", @source) unless md
432
463
  if md[0][0] == ?-
433
- md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
464
+ md = @source.match(/--(.*?)-->/um, true)
434
465
 
435
466
  if md.nil? || /--|-\z/.match?(md[1])
436
467
  raise REXML::ParseException.new("Malformed comment", @source)
@@ -438,13 +469,13 @@ module REXML
438
469
 
439
470
  return [ :comment, md[1] ]
440
471
  else
441
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
472
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
442
473
  return [ :cdata, md[1] ] if md
443
474
  end
444
475
  raise REXML::ParseException.new( "Declarations can only occur "+
445
476
  "in the doctype declaration.", @source)
446
- elsif @source.match("?", true)
447
- return process_instruction(start_position)
477
+ elsif @source.match?("?", true)
478
+ return process_instruction
448
479
  else
449
480
  # Get the next tag
450
481
  md = @source.match(Private::TAG_PATTERN, true)
@@ -456,18 +487,18 @@ module REXML
456
487
  @document_status = :in_element
457
488
  @prefixes.clear
458
489
  @prefixes << md[2] if md[2]
459
- @nsstack.unshift(curr_ns=Set.new)
460
- attributes, closed = parse_attributes(@prefixes, curr_ns)
490
+ push_namespaces_restore
491
+ attributes, closed = parse_attributes(@prefixes)
461
492
  # Verify that all of the prefixes have been defined
462
493
  for prefix in @prefixes
463
- unless @nsstack.find{|k| k.member?(prefix)}
494
+ unless @namespaces.key?(prefix)
464
495
  raise UndefinedNamespaceException.new(prefix,@source,self)
465
496
  end
466
497
  end
467
498
 
468
499
  if closed
469
500
  @closed = tag
470
- @nsstack.shift
501
+ pop_namespaces_restore
471
502
  else
472
503
  if @tags.empty? and @have_root
473
504
  raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
@@ -482,11 +513,15 @@ module REXML
482
513
  if text.chomp!("<")
483
514
  @source.position -= "<".bytesize
484
515
  end
485
- if @tags.empty? and @have_root
516
+ if @tags.empty?
486
517
  unless /\A\s*\z/.match?(text)
487
- raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
518
+ if @have_root
519
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
520
+ else
521
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
522
+ end
488
523
  end
489
- return pull_event
524
+ return pull_event if @have_root
490
525
  end
491
526
  return [ :text, text ]
492
527
  end
@@ -503,13 +538,13 @@ module REXML
503
538
  private :pull_event
504
539
 
505
540
  def entity( reference, entities )
506
- value = nil
507
- value = entities[ reference ] if entities
508
- if not value
509
- value = DEFAULT_ENTITIES[ reference ]
510
- value = value[2] if value
511
- end
512
- unnormalize( value, entities ) if value
541
+ return unless entities
542
+
543
+ value = entities[ reference ]
544
+ return if value.nil?
545
+
546
+ record_entity_expansion
547
+ unnormalize( value, entities )
513
548
  end
514
549
 
515
550
  # Escapes all possible entities
@@ -539,21 +574,37 @@ module REXML
539
574
  return rv if matches.size == 0
540
575
  rv.gsub!( Private::CHARACTER_REFERENCES ) {
541
576
  m=$1
542
- m = "0#{m}" if m[0] == ?x
543
- [Integer(m)].pack('U*')
577
+ if m.start_with?("x")
578
+ code_point = Integer(m[1..-1], 16)
579
+ else
580
+ code_point = Integer(m, 10)
581
+ end
582
+ [code_point].pack('U*')
544
583
  }
545
584
  matches.collect!{|x|x[0]}.compact!
585
+ if filter
586
+ matches.reject! do |entity_reference|
587
+ filter.include?(entity_reference)
588
+ end
589
+ end
546
590
  if matches.size > 0
547
- matches.each do |entity_reference|
548
- unless filter and filter.include?(entity_reference)
549
- entity_value = entity( entity_reference, entities )
550
- if entity_value
551
- re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
552
- rv.gsub!( re, entity_value )
553
- else
554
- er = DEFAULT_ENTITIES[entity_reference]
555
- rv.gsub!( er[0], er[2] ) if er
591
+ matches.tally.each do |entity_reference, n|
592
+ entity_expansion_count_before = @entity_expansion_count
593
+ entity_value = entity( entity_reference, entities )
594
+ if entity_value
595
+ if n > 1
596
+ entity_expansion_count_delta =
597
+ @entity_expansion_count - entity_expansion_count_before
598
+ record_entity_expansion(entity_expansion_count_delta * (n - 1))
599
+ end
600
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
601
+ rv.gsub!( re, entity_value )
602
+ if rv.bytesize > @entity_expansion_text_limit
603
+ raise "entity expansion has grown too large"
556
604
  end
605
+ else
606
+ er = DEFAULT_ENTITIES[entity_reference]
607
+ rv.gsub!( er[0], er[2] ) if er
557
608
  end
558
609
  end
559
610
  rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
@@ -562,6 +613,39 @@ module REXML
562
613
  end
563
614
 
564
615
  private
616
+ def add_namespace(prefix, uri)
617
+ @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
618
+ if uri.nil?
619
+ @namespaces.delete(prefix)
620
+ else
621
+ @namespaces[prefix] = uri
622
+ end
623
+ end
624
+
625
+ def push_namespaces_restore
626
+ namespaces_restore = {}
627
+ @namespaces_restore_stack.push(namespaces_restore)
628
+ namespaces_restore
629
+ end
630
+
631
+ def pop_namespaces_restore
632
+ namespaces_restore = @namespaces_restore_stack.pop
633
+ namespaces_restore.each do |prefix, uri|
634
+ if uri.nil?
635
+ @namespaces.delete(prefix)
636
+ else
637
+ @namespaces[prefix] = uri
638
+ end
639
+ end
640
+ end
641
+
642
+ def record_entity_expansion(delta=1)
643
+ @entity_expansion_count += delta
644
+ if @entity_expansion_count > @entity_expansion_limit
645
+ raise "number of entity expansions exceeded, processing aborted."
646
+ end
647
+ end
648
+
565
649
  def need_source_encoding_update?(xml_declaration_encoding)
566
650
  return false if xml_declaration_encoding.nil?
567
651
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -571,14 +655,14 @@ module REXML
571
655
  def parse_name(base_error_message)
572
656
  md = @source.match(Private::NAME_PATTERN, true)
573
657
  unless md
574
- if @source.match(/\s*\S/um)
658
+ if @source.match?(/\S/um)
575
659
  message = "#{base_error_message}: invalid name"
576
660
  else
577
661
  message = "#{base_error_message}: name is missing"
578
662
  end
579
663
  raise REXML::ParseException.new(message, @source)
580
664
  end
581
- md[1]
665
+ md[0]
582
666
  end
583
667
 
584
668
  def parse_id(base_error_message,
@@ -613,52 +697,58 @@ module REXML
613
697
  accept_public_id:)
614
698
  public = /\A\s*PUBLIC/um
615
699
  system = /\A\s*SYSTEM/um
616
- if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
617
- if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
700
+ if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
701
+ if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
618
702
  return "public ID literal is missing"
619
703
  end
620
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
704
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
621
705
  return "invalid public ID literal"
622
706
  end
623
707
  if accept_public_id
624
- if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
708
+ if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
625
709
  return "system ID literal is missing"
626
710
  end
627
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
711
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
628
712
  return "invalid system literal"
629
713
  end
630
714
  "garbage after system literal"
631
715
  else
632
716
  "garbage after public ID literal"
633
717
  end
634
- elsif accept_external_id and @source.match(/#{system}/um)
635
- if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
718
+ elsif accept_external_id and @source.match?(/#{system}/um)
719
+ if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
636
720
  return "system literal is missing"
637
721
  end
638
- unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
722
+ unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
639
723
  return "invalid system literal"
640
724
  end
641
725
  "garbage after system literal"
642
726
  else
643
- unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
727
+ unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
644
728
  return "invalid ID type"
645
729
  end
646
730
  "ID type is missing"
647
731
  end
648
732
  end
649
733
 
650
- def process_instruction(start_position)
651
- match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
652
- unless match_data
653
- message = "Invalid processing instruction node"
654
- @source.position = start_position
655
- raise REXML::ParseException.new(message, @source)
734
+ def process_instruction
735
+ name = parse_name("Malformed XML: Invalid processing instruction node")
736
+ if @source.match?(/\s+/um, true)
737
+ match_data = @source.match(/(.*?)\?>/um, true)
738
+ unless match_data
739
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
740
+ end
741
+ content = match_data[1]
742
+ else
743
+ content = nil
744
+ unless @source.match?("?>", true)
745
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
746
+ end
656
747
  end
657
- if match_data[1] == "xml"
748
+ if name == "xml"
658
749
  if @document_status
659
750
  raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
660
751
  end
661
- content = match_data[2]
662
752
  version = VERSION.match(content)
663
753
  version = version[1] unless version.nil?
664
754
  encoding = ENCODING.match(content)
@@ -673,16 +763,17 @@ module REXML
673
763
  standalone = standalone[1] unless standalone.nil?
674
764
  return [ :xmldecl, version, encoding, standalone ]
675
765
  end
676
- [:processing_instruction, match_data[1], match_data[2]]
766
+ [:processing_instruction, name, content]
677
767
  end
678
768
 
679
- def parse_attributes(prefixes, curr_ns)
769
+ def parse_attributes(prefixes)
680
770
  attributes = {}
771
+ expanded_names = {}
681
772
  closed = false
682
773
  while true
683
- if @source.match(">", true)
774
+ if @source.match?(">", true)
684
775
  return attributes, closed
685
- elsif @source.match("/>", true)
776
+ elsif @source.match?("/>", true)
686
777
  closed = true
687
778
  return attributes, closed
688
779
  elsif match = @source.match(QNAME, true)
@@ -690,7 +781,7 @@ module REXML
690
781
  prefix = match[2]
691
782
  local_part = match[3]
692
783
 
693
- unless @source.match(/\s*=\s*/um, true)
784
+ unless @source.match?(/\s*=\s*/um, true)
694
785
  message = "Missing attribute equal: <#{name}>"
695
786
  raise REXML::ParseException.new(message, @source)
696
787
  end
@@ -706,10 +797,10 @@ module REXML
706
797
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
707
798
  raise REXML::ParseException.new(message, @source)
708
799
  end
709
- @source.match(/\s*/um, true)
800
+ @source.match?(/\s*/um, true)
710
801
  if prefix == "xmlns"
711
802
  if local_part == "xml"
712
- if value != "http://www.w3.org/XML/1998/namespace"
803
+ if value != Private::XML_PREFIXED_NAMESPACE
713
804
  msg = "The 'xml' prefix must not be bound to any other namespace "+
714
805
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
715
806
  raise REXML::ParseException.new( msg, @source, self )
@@ -719,7 +810,7 @@ module REXML
719
810
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
720
811
  raise REXML::ParseException.new( msg, @source, self)
721
812
  end
722
- curr_ns << local_part
813
+ add_namespace(local_part, value)
723
814
  elsif prefix
724
815
  prefixes << prefix unless prefix == "xml"
725
816
  end
@@ -729,6 +820,20 @@ module REXML
729
820
  raise REXML::ParseException.new(msg, @source, self)
730
821
  end
731
822
 
823
+ unless prefix == "xmlns"
824
+ uri = @namespaces[prefix]
825
+ expanded_name = [uri, local_part]
826
+ existing_prefix = expanded_names[expanded_name]
827
+ if existing_prefix
828
+ message = "Namespace conflict in adding attribute " +
829
+ "\"#{local_part}\": " +
830
+ "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
831
+ "prefix \"#{prefix}\" = \"#{uri}\""
832
+ raise REXML::ParseException.new(message, @source, self)
833
+ end
834
+ expanded_names[expanded_name] = prefix
835
+ end
836
+
732
837
  attributes[name] = value
733
838
  else
734
839
  message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
@@ -47,6 +47,18 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
54
+ def entity_expansion_limit=( limit )
55
+ @parser.entity_expansion_limit = limit
56
+ end
57
+
58
+ def entity_expansion_text_limit=( limit )
59
+ @parser.entity_expansion_text_limit = limit
60
+ end
61
+
50
62
  def each
51
63
  while has_next?
52
64
  yield self.pull
@@ -81,6 +93,10 @@ module REXML
81
93
  def unshift token
82
94
  @my_stack.unshift token
83
95
  end
96
+
97
+ def reset
98
+ @parser.reset
99
+ end
84
100
  end
85
101
 
86
102
  # A parsing event. The contents of the event are accessed as an +Array?,