rexml 3.3.2 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative '../parseexception'
3
3
  require_relative '../undefinednamespaceexception'
4
+ require_relative '../security'
4
5
  require_relative '../source'
5
6
  require 'set'
6
7
  require "strscan"
7
8
 
8
9
  module REXML
9
10
  module Parsers
11
+ unless [].respond_to?(:tally)
12
+ module EnumerableTally
13
+ refine Enumerable do
14
+ def tally
15
+ counts = {}
16
+ each do |item|
17
+ counts[item] ||= 0
18
+ counts[item] += 1
19
+ end
20
+ counts
21
+ end
22
+ end
23
+ end
24
+ using EnumerableTally
25
+ end
26
+
10
27
  if StringScanner::Version < "3.0.8"
11
28
  module StringScannerCaptures
12
29
  refine StringScanner do
@@ -124,29 +141,22 @@ module REXML
124
141
  }
125
142
 
126
143
  module Private
127
- # Terminal requires two or more letters.
128
- INSTRUCTION_TERM = "?>"
129
- COMMENT_TERM = "-->"
130
- CDATA_TERM = "]]>"
131
- DOCTYPE_TERM = "]>"
132
- # Read to the end of DOCTYPE because there is no proper ENTITY termination
133
- ENTITY_TERM = DOCTYPE_TERM
134
-
135
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
144
+ PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
136
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
137
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
138
147
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
139
- NAME_PATTERN = /\s*#{NAME}/um
148
+ NAME_PATTERN = /#{NAME}/um
140
149
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
141
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
142
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
143
152
  CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
144
- CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
153
+ CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
145
154
  DEFAULT_ENTITIES_PATTERNS = {}
146
155
  default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
147
156
  default_entities.each do |term|
148
157
  DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
149
158
  end
159
+ XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
150
160
  end
151
161
  private_constant :Private
152
162
 
@@ -154,6 +164,10 @@ module REXML
154
164
  self.stream = source
155
165
  @listeners = []
156
166
  @prefixes = Set.new
167
+ @entity_expansion_count = 0
168
+ @entity_expansion_limit = Security.entity_expansion_limit
169
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
170
+ @source.ensure_buffer
157
171
  end
158
172
 
159
173
  def add_listener( listener )
@@ -161,16 +175,24 @@ module REXML
161
175
  end
162
176
 
163
177
  attr_reader :source
178
+ attr_reader :entity_expansion_count
179
+ attr_writer :entity_expansion_limit
180
+ attr_writer :entity_expansion_text_limit
164
181
 
165
182
  def stream=( source )
166
183
  @source = SourceFactory.create_from( source )
184
+ reset
185
+ end
186
+
187
+ def reset
167
188
  @closed = nil
168
189
  @have_root = false
169
190
  @document_status = nil
170
191
  @tags = []
171
192
  @stack = []
172
193
  @entities = []
173
- @nsstack = []
194
+ @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
195
+ @namespaces_restore_stack = []
174
196
  end
175
197
 
176
198
  def position
@@ -238,6 +260,10 @@ module REXML
238
260
  if @document_status == :in_doctype
239
261
  raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
240
262
  end
263
+ unless @tags.empty?
264
+ path = "/" + @tags.join("/")
265
+ raise ParseException.new("Missing end tag for '#{path}'", @source)
266
+ end
241
267
  return [ :end_document ]
242
268
  end
243
269
  return @stack.shift if @stack.size > 0
@@ -247,11 +273,11 @@ module REXML
247
273
  @source.ensure_buffer
248
274
  if @document_status == nil
249
275
  start_position = @source.position
250
- if @source.match("<?", true)
251
- return process_instruction(start_position)
252
- elsif @source.match("<!", true)
253
- if @source.match("--", true)
254
- md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
276
+ if @source.match?("<?", true)
277
+ return process_instruction
278
+ elsif @source.match?("<!", true)
279
+ if @source.match?("--", true)
280
+ md = @source.match(/(.*?)-->/um, true)
255
281
  if md.nil?
256
282
  raise REXML::ParseException.new("Unclosed comment", @source)
257
283
  end
@@ -259,10 +285,10 @@ module REXML
259
285
  raise REXML::ParseException.new("Malformed comment", @source)
260
286
  end
261
287
  return [ :comment, md[1] ]
262
- elsif @source.match("DOCTYPE", true)
288
+ elsif @source.match?("DOCTYPE", true)
263
289
  base_error_message = "Malformed DOCTYPE"
264
- unless @source.match(/\s+/um, true)
265
- if @source.match(">")
290
+ unless @source.match?(/\s+/um, true)
291
+ if @source.match?(">")
266
292
  message = "#{base_error_message}: name is missing"
267
293
  else
268
294
  message = "#{base_error_message}: invalid name"
@@ -270,12 +296,11 @@ module REXML
270
296
  @source.position = start_position
271
297
  raise REXML::ParseException.new(message, @source)
272
298
  end
273
- @nsstack.unshift(Set.new)
274
299
  name = parse_name(base_error_message)
275
- if @source.match(/\s*\[/um, true)
300
+ if @source.match?(/\s*\[/um, true)
276
301
  id = [nil, nil, nil]
277
302
  @document_status = :in_doctype
278
- elsif @source.match(/\s*>/um, true)
303
+ elsif @source.match?(/\s*>/um, true)
279
304
  id = [nil, nil, nil]
280
305
  @document_status = :after_doctype
281
306
  @source.ensure_buffer
@@ -287,9 +312,9 @@ module REXML
287
312
  # For backward compatibility
288
313
  id[1], id[2] = id[2], nil
289
314
  end
290
- if @source.match(/\s*\[/um, true)
315
+ if @source.match?(/\s*\[/um, true)
291
316
  @document_status = :in_doctype
292
- elsif @source.match(/\s*>/um, true)
317
+ elsif @source.match?(/\s*>/um, true)
293
318
  @document_status = :after_doctype
294
319
  @source.ensure_buffer
295
320
  else
@@ -299,7 +324,7 @@ module REXML
299
324
  end
300
325
  args = [:start_doctype, name, *id]
301
326
  if @document_status == :after_doctype
302
- @source.match(/\s*/um, true)
327
+ @source.match?(/\s*/um, true)
303
328
  @stack << [ :end_doctype ]
304
329
  end
305
330
  return args
@@ -310,15 +335,19 @@ module REXML
310
335
  end
311
336
  end
312
337
  if @document_status == :in_doctype
313
- @source.match(/\s*/um, true) # skip spaces
338
+ @source.match?(/\s*/um, true) # skip spaces
314
339
  start_position = @source.position
315
- if @source.match("<!", true)
316
- if @source.match("ELEMENT", true)
340
+ if @source.match?("<!", true)
341
+ if @source.match?("ELEMENT", true)
317
342
  md = @source.match(/(.*?)>/um, true)
318
343
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
319
344
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
320
- elsif @source.match("ENTITY", true)
321
- match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
345
+ elsif @source.match?("ENTITY", true)
346
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
347
+ unless match_data
348
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
349
+ end
350
+ match = [:entitydecl, *match_data.captures.compact]
322
351
  ref = false
323
352
  if match[1] == '%'
324
353
  ref = true
@@ -336,6 +365,8 @@ module REXML
336
365
  match[4] = match[4][1..-2] # HREF
337
366
  match.delete_at(5) if match.size > 5 # Chop out NDATA decl
338
367
  # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
368
+ elsif Private::PEREFERENCE_PATTERN.match?(match[2])
369
+ raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
339
370
  else
340
371
  match[2] = match[2][1..-2]
341
372
  match.pop if match.size == 4
@@ -343,7 +374,7 @@ module REXML
343
374
  end
344
375
  match << '%' if ref
345
376
  return match
346
- elsif @source.match("ATTLIST", true)
377
+ elsif @source.match?("ATTLIST", true)
347
378
  md = @source.match(Private::ATTLISTDECL_END, true)
348
379
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
349
380
  element = md[1]
@@ -358,15 +389,15 @@ module REXML
358
389
  val = attdef[4] if val == "#FIXED "
359
390
  pairs[attdef[0]] = val
360
391
  if attdef[0] =~ /^xmlns:(.*)/
361
- @nsstack[0] << $1
392
+ @namespaces[$1] = val
362
393
  end
363
394
  end
364
395
  end
365
396
  return [ :attlistdecl, element, pairs, contents ]
366
- elsif @source.match("NOTATION", true)
397
+ elsif @source.match?("NOTATION", true)
367
398
  base_error_message = "Malformed notation declaration"
368
- unless @source.match(/\s+/um, true)
369
- if @source.match(">")
399
+ unless @source.match?(/\s+/um, true)
400
+ if @source.match?(">")
370
401
  message = "#{base_error_message}: name is missing"
371
402
  else
372
403
  message = "#{base_error_message}: invalid name"
@@ -378,21 +409,21 @@ module REXML
378
409
  id = parse_id(base_error_message,
379
410
  accept_external_id: true,
380
411
  accept_public_id: true)
381
- unless @source.match(/\s*>/um, true)
412
+ unless @source.match?(/\s*>/um, true)
382
413
  message = "#{base_error_message}: garbage before end >"
383
414
  raise REXML::ParseException.new(message, @source)
384
415
  end
385
416
  return [:notationdecl, name, *id]
386
- elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
417
+ elsif md = @source.match(/--(.*?)-->/um, true)
387
418
  case md[1]
388
419
  when /--/, /-\z/
389
420
  raise REXML::ParseException.new("Malformed comment", @source)
390
421
  end
391
422
  return [ :comment, md[1] ] if md
392
423
  end
393
- elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
424
+ elsif match = @source.match(/(%.*?;)\s*/um, true)
394
425
  return [ :externalentity, match[1] ]
395
- elsif @source.match(/\]\s*>/um, true)
426
+ elsif @source.match?(/\]\s*>/um, true)
396
427
  @document_status = :after_doctype
397
428
  return [ :end_doctype ]
398
429
  end
@@ -401,17 +432,17 @@ module REXML
401
432
  end
402
433
  end
403
434
  if @document_status == :after_doctype
404
- @source.match(/\s*/um, true)
435
+ @source.match?(/\s*/um, true)
405
436
  end
406
437
  begin
407
438
  start_position = @source.position
408
- if @source.match("<", true)
439
+ if @source.match?("<", true)
409
440
  # :text's read_until may remain only "<" in buffer. In the
410
441
  # case, buffer is empty here. So we need to fill buffer
411
442
  # here explicitly.
412
443
  @source.ensure_buffer
413
- if @source.match("/", true)
414
- @nsstack.shift
444
+ if @source.match?("/", true)
445
+ @namespaces_restore_stack.pop
415
446
  last_tag = @tags.pop
416
447
  md = @source.match(Private::CLOSE_PATTERN, true)
417
448
  if md and !last_tag
@@ -425,12 +456,12 @@ module REXML
425
456
  raise REXML::ParseException.new(message, @source)
426
457
  end
427
458
  return [ :end_element, last_tag ]
428
- elsif @source.match("!", true)
459
+ elsif @source.match?("!", true)
429
460
  md = @source.match(/([^>]*>)/um)
430
461
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
431
462
  raise REXML::ParseException.new("Malformed node", @source) unless md
432
463
  if md[0][0] == ?-
433
- md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
464
+ md = @source.match(/--(.*?)-->/um, true)
434
465
 
435
466
  if md.nil? || /--|-\z/.match?(md[1])
436
467
  raise REXML::ParseException.new("Malformed comment", @source)
@@ -438,13 +469,13 @@ module REXML
438
469
 
439
470
  return [ :comment, md[1] ]
440
471
  else
441
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
472
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
442
473
  return [ :cdata, md[1] ] if md
443
474
  end
444
475
  raise REXML::ParseException.new( "Declarations can only occur "+
445
476
  "in the doctype declaration.", @source)
446
- elsif @source.match("?", true)
447
- return process_instruction(start_position)
477
+ elsif @source.match?("?", true)
478
+ return process_instruction
448
479
  else
449
480
  # Get the next tag
450
481
  md = @source.match(Private::TAG_PATTERN, true)
@@ -456,18 +487,18 @@ module REXML
456
487
  @document_status = :in_element
457
488
  @prefixes.clear
458
489
  @prefixes << md[2] if md[2]
459
- @nsstack.unshift(curr_ns=Set.new)
460
- attributes, closed = parse_attributes(@prefixes, curr_ns)
490
+ push_namespaces_restore
491
+ attributes, closed = parse_attributes(@prefixes)
461
492
  # Verify that all of the prefixes have been defined
462
493
  for prefix in @prefixes
463
- unless @nsstack.find{|k| k.member?(prefix)}
494
+ unless @namespaces.key?(prefix)
464
495
  raise UndefinedNamespaceException.new(prefix,@source,self)
465
496
  end
466
497
  end
467
498
 
468
499
  if closed
469
500
  @closed = tag
470
- @nsstack.shift
501
+ pop_namespaces_restore
471
502
  else
472
503
  if @tags.empty? and @have_root
473
504
  raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
@@ -482,11 +513,15 @@ module REXML
482
513
  if text.chomp!("<")
483
514
  @source.position -= "<".bytesize
484
515
  end
485
- if @tags.empty? and @have_root
516
+ if @tags.empty?
486
517
  unless /\A\s*\z/.match?(text)
487
- raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
518
+ if @have_root
519
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
520
+ else
521
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
522
+ end
488
523
  end
489
- return pull_event
524
+ return pull_event if @have_root
490
525
  end
491
526
  return [ :text, text ]
492
527
  end
@@ -503,13 +538,13 @@ module REXML
503
538
  private :pull_event
504
539
 
505
540
  def entity( reference, entities )
506
- value = nil
507
- value = entities[ reference ] if entities
508
- if not value
509
- value = DEFAULT_ENTITIES[ reference ]
510
- value = value[2] if value
511
- end
512
- unnormalize( value, entities ) if value
541
+ return unless entities
542
+
543
+ value = entities[ reference ]
544
+ return if value.nil?
545
+
546
+ record_entity_expansion
547
+ unnormalize( value, entities )
513
548
  end
514
549
 
515
550
  # Escapes all possible entities
@@ -539,21 +574,37 @@ module REXML
539
574
  return rv if matches.size == 0
540
575
  rv.gsub!( Private::CHARACTER_REFERENCES ) {
541
576
  m=$1
542
- m = "0#{m}" if m[0] == ?x
543
- [Integer(m)].pack('U*')
577
+ if m.start_with?("x")
578
+ code_point = Integer(m[1..-1], 16)
579
+ else
580
+ code_point = Integer(m, 10)
581
+ end
582
+ [code_point].pack('U*')
544
583
  }
545
584
  matches.collect!{|x|x[0]}.compact!
585
+ if filter
586
+ matches.reject! do |entity_reference|
587
+ filter.include?(entity_reference)
588
+ end
589
+ end
546
590
  if matches.size > 0
547
- matches.each do |entity_reference|
548
- unless filter and filter.include?(entity_reference)
549
- entity_value = entity( entity_reference, entities )
550
- if entity_value
551
- re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
552
- rv.gsub!( re, entity_value )
553
- else
554
- er = DEFAULT_ENTITIES[entity_reference]
555
- rv.gsub!( er[0], er[2] ) if er
591
+ matches.tally.each do |entity_reference, n|
592
+ entity_expansion_count_before = @entity_expansion_count
593
+ entity_value = entity( entity_reference, entities )
594
+ if entity_value
595
+ if n > 1
596
+ entity_expansion_count_delta =
597
+ @entity_expansion_count - entity_expansion_count_before
598
+ record_entity_expansion(entity_expansion_count_delta * (n - 1))
599
+ end
600
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
601
+ rv.gsub!( re, entity_value )
602
+ if rv.bytesize > @entity_expansion_text_limit
603
+ raise "entity expansion has grown too large"
556
604
  end
605
+ else
606
+ er = DEFAULT_ENTITIES[entity_reference]
607
+ rv.gsub!( er[0], er[2] ) if er
557
608
  end
558
609
  end
559
610
  rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
@@ -562,6 +613,39 @@ module REXML
562
613
  end
563
614
 
564
615
  private
616
+ def add_namespace(prefix, uri)
617
+ @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
618
+ if uri.nil?
619
+ @namespaces.delete(prefix)
620
+ else
621
+ @namespaces[prefix] = uri
622
+ end
623
+ end
624
+
625
+ def push_namespaces_restore
626
+ namespaces_restore = {}
627
+ @namespaces_restore_stack.push(namespaces_restore)
628
+ namespaces_restore
629
+ end
630
+
631
+ def pop_namespaces_restore
632
+ namespaces_restore = @namespaces_restore_stack.pop
633
+ namespaces_restore.each do |prefix, uri|
634
+ if uri.nil?
635
+ @namespaces.delete(prefix)
636
+ else
637
+ @namespaces[prefix] = uri
638
+ end
639
+ end
640
+ end
641
+
642
+ def record_entity_expansion(delta=1)
643
+ @entity_expansion_count += delta
644
+ if @entity_expansion_count > @entity_expansion_limit
645
+ raise "number of entity expansions exceeded, processing aborted."
646
+ end
647
+ end
648
+
565
649
  def need_source_encoding_update?(xml_declaration_encoding)
566
650
  return false if xml_declaration_encoding.nil?
567
651
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -571,14 +655,14 @@ module REXML
571
655
  def parse_name(base_error_message)
572
656
  md = @source.match(Private::NAME_PATTERN, true)
573
657
  unless md
574
- if @source.match(/\s*\S/um)
658
+ if @source.match?(/\S/um)
575
659
  message = "#{base_error_message}: invalid name"
576
660
  else
577
661
  message = "#{base_error_message}: name is missing"
578
662
  end
579
663
  raise REXML::ParseException.new(message, @source)
580
664
  end
581
- md[1]
665
+ md[0]
582
666
  end
583
667
 
584
668
  def parse_id(base_error_message,
@@ -613,52 +697,58 @@ module REXML
613
697
  accept_public_id:)
614
698
  public = /\A\s*PUBLIC/um
615
699
  system = /\A\s*SYSTEM/um
616
- if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
617
- if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
700
+ if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
701
+ if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
618
702
  return "public ID literal is missing"
619
703
  end
620
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
704
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
621
705
  return "invalid public ID literal"
622
706
  end
623
707
  if accept_public_id
624
- if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
708
+ if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
625
709
  return "system ID literal is missing"
626
710
  end
627
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
711
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
628
712
  return "invalid system literal"
629
713
  end
630
714
  "garbage after system literal"
631
715
  else
632
716
  "garbage after public ID literal"
633
717
  end
634
- elsif accept_external_id and @source.match(/#{system}/um)
635
- if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
718
+ elsif accept_external_id and @source.match?(/#{system}/um)
719
+ if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
636
720
  return "system literal is missing"
637
721
  end
638
- unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
722
+ unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
639
723
  return "invalid system literal"
640
724
  end
641
725
  "garbage after system literal"
642
726
  else
643
- unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
727
+ unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
644
728
  return "invalid ID type"
645
729
  end
646
730
  "ID type is missing"
647
731
  end
648
732
  end
649
733
 
650
- def process_instruction(start_position)
651
- match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
652
- unless match_data
653
- message = "Invalid processing instruction node"
654
- @source.position = start_position
655
- raise REXML::ParseException.new(message, @source)
734
+ def process_instruction
735
+ name = parse_name("Malformed XML: Invalid processing instruction node")
736
+ if @source.match?(/\s+/um, true)
737
+ match_data = @source.match(/(.*?)\?>/um, true)
738
+ unless match_data
739
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
740
+ end
741
+ content = match_data[1]
742
+ else
743
+ content = nil
744
+ unless @source.match?("?>", true)
745
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
746
+ end
656
747
  end
657
- if match_data[1] == "xml"
748
+ if name == "xml"
658
749
  if @document_status
659
750
  raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
660
751
  end
661
- content = match_data[2]
662
752
  version = VERSION.match(content)
663
753
  version = version[1] unless version.nil?
664
754
  encoding = ENCODING.match(content)
@@ -673,16 +763,17 @@ module REXML
673
763
  standalone = standalone[1] unless standalone.nil?
674
764
  return [ :xmldecl, version, encoding, standalone ]
675
765
  end
676
- [:processing_instruction, match_data[1], match_data[2]]
766
+ [:processing_instruction, name, content]
677
767
  end
678
768
 
679
- def parse_attributes(prefixes, curr_ns)
769
+ def parse_attributes(prefixes)
680
770
  attributes = {}
771
+ expanded_names = {}
681
772
  closed = false
682
773
  while true
683
- if @source.match(">", true)
774
+ if @source.match?(">", true)
684
775
  return attributes, closed
685
- elsif @source.match("/>", true)
776
+ elsif @source.match?("/>", true)
686
777
  closed = true
687
778
  return attributes, closed
688
779
  elsif match = @source.match(QNAME, true)
@@ -690,7 +781,7 @@ module REXML
690
781
  prefix = match[2]
691
782
  local_part = match[3]
692
783
 
693
- unless @source.match(/\s*=\s*/um, true)
784
+ unless @source.match?(/\s*=\s*/um, true)
694
785
  message = "Missing attribute equal: <#{name}>"
695
786
  raise REXML::ParseException.new(message, @source)
696
787
  end
@@ -706,10 +797,10 @@ module REXML
706
797
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
707
798
  raise REXML::ParseException.new(message, @source)
708
799
  end
709
- @source.match(/\s*/um, true)
800
+ @source.match?(/\s*/um, true)
710
801
  if prefix == "xmlns"
711
802
  if local_part == "xml"
712
- if value != "http://www.w3.org/XML/1998/namespace"
803
+ if value != Private::XML_PREFIXED_NAMESPACE
713
804
  msg = "The 'xml' prefix must not be bound to any other namespace "+
714
805
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
715
806
  raise REXML::ParseException.new( msg, @source, self )
@@ -719,7 +810,7 @@ module REXML
719
810
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
720
811
  raise REXML::ParseException.new( msg, @source, self)
721
812
  end
722
- curr_ns << local_part
813
+ add_namespace(local_part, value)
723
814
  elsif prefix
724
815
  prefixes << prefix unless prefix == "xml"
725
816
  end
@@ -729,6 +820,20 @@ module REXML
729
820
  raise REXML::ParseException.new(msg, @source, self)
730
821
  end
731
822
 
823
+ unless prefix == "xmlns"
824
+ uri = @namespaces[prefix]
825
+ expanded_name = [uri, local_part]
826
+ existing_prefix = expanded_names[expanded_name]
827
+ if existing_prefix
828
+ message = "Namespace conflict in adding attribute " +
829
+ "\"#{local_part}\": " +
830
+ "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
831
+ "prefix \"#{prefix}\" = \"#{uri}\""
832
+ raise REXML::ParseException.new(message, @source, self)
833
+ end
834
+ expanded_names[expanded_name] = prefix
835
+ end
836
+
732
837
  attributes[name] = value
733
838
  else
734
839
  message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
@@ -47,6 +47,18 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
54
+ def entity_expansion_limit=( limit )
55
+ @parser.entity_expansion_limit = limit
56
+ end
57
+
58
+ def entity_expansion_text_limit=( limit )
59
+ @parser.entity_expansion_text_limit = limit
60
+ end
61
+
50
62
  def each
51
63
  while has_next?
52
64
  yield self.pull
@@ -81,6 +93,10 @@ module REXML
81
93
  def unshift token
82
94
  @my_stack.unshift token
83
95
  end
96
+
97
+ def reset
98
+ @parser.reset
99
+ end
84
100
  end
85
101
 
86
102
  # A parsing event. The contents of the event are accessed as an +Array?,