rexml 3.2.8 → 3.3.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative '../parseexception'
3
3
  require_relative '../undefinednamespaceexception'
4
+ require_relative '../security'
4
5
  require_relative '../source'
5
6
  require 'set'
6
7
  require "strscan"
7
8
 
8
9
  module REXML
9
10
  module Parsers
11
+ unless [].respond_to?(:tally)
12
+ module EnumerableTally
13
+ refine Enumerable do
14
+ def tally
15
+ counts = {}
16
+ each do |item|
17
+ counts[item] ||= 0
18
+ counts[item] += 1
19
+ end
20
+ counts
21
+ end
22
+ end
23
+ end
24
+ using EnumerableTally
25
+ end
26
+
27
+ if StringScanner::Version < "3.0.8"
28
+ module StringScannerCaptures
29
+ refine StringScanner do
30
+ def captures
31
+ values_at(*(1...size))
32
+ end
33
+ end
34
+ end
35
+ using StringScannerCaptures
36
+ end
37
+
10
38
  # = Using the Pull Parser
11
39
  # <em>This API is experimental, and subject to change.</em>
12
40
  # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
@@ -113,21 +141,33 @@ module REXML
113
141
  }
114
142
 
115
143
  module Private
116
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
144
+ PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
117
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
118
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
119
147
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
120
- NAME_PATTERN = /\s*#{NAME}/um
148
+ NAME_PATTERN = /#{NAME}/um
121
149
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
122
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
123
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
152
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
153
+ CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
154
+ DEFAULT_ENTITIES_PATTERNS = {}
155
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
156
+ default_entities.each do |term|
157
+ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
158
+ end
159
+ XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
124
160
  end
125
161
  private_constant :Private
126
- include Private
127
162
 
128
163
  def initialize( source )
129
164
  self.stream = source
130
165
  @listeners = []
166
+ @prefixes = Set.new
167
+ @entity_expansion_count = 0
168
+ @entity_expansion_limit = Security.entity_expansion_limit
169
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
170
+ @source.ensure_buffer
131
171
  end
132
172
 
133
173
  def add_listener( listener )
@@ -135,15 +175,20 @@ module REXML
135
175
  end
136
176
 
137
177
  attr_reader :source
178
+ attr_reader :entity_expansion_count
179
+ attr_writer :entity_expansion_limit
180
+ attr_writer :entity_expansion_text_limit
138
181
 
139
182
  def stream=( source )
140
183
  @source = SourceFactory.create_from( source )
141
184
  @closed = nil
185
+ @have_root = false
142
186
  @document_status = nil
143
187
  @tags = []
144
188
  @stack = []
145
189
  @entities = []
146
- @nsstack = []
190
+ @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
191
+ @namespaces_restore_stack = []
147
192
  end
148
193
 
149
194
  def position
@@ -193,6 +238,8 @@ module REXML
193
238
 
194
239
  # Returns the next event. This is a +PullEvent+ object.
195
240
  def pull
241
+ @source.drop_parsed_content
242
+
196
243
  pull_event.tap do |event|
197
244
  @listeners.each do |listener|
198
245
  listener.receive event
@@ -205,7 +252,16 @@ module REXML
205
252
  x, @closed = @closed, nil
206
253
  return [ :end_element, x ]
207
254
  end
208
- return [ :end_document ] if empty?
255
+ if empty?
256
+ if @document_status == :in_doctype
257
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
258
+ end
259
+ unless @tags.empty?
260
+ path = "/" + @tags.join("/")
261
+ raise ParseException.new("Missing end tag for '#{path}'", @source)
262
+ end
263
+ return [ :end_document ]
264
+ end
209
265
  return @stack.shift if @stack.size > 0
210
266
  #STDERR.puts @source.encoding
211
267
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
@@ -214,10 +270,17 @@ module REXML
214
270
  if @document_status == nil
215
271
  start_position = @source.position
216
272
  if @source.match("<?", true)
217
- return process_instruction(start_position)
273
+ return process_instruction
218
274
  elsif @source.match("<!", true)
219
275
  if @source.match("--", true)
220
- return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
276
+ md = @source.match(/(.*?)-->/um, true)
277
+ if md.nil?
278
+ raise REXML::ParseException.new("Unclosed comment", @source)
279
+ end
280
+ if /--|-\z/.match?(md[1])
281
+ raise REXML::ParseException.new("Malformed comment", @source)
282
+ end
283
+ return [ :comment, md[1] ]
221
284
  elsif @source.match("DOCTYPE", true)
222
285
  base_error_message = "Malformed DOCTYPE"
223
286
  unless @source.match(/\s+/um, true)
@@ -229,7 +292,6 @@ module REXML
229
292
  @source.position = start_position
230
293
  raise REXML::ParseException.new(message, @source)
231
294
  end
232
- @nsstack.unshift(curr_ns=Set.new)
233
295
  name = parse_name(base_error_message)
234
296
  if @source.match(/\s*\[/um, true)
235
297
  id = [nil, nil, nil]
@@ -277,7 +339,11 @@ module REXML
277
339
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
278
340
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
279
341
  elsif @source.match("ENTITY", true)
280
- match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
342
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
343
+ unless match_data
344
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
345
+ end
346
+ match = [:entitydecl, *match_data.captures.compact]
281
347
  ref = false
282
348
  if match[1] == '%'
283
349
  ref = true
@@ -295,6 +361,8 @@ module REXML
295
361
  match[4] = match[4][1..-2] # HREF
296
362
  match.delete_at(5) if match.size > 5 # Chop out NDATA decl
297
363
  # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
364
+ elsif Private::PEREFERENCE_PATTERN.match?(match[2])
365
+ raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
298
366
  else
299
367
  match[2] = match[2][1..-2]
300
368
  match.pop if match.size == 4
@@ -303,13 +371,13 @@ module REXML
303
371
  match << '%' if ref
304
372
  return match
305
373
  elsif @source.match("ATTLIST", true)
306
- md = @source.match(ATTLISTDECL_END, true)
374
+ md = @source.match(Private::ATTLISTDECL_END, true)
307
375
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
308
376
  element = md[1]
309
377
  contents = md[0]
310
378
 
311
379
  pairs = {}
312
- values = md[0].scan( ATTDEF_RE )
380
+ values = md[0].strip.scan( ATTDEF_RE )
313
381
  values.each do |attdef|
314
382
  unless attdef[3] == "#IMPLIED"
315
383
  attdef.compact!
@@ -317,7 +385,7 @@ module REXML
317
385
  val = attdef[4] if val == "#FIXED "
318
386
  pairs[attdef[0]] = val
319
387
  if attdef[0] =~ /^xmlns:(.*)/
320
- @nsstack[0] << $1
388
+ @namespaces[$1] = val
321
389
  end
322
390
  end
323
391
  end
@@ -355,6 +423,9 @@ module REXML
355
423
  @document_status = :after_doctype
356
424
  return [ :end_doctype ]
357
425
  end
426
+ if @document_status == :in_doctype
427
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
428
+ end
358
429
  end
359
430
  if @document_status == :after_doctype
360
431
  @source.match(/\s*/um, true)
@@ -362,10 +433,14 @@ module REXML
362
433
  begin
363
434
  start_position = @source.position
364
435
  if @source.match("<", true)
436
+ # :text's read_until may remain only "<" in buffer. In the
437
+ # case, buffer is empty here. So we need to fill buffer
438
+ # here explicitly.
439
+ @source.ensure_buffer
365
440
  if @source.match("/", true)
366
- @nsstack.shift
441
+ @namespaces_restore_stack.pop
367
442
  last_tag = @tags.pop
368
- md = @source.match(CLOSE_PATTERN, true)
443
+ md = @source.match(Private::CLOSE_PATTERN, true)
369
444
  if md and !last_tag
370
445
  message = "Unexpected top-level end tag (got '#{md[1]}')"
371
446
  raise REXML::ParseException.new(message, @source)
@@ -384,12 +459,11 @@ module REXML
384
459
  if md[0][0] == ?-
385
460
  md = @source.match(/--(.*?)-->/um, true)
386
461
 
387
- case md[1]
388
- when /--/, /-\z/
462
+ if md.nil? || /--|-\z/.match?(md[1])
389
463
  raise REXML::ParseException.new("Malformed comment", @source)
390
464
  end
391
465
 
392
- return [ :comment, md[1] ] if md
466
+ return [ :comment, md[1] ]
393
467
  else
394
468
  md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
395
469
  return [ :cdata, md[1] ] if md
@@ -397,38 +471,54 @@ module REXML
397
471
  raise REXML::ParseException.new( "Declarations can only occur "+
398
472
  "in the doctype declaration.", @source)
399
473
  elsif @source.match("?", true)
400
- return process_instruction(start_position)
474
+ return process_instruction
401
475
  else
402
476
  # Get the next tag
403
- md = @source.match(TAG_PATTERN, true)
477
+ md = @source.match(Private::TAG_PATTERN, true)
404
478
  unless md
405
479
  @source.position = start_position
406
480
  raise REXML::ParseException.new("malformed XML: missing tag start", @source)
407
481
  end
408
482
  tag = md[1]
409
483
  @document_status = :in_element
410
- prefixes = Set.new
411
- prefixes << md[2] if md[2]
412
- @nsstack.unshift(curr_ns=Set.new)
413
- attributes, closed = parse_attributes(prefixes, curr_ns)
484
+ @prefixes.clear
485
+ @prefixes << md[2] if md[2]
486
+ push_namespaces_restore
487
+ attributes, closed = parse_attributes(@prefixes)
414
488
  # Verify that all of the prefixes have been defined
415
- for prefix in prefixes
416
- unless @nsstack.find{|k| k.member?(prefix)}
489
+ for prefix in @prefixes
490
+ unless @namespaces.key?(prefix)
417
491
  raise UndefinedNamespaceException.new(prefix,@source,self)
418
492
  end
419
493
  end
420
494
 
421
495
  if closed
422
496
  @closed = tag
423
- @nsstack.shift
497
+ pop_namespaces_restore
424
498
  else
499
+ if @tags.empty? and @have_root
500
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
501
+ end
425
502
  @tags.push( tag )
426
503
  end
504
+ @have_root = true
427
505
  return [ :start_element, tag, attributes ]
428
506
  end
429
507
  else
430
- md = @source.match(/([^<]*)/um, true)
431
- text = md[1]
508
+ text = @source.read_until("<")
509
+ if text.chomp!("<")
510
+ @source.position -= "<".bytesize
511
+ end
512
+ if @tags.empty?
513
+ unless /\A\s*\z/.match?(text)
514
+ if @have_root
515
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
516
+ else
517
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
518
+ end
519
+ end
520
+ return pull_event if @have_root
521
+ end
432
522
  return [ :text, text ]
433
523
  end
434
524
  rescue REXML::UndefinedNamespaceException
@@ -444,13 +534,13 @@ module REXML
444
534
  private :pull_event
445
535
 
446
536
  def entity( reference, entities )
447
- value = nil
448
- value = entities[ reference ] if entities
449
- if not value
450
- value = DEFAULT_ENTITIES[ reference ]
451
- value = value[2] if value
452
- end
453
- unnormalize( value, entities ) if value
537
+ return unless entities
538
+
539
+ value = entities[ reference ]
540
+ return if value.nil?
541
+
542
+ record_entity_expansion
543
+ unnormalize( value, entities )
454
544
  end
455
545
 
456
546
  # Escapes all possible entities
@@ -471,34 +561,87 @@ module REXML
471
561
 
472
562
  # Unescapes all possible entities
473
563
  def unnormalize( string, entities=nil, filter=nil )
474
- rv = string.gsub( /\r\n?/, "\n" )
564
+ if string.include?("\r")
565
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
566
+ else
567
+ rv = string.dup
568
+ end
475
569
  matches = rv.scan( REFERENCE_RE )
476
570
  return rv if matches.size == 0
477
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
571
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
478
572
  m=$1
479
- m = "0#{m}" if m[0] == ?x
480
- [Integer(m)].pack('U*')
573
+ if m.start_with?("x")
574
+ code_point = Integer(m[1..-1], 16)
575
+ else
576
+ code_point = Integer(m, 10)
577
+ end
578
+ [code_point].pack('U*')
481
579
  }
482
580
  matches.collect!{|x|x[0]}.compact!
581
+ if filter
582
+ matches.reject! do |entity_reference|
583
+ filter.include?(entity_reference)
584
+ end
585
+ end
483
586
  if matches.size > 0
484
- matches.each do |entity_reference|
485
- unless filter and filter.include?(entity_reference)
486
- entity_value = entity( entity_reference, entities )
487
- if entity_value
488
- re = /&#{entity_reference};/
489
- rv.gsub!( re, entity_value )
490
- else
491
- er = DEFAULT_ENTITIES[entity_reference]
492
- rv.gsub!( er[0], er[2] ) if er
587
+ matches.tally.each do |entity_reference, n|
588
+ entity_expansion_count_before = @entity_expansion_count
589
+ entity_value = entity( entity_reference, entities )
590
+ if entity_value
591
+ if n > 1
592
+ entity_expansion_count_delta =
593
+ @entity_expansion_count - entity_expansion_count_before
594
+ record_entity_expansion(entity_expansion_count_delta * (n - 1))
595
+ end
596
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
597
+ rv.gsub!( re, entity_value )
598
+ if rv.bytesize > @entity_expansion_text_limit
599
+ raise "entity expansion has grown too large"
493
600
  end
601
+ else
602
+ er = DEFAULT_ENTITIES[entity_reference]
603
+ rv.gsub!( er[0], er[2] ) if er
494
604
  end
495
605
  end
496
- rv.gsub!( /&amp;/, '&' )
606
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
497
607
  end
498
608
  rv
499
609
  end
500
610
 
501
611
  private
612
+ def add_namespace(prefix, uri)
613
+ @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
614
+ if uri.nil?
615
+ @namespaces.delete(prefix)
616
+ else
617
+ @namespaces[prefix] = uri
618
+ end
619
+ end
620
+
621
+ def push_namespaces_restore
622
+ namespaces_restore = {}
623
+ @namespaces_restore_stack.push(namespaces_restore)
624
+ namespaces_restore
625
+ end
626
+
627
+ def pop_namespaces_restore
628
+ namespaces_restore = @namespaces_restore_stack.pop
629
+ namespaces_restore.each do |prefix, uri|
630
+ if uri.nil?
631
+ @namespaces.delete(prefix)
632
+ else
633
+ @namespaces[prefix] = uri
634
+ end
635
+ end
636
+ end
637
+
638
+ def record_entity_expansion(delta=1)
639
+ @entity_expansion_count += delta
640
+ if @entity_expansion_count > @entity_expansion_limit
641
+ raise "number of entity expansions exceeded, processing aborted."
642
+ end
643
+ end
644
+
502
645
  def need_source_encoding_update?(xml_declaration_encoding)
503
646
  return false if xml_declaration_encoding.nil?
504
647
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -506,16 +649,16 @@ module REXML
506
649
  end
507
650
 
508
651
  def parse_name(base_error_message)
509
- md = @source.match(NAME_PATTERN, true)
652
+ md = @source.match(Private::NAME_PATTERN, true)
510
653
  unless md
511
- if @source.match(/\s*\S/um)
654
+ if @source.match(/\S/um)
512
655
  message = "#{base_error_message}: invalid name"
513
656
  else
514
657
  message = "#{base_error_message}: name is missing"
515
658
  end
516
659
  raise REXML::ParseException.new(message, @source)
517
660
  end
518
- md[1]
661
+ md[0]
519
662
  end
520
663
 
521
664
  def parse_id(base_error_message,
@@ -584,15 +727,24 @@ module REXML
584
727
  end
585
728
  end
586
729
 
587
- def process_instruction(start_position)
588
- match_data = @source.match(INSTRUCTION_END, true)
589
- unless match_data
590
- message = "Invalid processing instruction node"
591
- @source.position = start_position
592
- raise REXML::ParseException.new(message, @source)
730
+ def process_instruction
731
+ name = parse_name("Malformed XML: Invalid processing instruction node")
732
+ if @source.match(/\s+/um, true)
733
+ match_data = @source.match(/(.*?)\?>/um, true)
734
+ unless match_data
735
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
736
+ end
737
+ content = match_data[1]
738
+ else
739
+ content = nil
740
+ unless @source.match("?>", true)
741
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
742
+ end
593
743
  end
594
- if @document_status.nil? and match_data[1] == "xml"
595
- content = match_data[2]
744
+ if name == "xml"
745
+ if @document_status
746
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
747
+ end
596
748
  version = VERSION.match(content)
597
749
  version = version[1] unless version.nil?
598
750
  encoding = ENCODING.match(content)
@@ -607,11 +759,12 @@ module REXML
607
759
  standalone = standalone[1] unless standalone.nil?
608
760
  return [ :xmldecl, version, encoding, standalone ]
609
761
  end
610
- [:processing_instruction, match_data[1], match_data[2]]
762
+ [:processing_instruction, name, content]
611
763
  end
612
764
 
613
- def parse_attributes(prefixes, curr_ns)
765
+ def parse_attributes(prefixes)
614
766
  attributes = {}
767
+ expanded_names = {}
615
768
  closed = false
616
769
  while true
617
770
  if @source.match(">", true)
@@ -633,15 +786,17 @@ module REXML
633
786
  raise REXML::ParseException.new(message, @source)
634
787
  end
635
788
  quote = match[1]
789
+ start_position = @source.position
636
790
  value = @source.read_until(quote)
637
791
  unless value.chomp!(quote)
792
+ @source.position = start_position
638
793
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
639
794
  raise REXML::ParseException.new(message, @source)
640
795
  end
641
796
  @source.match(/\s*/um, true)
642
797
  if prefix == "xmlns"
643
798
  if local_part == "xml"
644
- if value != "http://www.w3.org/XML/1998/namespace"
799
+ if value != Private::XML_PREFIXED_NAMESPACE
645
800
  msg = "The 'xml' prefix must not be bound to any other namespace "+
646
801
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
647
802
  raise REXML::ParseException.new( msg, @source, self )
@@ -651,7 +806,7 @@ module REXML
651
806
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
652
807
  raise REXML::ParseException.new( msg, @source, self)
653
808
  end
654
- curr_ns << local_part
809
+ add_namespace(local_part, value)
655
810
  elsif prefix
656
811
  prefixes << prefix unless prefix == "xml"
657
812
  end
@@ -661,6 +816,20 @@ module REXML
661
816
  raise REXML::ParseException.new(msg, @source, self)
662
817
  end
663
818
 
819
+ unless prefix == "xmlns"
820
+ uri = @namespaces[prefix]
821
+ expanded_name = [uri, local_part]
822
+ existing_prefix = expanded_names[expanded_name]
823
+ if existing_prefix
824
+ message = "Namespace conflict in adding attribute " +
825
+ "\"#{local_part}\": " +
826
+ "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
827
+ "prefix \"#{prefix}\" = \"#{uri}\""
828
+ raise REXML::ParseException.new(message, @source, self)
829
+ end
830
+ expanded_names[expanded_name] = prefix
831
+ end
832
+
664
833
  attributes[name] = value
665
834
  else
666
835
  message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
@@ -47,6 +47,18 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
54
+ def entity_expansion_limit=( limit )
55
+ @parser.entity_expansion_limit = limit
56
+ end
57
+
58
+ def entity_expansion_text_limit=( limit )
59
+ @parser.entity_expansion_text_limit = limit
60
+ end
61
+
50
62
  def each
51
63
  while has_next?
52
64
  yield self.pull
@@ -22,6 +22,18 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
29
+ def entity_expansion_limit=( limit )
30
+ @parser.entity_expansion_limit = limit
31
+ end
32
+
33
+ def entity_expansion_text_limit=( limit )
34
+ @parser.entity_expansion_text_limit = limit
35
+ end
36
+
25
37
  def add_listener( listener )
26
38
  @parser.add_listener( listener )
27
39
  end
@@ -157,25 +169,8 @@ module REXML
157
169
  end
158
170
  end
159
171
  when :text
160
- #normalized = @parser.normalize( event[1] )
161
- #handle( :characters, normalized )
162
- copy = event[1].clone
163
-
164
- esub = proc { |match|
165
- if @entities.has_key?($1)
166
- @entities[$1].gsub(Text::REFERENCE, &esub)
167
- else
168
- match
169
- end
170
- }
171
-
172
- copy.gsub!( Text::REFERENCE, &esub )
173
- copy.gsub!( Text::NUMERICENTITY ) {|m|
174
- m=$1
175
- m = "0#{m}" if m[0] == ?x
176
- [Integer(m)].pack('U*')
177
- }
178
- handle( :characters, copy )
172
+ unnormalized = @parser.unnormalize( event[1], @entities )
173
+ handle( :characters, unnormalized )
179
174
  when :entitydecl
180
175
  handle_entitydecl( event )
181
176
  when :processing_instruction, :comment, :attlistdecl,
@@ -264,6 +259,8 @@ module REXML
264
259
  end
265
260
 
266
261
  def get_namespace( prefix )
262
+ return nil if @namespace_stack.empty?
263
+
267
264
  uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
268
265
  (@namespace_stack.find { |ns| not ns[nil].nil? })
269
266
  uris[-1][prefix] unless uris.nil? or 0 == uris.size
@@ -7,37 +7,42 @@ module REXML
7
7
  def initialize source, listener
8
8
  @listener = listener
9
9
  @parser = BaseParser.new( source )
10
- @tag_stack = []
10
+ @entities = {}
11
11
  end
12
12
 
13
13
  def add_listener( listener )
14
14
  @parser.add_listener( listener )
15
15
  end
16
16
 
17
+ def entity_expansion_count
18
+ @parser.entity_expansion_count
19
+ end
20
+
21
+ def entity_expansion_limit=( limit )
22
+ @parser.entity_expansion_limit = limit
23
+ end
24
+
25
+ def entity_expansion_text_limit=( limit )
26
+ @parser.entity_expansion_text_limit = limit
27
+ end
28
+
17
29
  def parse
18
30
  # entity string
19
31
  while true
20
32
  event = @parser.pull
21
33
  case event[0]
22
34
  when :end_document
23
- unless @tag_stack.empty?
24
- tag_path = "/" + @tag_stack.join("/")
25
- raise ParseException.new("Missing end tag for '#{tag_path}'",
26
- @parser.source)
27
- end
28
35
  return
29
36
  when :start_element
30
- @tag_stack << event[1]
31
37
  attrs = event[2].each do |n, v|
32
38
  event[2][n] = @parser.unnormalize( v )
33
39
  end
34
40
  @listener.tag_start( event[1], attrs )
35
41
  when :end_element
36
42
  @listener.tag_end( event[1] )
37
- @tag_stack.pop
38
43
  when :text
39
- normalized = @parser.unnormalize( event[1] )
40
- @listener.text( normalized )
44
+ unnormalized = @parser.unnormalize( event[1], @entities )
45
+ @listener.text( unnormalized )
41
46
  when :processing_instruction
42
47
  @listener.instruction( *event[1,2] )
43
48
  when :start_doctype
@@ -48,6 +53,7 @@ module REXML
48
53
  when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
49
54
  @listener.send( event[0].to_s, *event[1..-1] )
50
55
  when :entitydecl, :notationdecl
56
+ @entities[ event[1] ] = event[2] if event.size == 3
51
57
  @listener.send( event[0].to_s, event[1..-1] )
52
58
  when :externalentity
53
59
  entity_reference = event[1]