rexml 3.2.7 → 3.3.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative '../parseexception'
3
3
  require_relative '../undefinednamespaceexception'
4
+ require_relative '../security'
4
5
  require_relative '../source'
5
6
  require 'set'
6
7
  require "strscan"
7
8
 
8
9
  module REXML
9
10
  module Parsers
11
+ unless [].respond_to?(:tally)
12
+ module EnumerableTally
13
+ refine Enumerable do
14
+ def tally
15
+ counts = {}
16
+ each do |item|
17
+ counts[item] ||= 0
18
+ counts[item] += 1
19
+ end
20
+ counts
21
+ end
22
+ end
23
+ end
24
+ using EnumerableTally
25
+ end
26
+
27
+ if StringScanner::Version < "3.0.8"
28
+ module StringScannerCaptures
29
+ refine StringScanner do
30
+ def captures
31
+ values_at(*(1...size))
32
+ end
33
+ end
34
+ end
35
+ using StringScannerCaptures
36
+ end
37
+
10
38
  # = Using the Pull Parser
11
39
  # <em>This API is experimental, and subject to change.</em>
12
40
  # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
@@ -113,21 +141,31 @@ module REXML
113
141
  }
114
142
 
115
143
  module Private
116
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
144
+ PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
117
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
118
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
119
147
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
120
- NAME_PATTERN = /\s*#{NAME}/um
148
+ NAME_PATTERN = /#{NAME}/um
121
149
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
122
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
123
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
152
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
153
+ CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
154
+ DEFAULT_ENTITIES_PATTERNS = {}
155
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
156
+ default_entities.each do |term|
157
+ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
158
+ end
124
159
  end
125
160
  private_constant :Private
126
- include Private
127
161
 
128
162
  def initialize( source )
129
163
  self.stream = source
130
164
  @listeners = []
165
+ @prefixes = Set.new
166
+ @entity_expansion_count = 0
167
+ @entity_expansion_limit = Security.entity_expansion_limit
168
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
131
169
  end
132
170
 
133
171
  def add_listener( listener )
@@ -135,15 +173,20 @@ module REXML
135
173
  end
136
174
 
137
175
  attr_reader :source
176
+ attr_reader :entity_expansion_count
177
+ attr_writer :entity_expansion_limit
178
+ attr_writer :entity_expansion_text_limit
138
179
 
139
180
  def stream=( source )
140
181
  @source = SourceFactory.create_from( source )
141
182
  @closed = nil
183
+ @have_root = false
142
184
  @document_status = nil
143
185
  @tags = []
144
186
  @stack = []
145
187
  @entities = []
146
- @nsstack = []
188
+ @namespaces = {}
189
+ @namespaces_restore_stack = []
147
190
  end
148
191
 
149
192
  def position
@@ -193,6 +236,8 @@ module REXML
193
236
 
194
237
  # Returns the next event. This is a +PullEvent+ object.
195
238
  def pull
239
+ @source.drop_parsed_content
240
+
196
241
  pull_event.tap do |event|
197
242
  @listeners.each do |listener|
198
243
  listener.receive event
@@ -205,7 +250,16 @@ module REXML
205
250
  x, @closed = @closed, nil
206
251
  return [ :end_element, x ]
207
252
  end
208
- return [ :end_document ] if empty?
253
+ if empty?
254
+ if @document_status == :in_doctype
255
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
256
+ end
257
+ unless @tags.empty?
258
+ path = "/" + @tags.join("/")
259
+ raise ParseException.new("Missing end tag for '#{path}'", @source)
260
+ end
261
+ return [ :end_document ]
262
+ end
209
263
  return @stack.shift if @stack.size > 0
210
264
  #STDERR.puts @source.encoding
211
265
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
@@ -214,10 +268,17 @@ module REXML
214
268
  if @document_status == nil
215
269
  start_position = @source.position
216
270
  if @source.match("<?", true)
217
- return process_instruction(start_position)
271
+ return process_instruction
218
272
  elsif @source.match("<!", true)
219
273
  if @source.match("--", true)
220
- return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
274
+ md = @source.match(/(.*?)-->/um, true)
275
+ if md.nil?
276
+ raise REXML::ParseException.new("Unclosed comment", @source)
277
+ end
278
+ if /--|-\z/.match?(md[1])
279
+ raise REXML::ParseException.new("Malformed comment", @source)
280
+ end
281
+ return [ :comment, md[1] ]
221
282
  elsif @source.match("DOCTYPE", true)
222
283
  base_error_message = "Malformed DOCTYPE"
223
284
  unless @source.match(/\s+/um, true)
@@ -229,7 +290,6 @@ module REXML
229
290
  @source.position = start_position
230
291
  raise REXML::ParseException.new(message, @source)
231
292
  end
232
- @nsstack.unshift(curr_ns=Set.new)
233
293
  name = parse_name(base_error_message)
234
294
  if @source.match(/\s*\[/um, true)
235
295
  id = [nil, nil, nil]
@@ -277,7 +337,11 @@ module REXML
277
337
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
278
338
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
279
339
  elsif @source.match("ENTITY", true)
280
- match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
340
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
341
+ unless match_data
342
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
343
+ end
344
+ match = [:entitydecl, *match_data.captures.compact]
281
345
  ref = false
282
346
  if match[1] == '%'
283
347
  ref = true
@@ -295,6 +359,8 @@ module REXML
295
359
  match[4] = match[4][1..-2] # HREF
296
360
  match.delete_at(5) if match.size > 5 # Chop out NDATA decl
297
361
  # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
362
+ elsif Private::PEREFERENCE_PATTERN.match?(match[2])
363
+ raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
298
364
  else
299
365
  match[2] = match[2][1..-2]
300
366
  match.pop if match.size == 4
@@ -303,13 +369,13 @@ module REXML
303
369
  match << '%' if ref
304
370
  return match
305
371
  elsif @source.match("ATTLIST", true)
306
- md = @source.match(ATTLISTDECL_END, true)
372
+ md = @source.match(Private::ATTLISTDECL_END, true)
307
373
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
308
374
  element = md[1]
309
375
  contents = md[0]
310
376
 
311
377
  pairs = {}
312
- values = md[0].scan( ATTDEF_RE )
378
+ values = md[0].strip.scan( ATTDEF_RE )
313
379
  values.each do |attdef|
314
380
  unless attdef[3] == "#IMPLIED"
315
381
  attdef.compact!
@@ -317,7 +383,7 @@ module REXML
317
383
  val = attdef[4] if val == "#FIXED "
318
384
  pairs[attdef[0]] = val
319
385
  if attdef[0] =~ /^xmlns:(.*)/
320
- @nsstack[0] << $1
386
+ @namespaces[$1] = val
321
387
  end
322
388
  end
323
389
  end
@@ -355,6 +421,9 @@ module REXML
355
421
  @document_status = :after_doctype
356
422
  return [ :end_doctype ]
357
423
  end
424
+ if @document_status == :in_doctype
425
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
426
+ end
358
427
  end
359
428
  if @document_status == :after_doctype
360
429
  @source.match(/\s*/um, true)
@@ -362,10 +431,14 @@ module REXML
362
431
  begin
363
432
  start_position = @source.position
364
433
  if @source.match("<", true)
434
+ # :text's read_until may remain only "<" in buffer. In the
435
+ # case, buffer is empty here. So we need to fill buffer
436
+ # here explicitly.
437
+ @source.ensure_buffer
365
438
  if @source.match("/", true)
366
- @nsstack.shift
439
+ @namespaces_restore_stack.pop
367
440
  last_tag = @tags.pop
368
- md = @source.match(CLOSE_PATTERN, true)
441
+ md = @source.match(Private::CLOSE_PATTERN, true)
369
442
  if md and !last_tag
370
443
  message = "Unexpected top-level end tag (got '#{md[1]}')"
371
444
  raise REXML::ParseException.new(message, @source)
@@ -384,12 +457,11 @@ module REXML
384
457
  if md[0][0] == ?-
385
458
  md = @source.match(/--(.*?)-->/um, true)
386
459
 
387
- case md[1]
388
- when /--/, /-\z/
460
+ if md.nil? || /--|-\z/.match?(md[1])
389
461
  raise REXML::ParseException.new("Malformed comment", @source)
390
462
  end
391
463
 
392
- return [ :comment, md[1] ] if md
464
+ return [ :comment, md[1] ]
393
465
  else
394
466
  md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
395
467
  return [ :cdata, md[1] ] if md
@@ -397,38 +469,54 @@ module REXML
397
469
  raise REXML::ParseException.new( "Declarations can only occur "+
398
470
  "in the doctype declaration.", @source)
399
471
  elsif @source.match("?", true)
400
- return process_instruction(start_position)
472
+ return process_instruction
401
473
  else
402
474
  # Get the next tag
403
- md = @source.match(TAG_PATTERN, true)
475
+ md = @source.match(Private::TAG_PATTERN, true)
404
476
  unless md
405
477
  @source.position = start_position
406
478
  raise REXML::ParseException.new("malformed XML: missing tag start", @source)
407
479
  end
408
480
  tag = md[1]
409
481
  @document_status = :in_element
410
- prefixes = Set.new
411
- prefixes << md[2] if md[2]
412
- @nsstack.unshift(curr_ns=Set.new)
413
- attributes, closed = parse_attributes(prefixes, curr_ns)
482
+ @prefixes.clear
483
+ @prefixes << md[2] if md[2]
484
+ push_namespaces_restore
485
+ attributes, closed = parse_attributes(@prefixes)
414
486
  # Verify that all of the prefixes have been defined
415
- for prefix in prefixes
416
- unless @nsstack.find{|k| k.member?(prefix)}
487
+ for prefix in @prefixes
488
+ unless @namespaces.key?(prefix)
417
489
  raise UndefinedNamespaceException.new(prefix,@source,self)
418
490
  end
419
491
  end
420
492
 
421
493
  if closed
422
494
  @closed = tag
423
- @nsstack.shift
495
+ pop_namespaces_restore
424
496
  else
497
+ if @tags.empty? and @have_root
498
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
499
+ end
425
500
  @tags.push( tag )
426
501
  end
502
+ @have_root = true
427
503
  return [ :start_element, tag, attributes ]
428
504
  end
429
505
  else
430
- md = @source.match(/([^<]*)/um, true)
431
- text = md[1]
506
+ text = @source.read_until("<")
507
+ if text.chomp!("<")
508
+ @source.position -= "<".bytesize
509
+ end
510
+ if @tags.empty?
511
+ unless /\A\s*\z/.match?(text)
512
+ if @have_root
513
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
514
+ else
515
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
516
+ end
517
+ end
518
+ return pull_event if @have_root
519
+ end
432
520
  return [ :text, text ]
433
521
  end
434
522
  rescue REXML::UndefinedNamespaceException
@@ -444,13 +532,13 @@ module REXML
444
532
  private :pull_event
445
533
 
446
534
  def entity( reference, entities )
447
- value = nil
448
- value = entities[ reference ] if entities
449
- if not value
450
- value = DEFAULT_ENTITIES[ reference ]
451
- value = value[2] if value
452
- end
453
- unnormalize( value, entities ) if value
535
+ return unless entities
536
+
537
+ value = entities[ reference ]
538
+ return if value.nil?
539
+
540
+ record_entity_expansion
541
+ unnormalize( value, entities )
454
542
  end
455
543
 
456
544
  # Escapes all possible entities
@@ -471,34 +559,83 @@ module REXML
471
559
 
472
560
  # Unescapes all possible entities
473
561
  def unnormalize( string, entities=nil, filter=nil )
474
- rv = string.gsub( /\r\n?/, "\n" )
562
+ if string.include?("\r")
563
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
564
+ else
565
+ rv = string.dup
566
+ end
475
567
  matches = rv.scan( REFERENCE_RE )
476
568
  return rv if matches.size == 0
477
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
569
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
478
570
  m=$1
479
571
  m = "0#{m}" if m[0] == ?x
480
572
  [Integer(m)].pack('U*')
481
573
  }
482
574
  matches.collect!{|x|x[0]}.compact!
575
+ if filter
576
+ matches.reject! do |entity_reference|
577
+ filter.include?(entity_reference)
578
+ end
579
+ end
483
580
  if matches.size > 0
484
- matches.each do |entity_reference|
485
- unless filter and filter.include?(entity_reference)
486
- entity_value = entity( entity_reference, entities )
487
- if entity_value
488
- re = /&#{entity_reference};/
489
- rv.gsub!( re, entity_value )
490
- else
491
- er = DEFAULT_ENTITIES[entity_reference]
492
- rv.gsub!( er[0], er[2] ) if er
581
+ matches.tally.each do |entity_reference, n|
582
+ entity_expansion_count_before = @entity_expansion_count
583
+ entity_value = entity( entity_reference, entities )
584
+ if entity_value
585
+ if n > 1
586
+ entity_expansion_count_delta =
587
+ @entity_expansion_count - entity_expansion_count_before
588
+ record_entity_expansion(entity_expansion_count_delta * (n - 1))
589
+ end
590
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
591
+ rv.gsub!( re, entity_value )
592
+ if rv.bytesize > @entity_expansion_text_limit
593
+ raise "entity expansion has grown too large"
493
594
  end
595
+ else
596
+ er = DEFAULT_ENTITIES[entity_reference]
597
+ rv.gsub!( er[0], er[2] ) if er
494
598
  end
495
599
  end
496
- rv.gsub!( /&amp;/, '&' )
600
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
497
601
  end
498
602
  rv
499
603
  end
500
604
 
501
605
  private
606
+ def add_namespace(prefix, uri)
607
+ @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
608
+ if uri.nil?
609
+ @namespaces.delete(prefix)
610
+ else
611
+ @namespaces[prefix] = uri
612
+ end
613
+ end
614
+
615
+ def push_namespaces_restore
616
+ namespaces_restore = {}
617
+ @namespaces_restore_stack.push(namespaces_restore)
618
+ namespaces_restore
619
+ end
620
+
621
+ def pop_namespaces_restore
622
+ namespaces_restore = @namespaces_restore_stack.pop
623
+ namespaces_restore.each do |prefix, uri|
624
+ if uri.nil?
625
+ @namespaces.delete(prefix)
626
+ else
627
+ @namespaces[prefix] = uri
628
+ end
629
+ end
630
+ end
631
+
632
+ def record_entity_expansion(delta=1)
633
+ @entity_expansion_count += delta
634
+ if @entity_expansion_count > @entity_expansion_limit
635
+ raise "number of entity expansions exceeded, processing aborted."
636
+ end
637
+ end
638
+
502
639
  def need_source_encoding_update?(xml_declaration_encoding)
503
640
  return false if xml_declaration_encoding.nil?
504
641
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -506,16 +643,16 @@ module REXML
506
643
  end
507
644
 
508
645
  def parse_name(base_error_message)
509
- md = @source.match(NAME_PATTERN, true)
646
+ md = @source.match(Private::NAME_PATTERN, true)
510
647
  unless md
511
- if @source.match(/\s*\S/um)
648
+ if @source.match(/\S/um)
512
649
  message = "#{base_error_message}: invalid name"
513
650
  else
514
651
  message = "#{base_error_message}: name is missing"
515
652
  end
516
653
  raise REXML::ParseException.new(message, @source)
517
654
  end
518
- md[1]
655
+ md[0]
519
656
  end
520
657
 
521
658
  def parse_id(base_error_message,
@@ -584,15 +721,24 @@ module REXML
584
721
  end
585
722
  end
586
723
 
587
- def process_instruction(start_position)
588
- match_data = @source.match(INSTRUCTION_END, true)
589
- unless match_data
590
- message = "Invalid processing instruction node"
591
- @source.position = start_position
592
- raise REXML::ParseException.new(message, @source)
724
+ def process_instruction
725
+ name = parse_name("Malformed XML: Invalid processing instruction node")
726
+ if @source.match(/\s+/um, true)
727
+ match_data = @source.match(/(.*?)\?>/um, true)
728
+ unless match_data
729
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
730
+ end
731
+ content = match_data[1]
732
+ else
733
+ content = nil
734
+ unless @source.match("?>", true)
735
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
736
+ end
593
737
  end
594
- if @document_status.nil? and match_data[1] == "xml"
595
- content = match_data[2]
738
+ if name == "xml"
739
+ if @document_status
740
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
741
+ end
596
742
  version = VERSION.match(content)
597
743
  version = version[1] unless version.nil?
598
744
  encoding = ENCODING.match(content)
@@ -607,11 +753,12 @@ module REXML
607
753
  standalone = standalone[1] unless standalone.nil?
608
754
  return [ :xmldecl, version, encoding, standalone ]
609
755
  end
610
- [:processing_instruction, match_data[1], match_data[2]]
756
+ [:processing_instruction, name, content]
611
757
  end
612
758
 
613
- def parse_attributes(prefixes, curr_ns)
759
+ def parse_attributes(prefixes)
614
760
  attributes = {}
761
+ expanded_names = {}
615
762
  closed = false
616
763
  while true
617
764
  if @source.match(">", true)
@@ -633,8 +780,10 @@ module REXML
633
780
  raise REXML::ParseException.new(message, @source)
634
781
  end
635
782
  quote = match[1]
783
+ start_position = @source.position
636
784
  value = @source.read_until(quote)
637
785
  unless value.chomp!(quote)
786
+ @source.position = start_position
638
787
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
639
788
  raise REXML::ParseException.new(message, @source)
640
789
  end
@@ -651,7 +800,7 @@ module REXML
651
800
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
652
801
  raise REXML::ParseException.new( msg, @source, self)
653
802
  end
654
- curr_ns << local_part
803
+ add_namespace(local_part, value)
655
804
  elsif prefix
656
805
  prefixes << prefix unless prefix == "xml"
657
806
  end
@@ -661,6 +810,20 @@ module REXML
661
810
  raise REXML::ParseException.new(msg, @source, self)
662
811
  end
663
812
 
813
+ unless prefix == "xmlns"
814
+ uri = @namespaces[prefix]
815
+ expanded_name = [uri, local_part]
816
+ existing_prefix = expanded_names[expanded_name]
817
+ if existing_prefix
818
+ message = "Namespace conflict in adding attribute " +
819
+ "\"#{local_part}\": " +
820
+ "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
821
+ "prefix \"#{prefix}\" = \"#{uri}\""
822
+ raise REXML::ParseException.new(message, @source, self)
823
+ end
824
+ expanded_names[expanded_name] = prefix
825
+ end
826
+
664
827
  attributes[name] = value
665
828
  else
666
829
  message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
@@ -47,6 +47,18 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
54
+ def entity_expansion_limit=( limit )
55
+ @parser.entity_expansion_limit = limit
56
+ end
57
+
58
+ def entity_expansion_text_limit=( limit )
59
+ @parser.entity_expansion_text_limit = limit
60
+ end
61
+
50
62
  def each
51
63
  while has_next?
52
64
  yield self.pull
@@ -22,6 +22,18 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
29
+ def entity_expansion_limit=( limit )
30
+ @parser.entity_expansion_limit = limit
31
+ end
32
+
33
+ def entity_expansion_text_limit=( limit )
34
+ @parser.entity_expansion_text_limit = limit
35
+ end
36
+
25
37
  def add_listener( listener )
26
38
  @parser.add_listener( listener )
27
39
  end
@@ -157,25 +169,8 @@ module REXML
157
169
  end
158
170
  end
159
171
  when :text
160
- #normalized = @parser.normalize( event[1] )
161
- #handle( :characters, normalized )
162
- copy = event[1].clone
163
-
164
- esub = proc { |match|
165
- if @entities.has_key?($1)
166
- @entities[$1].gsub(Text::REFERENCE, &esub)
167
- else
168
- match
169
- end
170
- }
171
-
172
- copy.gsub!( Text::REFERENCE, &esub )
173
- copy.gsub!( Text::NUMERICENTITY ) {|m|
174
- m=$1
175
- m = "0#{m}" if m[0] == ?x
176
- [Integer(m)].pack('U*')
177
- }
178
- handle( :characters, copy )
172
+ unnormalized = @parser.unnormalize( event[1], @entities )
173
+ handle( :characters, unnormalized )
179
174
  when :entitydecl
180
175
  handle_entitydecl( event )
181
176
  when :processing_instruction, :comment, :attlistdecl,
@@ -7,37 +7,42 @@ module REXML
7
7
  def initialize source, listener
8
8
  @listener = listener
9
9
  @parser = BaseParser.new( source )
10
- @tag_stack = []
10
+ @entities = {}
11
11
  end
12
12
 
13
13
  def add_listener( listener )
14
14
  @parser.add_listener( listener )
15
15
  end
16
16
 
17
+ def entity_expansion_count
18
+ @parser.entity_expansion_count
19
+ end
20
+
21
+ def entity_expansion_limit=( limit )
22
+ @parser.entity_expansion_limit = limit
23
+ end
24
+
25
+ def entity_expansion_text_limit=( limit )
26
+ @parser.entity_expansion_text_limit = limit
27
+ end
28
+
17
29
  def parse
18
30
  # entity string
19
31
  while true
20
32
  event = @parser.pull
21
33
  case event[0]
22
34
  when :end_document
23
- unless @tag_stack.empty?
24
- tag_path = "/" + @tag_stack.join("/")
25
- raise ParseException.new("Missing end tag for '#{tag_path}'",
26
- @parser.source)
27
- end
28
35
  return
29
36
  when :start_element
30
- @tag_stack << event[1]
31
37
  attrs = event[2].each do |n, v|
32
38
  event[2][n] = @parser.unnormalize( v )
33
39
  end
34
40
  @listener.tag_start( event[1], attrs )
35
41
  when :end_element
36
42
  @listener.tag_end( event[1] )
37
- @tag_stack.pop
38
43
  when :text
39
- normalized = @parser.unnormalize( event[1] )
40
- @listener.text( normalized )
44
+ unnormalized = @parser.unnormalize( event[1], @entities )
45
+ @listener.text( unnormalized )
41
46
  when :processing_instruction
42
47
  @listener.instruction( *event[1,2] )
43
48
  when :start_doctype
@@ -48,6 +53,7 @@ module REXML
48
53
  when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
49
54
  @listener.send( event[0].to_s, *event[1..-1] )
50
55
  when :entitydecl, :notationdecl
56
+ @entities[ event[1] ] = event[2] if event.size == 3
51
57
  @listener.send( event[0].to_s, event[1..-1] )
52
58
  when :externalentity
53
59
  entity_reference = event[1]