rexml 3.2.7 → 3.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

@@ -1,12 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative '../parseexception'
3
3
  require_relative '../undefinednamespaceexception'
4
+ require_relative '../security'
4
5
  require_relative '../source'
5
6
  require 'set'
6
7
  require "strscan"
7
8
 
8
9
  module REXML
9
10
  module Parsers
11
+ unless [].respond_to?(:tally)
12
+ module EnumerableTally
13
+ refine Enumerable do
14
+ def tally
15
+ counts = {}
16
+ each do |item|
17
+ counts[item] ||= 0
18
+ counts[item] += 1
19
+ end
20
+ counts
21
+ end
22
+ end
23
+ end
24
+ using EnumerableTally
25
+ end
26
+
27
+ if StringScanner::Version < "3.0.8"
28
+ module StringScannerCaptures
29
+ refine StringScanner do
30
+ def captures
31
+ values_at(*(1...size))
32
+ end
33
+ end
34
+ end
35
+ using StringScannerCaptures
36
+ end
37
+
10
38
  # = Using the Pull Parser
11
39
  # <em>This API is experimental, and subject to change.</em>
12
40
  # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
@@ -113,21 +141,31 @@ module REXML
113
141
  }
114
142
 
115
143
  module Private
116
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
144
+ PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
117
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
118
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
119
147
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
120
- NAME_PATTERN = /\s*#{NAME}/um
148
+ NAME_PATTERN = /#{NAME}/um
121
149
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
122
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
123
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
152
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
153
+ CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
154
+ DEFAULT_ENTITIES_PATTERNS = {}
155
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
156
+ default_entities.each do |term|
157
+ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
158
+ end
124
159
  end
125
160
  private_constant :Private
126
- include Private
127
161
 
128
162
  def initialize( source )
129
163
  self.stream = source
130
164
  @listeners = []
165
+ @prefixes = Set.new
166
+ @entity_expansion_count = 0
167
+ @entity_expansion_limit = Security.entity_expansion_limit
168
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
131
169
  end
132
170
 
133
171
  def add_listener( listener )
@@ -135,15 +173,20 @@ module REXML
135
173
  end
136
174
 
137
175
  attr_reader :source
176
+ attr_reader :entity_expansion_count
177
+ attr_writer :entity_expansion_limit
178
+ attr_writer :entity_expansion_text_limit
138
179
 
139
180
  def stream=( source )
140
181
  @source = SourceFactory.create_from( source )
141
182
  @closed = nil
183
+ @have_root = false
142
184
  @document_status = nil
143
185
  @tags = []
144
186
  @stack = []
145
187
  @entities = []
146
- @nsstack = []
188
+ @namespaces = {}
189
+ @namespaces_restore_stack = []
147
190
  end
148
191
 
149
192
  def position
@@ -193,6 +236,8 @@ module REXML
193
236
 
194
237
  # Returns the next event. This is a +PullEvent+ object.
195
238
  def pull
239
+ @source.drop_parsed_content
240
+
196
241
  pull_event.tap do |event|
197
242
  @listeners.each do |listener|
198
243
  listener.receive event
@@ -205,7 +250,16 @@ module REXML
205
250
  x, @closed = @closed, nil
206
251
  return [ :end_element, x ]
207
252
  end
208
- return [ :end_document ] if empty?
253
+ if empty?
254
+ if @document_status == :in_doctype
255
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
256
+ end
257
+ unless @tags.empty?
258
+ path = "/" + @tags.join("/")
259
+ raise ParseException.new("Missing end tag for '#{path}'", @source)
260
+ end
261
+ return [ :end_document ]
262
+ end
209
263
  return @stack.shift if @stack.size > 0
210
264
  #STDERR.puts @source.encoding
211
265
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
@@ -214,10 +268,17 @@ module REXML
214
268
  if @document_status == nil
215
269
  start_position = @source.position
216
270
  if @source.match("<?", true)
217
- return process_instruction(start_position)
271
+ return process_instruction
218
272
  elsif @source.match("<!", true)
219
273
  if @source.match("--", true)
220
- return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
274
+ md = @source.match(/(.*?)-->/um, true)
275
+ if md.nil?
276
+ raise REXML::ParseException.new("Unclosed comment", @source)
277
+ end
278
+ if /--|-\z/.match?(md[1])
279
+ raise REXML::ParseException.new("Malformed comment", @source)
280
+ end
281
+ return [ :comment, md[1] ]
221
282
  elsif @source.match("DOCTYPE", true)
222
283
  base_error_message = "Malformed DOCTYPE"
223
284
  unless @source.match(/\s+/um, true)
@@ -229,7 +290,6 @@ module REXML
229
290
  @source.position = start_position
230
291
  raise REXML::ParseException.new(message, @source)
231
292
  end
232
- @nsstack.unshift(curr_ns=Set.new)
233
293
  name = parse_name(base_error_message)
234
294
  if @source.match(/\s*\[/um, true)
235
295
  id = [nil, nil, nil]
@@ -277,7 +337,11 @@ module REXML
277
337
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
278
338
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
279
339
  elsif @source.match("ENTITY", true)
280
- match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
340
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
341
+ unless match_data
342
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
343
+ end
344
+ match = [:entitydecl, *match_data.captures.compact]
281
345
  ref = false
282
346
  if match[1] == '%'
283
347
  ref = true
@@ -295,6 +359,8 @@ module REXML
295
359
  match[4] = match[4][1..-2] # HREF
296
360
  match.delete_at(5) if match.size > 5 # Chop out NDATA decl
297
361
  # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
362
+ elsif Private::PEREFERENCE_PATTERN.match?(match[2])
363
+ raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
298
364
  else
299
365
  match[2] = match[2][1..-2]
300
366
  match.pop if match.size == 4
@@ -303,13 +369,13 @@ module REXML
303
369
  match << '%' if ref
304
370
  return match
305
371
  elsif @source.match("ATTLIST", true)
306
- md = @source.match(ATTLISTDECL_END, true)
372
+ md = @source.match(Private::ATTLISTDECL_END, true)
307
373
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
308
374
  element = md[1]
309
375
  contents = md[0]
310
376
 
311
377
  pairs = {}
312
- values = md[0].scan( ATTDEF_RE )
378
+ values = md[0].strip.scan( ATTDEF_RE )
313
379
  values.each do |attdef|
314
380
  unless attdef[3] == "#IMPLIED"
315
381
  attdef.compact!
@@ -317,7 +383,7 @@ module REXML
317
383
  val = attdef[4] if val == "#FIXED "
318
384
  pairs[attdef[0]] = val
319
385
  if attdef[0] =~ /^xmlns:(.*)/
320
- @nsstack[0] << $1
386
+ @namespaces[$1] = val
321
387
  end
322
388
  end
323
389
  end
@@ -355,6 +421,9 @@ module REXML
355
421
  @document_status = :after_doctype
356
422
  return [ :end_doctype ]
357
423
  end
424
+ if @document_status == :in_doctype
425
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
426
+ end
358
427
  end
359
428
  if @document_status == :after_doctype
360
429
  @source.match(/\s*/um, true)
@@ -362,10 +431,14 @@ module REXML
362
431
  begin
363
432
  start_position = @source.position
364
433
  if @source.match("<", true)
434
+ # :text's read_until may remain only "<" in buffer. In the
435
+ # case, buffer is empty here. So we need to fill buffer
436
+ # here explicitly.
437
+ @source.ensure_buffer
365
438
  if @source.match("/", true)
366
- @nsstack.shift
439
+ @namespaces_restore_stack.pop
367
440
  last_tag = @tags.pop
368
- md = @source.match(CLOSE_PATTERN, true)
441
+ md = @source.match(Private::CLOSE_PATTERN, true)
369
442
  if md and !last_tag
370
443
  message = "Unexpected top-level end tag (got '#{md[1]}')"
371
444
  raise REXML::ParseException.new(message, @source)
@@ -384,12 +457,11 @@ module REXML
384
457
  if md[0][0] == ?-
385
458
  md = @source.match(/--(.*?)-->/um, true)
386
459
 
387
- case md[1]
388
- when /--/, /-\z/
460
+ if md.nil? || /--|-\z/.match?(md[1])
389
461
  raise REXML::ParseException.new("Malformed comment", @source)
390
462
  end
391
463
 
392
- return [ :comment, md[1] ] if md
464
+ return [ :comment, md[1] ]
393
465
  else
394
466
  md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
395
467
  return [ :cdata, md[1] ] if md
@@ -397,38 +469,54 @@ module REXML
397
469
  raise REXML::ParseException.new( "Declarations can only occur "+
398
470
  "in the doctype declaration.", @source)
399
471
  elsif @source.match("?", true)
400
- return process_instruction(start_position)
472
+ return process_instruction
401
473
  else
402
474
  # Get the next tag
403
- md = @source.match(TAG_PATTERN, true)
475
+ md = @source.match(Private::TAG_PATTERN, true)
404
476
  unless md
405
477
  @source.position = start_position
406
478
  raise REXML::ParseException.new("malformed XML: missing tag start", @source)
407
479
  end
408
480
  tag = md[1]
409
481
  @document_status = :in_element
410
- prefixes = Set.new
411
- prefixes << md[2] if md[2]
412
- @nsstack.unshift(curr_ns=Set.new)
413
- attributes, closed = parse_attributes(prefixes, curr_ns)
482
+ @prefixes.clear
483
+ @prefixes << md[2] if md[2]
484
+ push_namespaces_restore
485
+ attributes, closed = parse_attributes(@prefixes)
414
486
  # Verify that all of the prefixes have been defined
415
- for prefix in prefixes
416
- unless @nsstack.find{|k| k.member?(prefix)}
487
+ for prefix in @prefixes
488
+ unless @namespaces.key?(prefix)
417
489
  raise UndefinedNamespaceException.new(prefix,@source,self)
418
490
  end
419
491
  end
420
492
 
421
493
  if closed
422
494
  @closed = tag
423
- @nsstack.shift
495
+ pop_namespaces_restore
424
496
  else
497
+ if @tags.empty? and @have_root
498
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
499
+ end
425
500
  @tags.push( tag )
426
501
  end
502
+ @have_root = true
427
503
  return [ :start_element, tag, attributes ]
428
504
  end
429
505
  else
430
- md = @source.match(/([^<]*)/um, true)
431
- text = md[1]
506
+ text = @source.read_until("<")
507
+ if text.chomp!("<")
508
+ @source.position -= "<".bytesize
509
+ end
510
+ if @tags.empty?
511
+ unless /\A\s*\z/.match?(text)
512
+ if @have_root
513
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
514
+ else
515
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
516
+ end
517
+ end
518
+ return pull_event if @have_root
519
+ end
432
520
  return [ :text, text ]
433
521
  end
434
522
  rescue REXML::UndefinedNamespaceException
@@ -444,13 +532,13 @@ module REXML
444
532
  private :pull_event
445
533
 
446
534
  def entity( reference, entities )
447
- value = nil
448
- value = entities[ reference ] if entities
449
- if not value
450
- value = DEFAULT_ENTITIES[ reference ]
451
- value = value[2] if value
452
- end
453
- unnormalize( value, entities ) if value
535
+ return unless entities
536
+
537
+ value = entities[ reference ]
538
+ return if value.nil?
539
+
540
+ record_entity_expansion
541
+ unnormalize( value, entities )
454
542
  end
455
543
 
456
544
  # Escapes all possible entities
@@ -471,34 +559,83 @@ module REXML
471
559
 
472
560
  # Unescapes all possible entities
473
561
  def unnormalize( string, entities=nil, filter=nil )
474
- rv = string.gsub( /\r\n?/, "\n" )
562
+ if string.include?("\r")
563
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
564
+ else
565
+ rv = string.dup
566
+ end
475
567
  matches = rv.scan( REFERENCE_RE )
476
568
  return rv if matches.size == 0
477
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
569
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
478
570
  m=$1
479
571
  m = "0#{m}" if m[0] == ?x
480
572
  [Integer(m)].pack('U*')
481
573
  }
482
574
  matches.collect!{|x|x[0]}.compact!
575
+ if filter
576
+ matches.reject! do |entity_reference|
577
+ filter.include?(entity_reference)
578
+ end
579
+ end
483
580
  if matches.size > 0
484
- matches.each do |entity_reference|
485
- unless filter and filter.include?(entity_reference)
486
- entity_value = entity( entity_reference, entities )
487
- if entity_value
488
- re = /&#{entity_reference};/
489
- rv.gsub!( re, entity_value )
490
- else
491
- er = DEFAULT_ENTITIES[entity_reference]
492
- rv.gsub!( er[0], er[2] ) if er
581
+ matches.tally.each do |entity_reference, n|
582
+ entity_expansion_count_before = @entity_expansion_count
583
+ entity_value = entity( entity_reference, entities )
584
+ if entity_value
585
+ if n > 1
586
+ entity_expansion_count_delta =
587
+ @entity_expansion_count - entity_expansion_count_before
588
+ record_entity_expansion(entity_expansion_count_delta * (n - 1))
589
+ end
590
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
591
+ rv.gsub!( re, entity_value )
592
+ if rv.bytesize > @entity_expansion_text_limit
593
+ raise "entity expansion has grown too large"
493
594
  end
595
+ else
596
+ er = DEFAULT_ENTITIES[entity_reference]
597
+ rv.gsub!( er[0], er[2] ) if er
494
598
  end
495
599
  end
496
- rv.gsub!( /&amp;/, '&' )
600
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
497
601
  end
498
602
  rv
499
603
  end
500
604
 
501
605
  private
606
+ def add_namespace(prefix, uri)
607
+ @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
608
+ if uri.nil?
609
+ @namespaces.delete(prefix)
610
+ else
611
+ @namespaces[prefix] = uri
612
+ end
613
+ end
614
+
615
+ def push_namespaces_restore
616
+ namespaces_restore = {}
617
+ @namespaces_restore_stack.push(namespaces_restore)
618
+ namespaces_restore
619
+ end
620
+
621
+ def pop_namespaces_restore
622
+ namespaces_restore = @namespaces_restore_stack.pop
623
+ namespaces_restore.each do |prefix, uri|
624
+ if uri.nil?
625
+ @namespaces.delete(prefix)
626
+ else
627
+ @namespaces[prefix] = uri
628
+ end
629
+ end
630
+ end
631
+
632
+ def record_entity_expansion(delta=1)
633
+ @entity_expansion_count += delta
634
+ if @entity_expansion_count > @entity_expansion_limit
635
+ raise "number of entity expansions exceeded, processing aborted."
636
+ end
637
+ end
638
+
502
639
  def need_source_encoding_update?(xml_declaration_encoding)
503
640
  return false if xml_declaration_encoding.nil?
504
641
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -506,16 +643,16 @@ module REXML
506
643
  end
507
644
 
508
645
  def parse_name(base_error_message)
509
- md = @source.match(NAME_PATTERN, true)
646
+ md = @source.match(Private::NAME_PATTERN, true)
510
647
  unless md
511
- if @source.match(/\s*\S/um)
648
+ if @source.match(/\S/um)
512
649
  message = "#{base_error_message}: invalid name"
513
650
  else
514
651
  message = "#{base_error_message}: name is missing"
515
652
  end
516
653
  raise REXML::ParseException.new(message, @source)
517
654
  end
518
- md[1]
655
+ md[0]
519
656
  end
520
657
 
521
658
  def parse_id(base_error_message,
@@ -584,15 +721,24 @@ module REXML
584
721
  end
585
722
  end
586
723
 
587
- def process_instruction(start_position)
588
- match_data = @source.match(INSTRUCTION_END, true)
589
- unless match_data
590
- message = "Invalid processing instruction node"
591
- @source.position = start_position
592
- raise REXML::ParseException.new(message, @source)
724
+ def process_instruction
725
+ name = parse_name("Malformed XML: Invalid processing instruction node")
726
+ if @source.match(/\s+/um, true)
727
+ match_data = @source.match(/(.*?)\?>/um, true)
728
+ unless match_data
729
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
730
+ end
731
+ content = match_data[1]
732
+ else
733
+ content = nil
734
+ unless @source.match("?>", true)
735
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
736
+ end
593
737
  end
594
- if @document_status.nil? and match_data[1] == "xml"
595
- content = match_data[2]
738
+ if name == "xml"
739
+ if @document_status
740
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
741
+ end
596
742
  version = VERSION.match(content)
597
743
  version = version[1] unless version.nil?
598
744
  encoding = ENCODING.match(content)
@@ -607,11 +753,12 @@ module REXML
607
753
  standalone = standalone[1] unless standalone.nil?
608
754
  return [ :xmldecl, version, encoding, standalone ]
609
755
  end
610
- [:processing_instruction, match_data[1], match_data[2]]
756
+ [:processing_instruction, name, content]
611
757
  end
612
758
 
613
- def parse_attributes(prefixes, curr_ns)
759
+ def parse_attributes(prefixes)
614
760
  attributes = {}
761
+ expanded_names = {}
615
762
  closed = false
616
763
  while true
617
764
  if @source.match(">", true)
@@ -633,8 +780,10 @@ module REXML
633
780
  raise REXML::ParseException.new(message, @source)
634
781
  end
635
782
  quote = match[1]
783
+ start_position = @source.position
636
784
  value = @source.read_until(quote)
637
785
  unless value.chomp!(quote)
786
+ @source.position = start_position
638
787
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
639
788
  raise REXML::ParseException.new(message, @source)
640
789
  end
@@ -651,7 +800,7 @@ module REXML
651
800
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
652
801
  raise REXML::ParseException.new( msg, @source, self)
653
802
  end
654
- curr_ns << local_part
803
+ add_namespace(local_part, value)
655
804
  elsif prefix
656
805
  prefixes << prefix unless prefix == "xml"
657
806
  end
@@ -661,6 +810,20 @@ module REXML
661
810
  raise REXML::ParseException.new(msg, @source, self)
662
811
  end
663
812
 
813
+ unless prefix == "xmlns"
814
+ uri = @namespaces[prefix]
815
+ expanded_name = [uri, local_part]
816
+ existing_prefix = expanded_names[expanded_name]
817
+ if existing_prefix
818
+ message = "Namespace conflict in adding attribute " +
819
+ "\"#{local_part}\": " +
820
+ "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
821
+ "prefix \"#{prefix}\" = \"#{uri}\""
822
+ raise REXML::ParseException.new(message, @source, self)
823
+ end
824
+ expanded_names[expanded_name] = prefix
825
+ end
826
+
664
827
  attributes[name] = value
665
828
  else
666
829
  message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
@@ -47,6 +47,18 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
54
+ def entity_expansion_limit=( limit )
55
+ @parser.entity_expansion_limit = limit
56
+ end
57
+
58
+ def entity_expansion_text_limit=( limit )
59
+ @parser.entity_expansion_text_limit = limit
60
+ end
61
+
50
62
  def each
51
63
  while has_next?
52
64
  yield self.pull
@@ -22,6 +22,18 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
29
+ def entity_expansion_limit=( limit )
30
+ @parser.entity_expansion_limit = limit
31
+ end
32
+
33
+ def entity_expansion_text_limit=( limit )
34
+ @parser.entity_expansion_text_limit = limit
35
+ end
36
+
25
37
  def add_listener( listener )
26
38
  @parser.add_listener( listener )
27
39
  end
@@ -157,25 +169,8 @@ module REXML
157
169
  end
158
170
  end
159
171
  when :text
160
- #normalized = @parser.normalize( event[1] )
161
- #handle( :characters, normalized )
162
- copy = event[1].clone
163
-
164
- esub = proc { |match|
165
- if @entities.has_key?($1)
166
- @entities[$1].gsub(Text::REFERENCE, &esub)
167
- else
168
- match
169
- end
170
- }
171
-
172
- copy.gsub!( Text::REFERENCE, &esub )
173
- copy.gsub!( Text::NUMERICENTITY ) {|m|
174
- m=$1
175
- m = "0#{m}" if m[0] == ?x
176
- [Integer(m)].pack('U*')
177
- }
178
- handle( :characters, copy )
172
+ unnormalized = @parser.unnormalize( event[1], @entities )
173
+ handle( :characters, unnormalized )
179
174
  when :entitydecl
180
175
  handle_entitydecl( event )
181
176
  when :processing_instruction, :comment, :attlistdecl,
@@ -7,37 +7,42 @@ module REXML
7
7
  def initialize source, listener
8
8
  @listener = listener
9
9
  @parser = BaseParser.new( source )
10
- @tag_stack = []
10
+ @entities = {}
11
11
  end
12
12
 
13
13
  def add_listener( listener )
14
14
  @parser.add_listener( listener )
15
15
  end
16
16
 
17
+ def entity_expansion_count
18
+ @parser.entity_expansion_count
19
+ end
20
+
21
+ def entity_expansion_limit=( limit )
22
+ @parser.entity_expansion_limit = limit
23
+ end
24
+
25
+ def entity_expansion_text_limit=( limit )
26
+ @parser.entity_expansion_text_limit = limit
27
+ end
28
+
17
29
  def parse
18
30
  # entity string
19
31
  while true
20
32
  event = @parser.pull
21
33
  case event[0]
22
34
  when :end_document
23
- unless @tag_stack.empty?
24
- tag_path = "/" + @tag_stack.join("/")
25
- raise ParseException.new("Missing end tag for '#{tag_path}'",
26
- @parser.source)
27
- end
28
35
  return
29
36
  when :start_element
30
- @tag_stack << event[1]
31
37
  attrs = event[2].each do |n, v|
32
38
  event[2][n] = @parser.unnormalize( v )
33
39
  end
34
40
  @listener.tag_start( event[1], attrs )
35
41
  when :end_element
36
42
  @listener.tag_end( event[1] )
37
- @tag_stack.pop
38
43
  when :text
39
- normalized = @parser.unnormalize( event[1] )
40
- @listener.text( normalized )
44
+ unnormalized = @parser.unnormalize( event[1], @entities )
45
+ @listener.text( unnormalized )
41
46
  when :processing_instruction
42
47
  @listener.instruction( *event[1,2] )
43
48
  when :start_doctype
@@ -48,6 +53,7 @@ module REXML
48
53
  when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
49
54
  @listener.send( event[0].to_s, *event[1..-1] )
50
55
  when :entitydecl, :notationdecl
56
+ @entities[ event[1] ] = event[2] if event.size == 3
51
57
  @listener.send( event[0].to_s, event[1..-1] )
52
58
  when :externalentity
53
59
  entity_reference = event[1]