rexml 3.2.8 → 3.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative '../parseexception'
3
3
  require_relative '../undefinednamespaceexception'
4
+ require_relative '../security'
4
5
  require_relative '../source'
5
6
  require 'set'
6
7
  require "strscan"
7
8
 
8
9
  module REXML
9
10
  module Parsers
11
+ unless [].respond_to?(:tally)
12
+ module EnumerableTally
13
+ refine Enumerable do
14
+ def tally
15
+ counts = {}
16
+ each do |item|
17
+ counts[item] ||= 0
18
+ counts[item] += 1
19
+ end
20
+ counts
21
+ end
22
+ end
23
+ end
24
+ using EnumerableTally
25
+ end
26
+
27
+ if StringScanner::Version < "3.0.8"
28
+ module StringScannerCaptures
29
+ refine StringScanner do
30
+ def captures
31
+ values_at(*(1...size))
32
+ end
33
+ end
34
+ end
35
+ using StringScannerCaptures
36
+ end
37
+
10
38
  # = Using the Pull Parser
11
39
  # <em>This API is experimental, and subject to change.</em>
12
40
  # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
@@ -113,21 +141,33 @@ module REXML
113
141
  }
114
142
 
115
143
  module Private
116
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
144
+ PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
117
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
118
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
119
147
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
120
- NAME_PATTERN = /\s*#{NAME}/um
148
+ NAME_PATTERN = /#{NAME}/um
121
149
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
122
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
123
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
152
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
153
+ CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
154
+ DEFAULT_ENTITIES_PATTERNS = {}
155
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
156
+ default_entities.each do |term|
157
+ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
158
+ end
159
+ XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
124
160
  end
125
161
  private_constant :Private
126
- include Private
127
162
 
128
163
  def initialize( source )
129
164
  self.stream = source
130
165
  @listeners = []
166
+ @prefixes = Set.new
167
+ @entity_expansion_count = 0
168
+ @entity_expansion_limit = Security.entity_expansion_limit
169
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
170
+ @source.ensure_buffer
131
171
  end
132
172
 
133
173
  def add_listener( listener )
@@ -135,15 +175,20 @@ module REXML
135
175
  end
136
176
 
137
177
  attr_reader :source
178
+ attr_reader :entity_expansion_count
179
+ attr_writer :entity_expansion_limit
180
+ attr_writer :entity_expansion_text_limit
138
181
 
139
182
  def stream=( source )
140
183
  @source = SourceFactory.create_from( source )
141
184
  @closed = nil
185
+ @have_root = false
142
186
  @document_status = nil
143
187
  @tags = []
144
188
  @stack = []
145
189
  @entities = []
146
- @nsstack = []
190
+ @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
191
+ @namespaces_restore_stack = []
147
192
  end
148
193
 
149
194
  def position
@@ -193,6 +238,8 @@ module REXML
193
238
 
194
239
  # Returns the next event. This is a +PullEvent+ object.
195
240
  def pull
241
+ @source.drop_parsed_content
242
+
196
243
  pull_event.tap do |event|
197
244
  @listeners.each do |listener|
198
245
  listener.receive event
@@ -205,7 +252,16 @@ module REXML
205
252
  x, @closed = @closed, nil
206
253
  return [ :end_element, x ]
207
254
  end
208
- return [ :end_document ] if empty?
255
+ if empty?
256
+ if @document_status == :in_doctype
257
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
258
+ end
259
+ unless @tags.empty?
260
+ path = "/" + @tags.join("/")
261
+ raise ParseException.new("Missing end tag for '#{path}'", @source)
262
+ end
263
+ return [ :end_document ]
264
+ end
209
265
  return @stack.shift if @stack.size > 0
210
266
  #STDERR.puts @source.encoding
211
267
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
@@ -214,10 +270,17 @@ module REXML
214
270
  if @document_status == nil
215
271
  start_position = @source.position
216
272
  if @source.match("<?", true)
217
- return process_instruction(start_position)
273
+ return process_instruction
218
274
  elsif @source.match("<!", true)
219
275
  if @source.match("--", true)
220
- return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
276
+ md = @source.match(/(.*?)-->/um, true)
277
+ if md.nil?
278
+ raise REXML::ParseException.new("Unclosed comment", @source)
279
+ end
280
+ if /--|-\z/.match?(md[1])
281
+ raise REXML::ParseException.new("Malformed comment", @source)
282
+ end
283
+ return [ :comment, md[1] ]
221
284
  elsif @source.match("DOCTYPE", true)
222
285
  base_error_message = "Malformed DOCTYPE"
223
286
  unless @source.match(/\s+/um, true)
@@ -229,7 +292,6 @@ module REXML
229
292
  @source.position = start_position
230
293
  raise REXML::ParseException.new(message, @source)
231
294
  end
232
- @nsstack.unshift(curr_ns=Set.new)
233
295
  name = parse_name(base_error_message)
234
296
  if @source.match(/\s*\[/um, true)
235
297
  id = [nil, nil, nil]
@@ -277,7 +339,11 @@ module REXML
277
339
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
278
340
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
279
341
  elsif @source.match("ENTITY", true)
280
- match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
342
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
343
+ unless match_data
344
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
345
+ end
346
+ match = [:entitydecl, *match_data.captures.compact]
281
347
  ref = false
282
348
  if match[1] == '%'
283
349
  ref = true
@@ -295,6 +361,8 @@ module REXML
295
361
  match[4] = match[4][1..-2] # HREF
296
362
  match.delete_at(5) if match.size > 5 # Chop out NDATA decl
297
363
  # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
364
+ elsif Private::PEREFERENCE_PATTERN.match?(match[2])
365
+ raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
298
366
  else
299
367
  match[2] = match[2][1..-2]
300
368
  match.pop if match.size == 4
@@ -303,13 +371,13 @@ module REXML
303
371
  match << '%' if ref
304
372
  return match
305
373
  elsif @source.match("ATTLIST", true)
306
- md = @source.match(ATTLISTDECL_END, true)
374
+ md = @source.match(Private::ATTLISTDECL_END, true)
307
375
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
308
376
  element = md[1]
309
377
  contents = md[0]
310
378
 
311
379
  pairs = {}
312
- values = md[0].scan( ATTDEF_RE )
380
+ values = md[0].strip.scan( ATTDEF_RE )
313
381
  values.each do |attdef|
314
382
  unless attdef[3] == "#IMPLIED"
315
383
  attdef.compact!
@@ -317,7 +385,7 @@ module REXML
317
385
  val = attdef[4] if val == "#FIXED "
318
386
  pairs[attdef[0]] = val
319
387
  if attdef[0] =~ /^xmlns:(.*)/
320
- @nsstack[0] << $1
388
+ @namespaces[$1] = val
321
389
  end
322
390
  end
323
391
  end
@@ -355,6 +423,9 @@ module REXML
355
423
  @document_status = :after_doctype
356
424
  return [ :end_doctype ]
357
425
  end
426
+ if @document_status == :in_doctype
427
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
428
+ end
358
429
  end
359
430
  if @document_status == :after_doctype
360
431
  @source.match(/\s*/um, true)
@@ -362,10 +433,14 @@ module REXML
362
433
  begin
363
434
  start_position = @source.position
364
435
  if @source.match("<", true)
436
+ # :text's read_until may remain only "<" in buffer. In the
437
+ # case, buffer is empty here. So we need to fill buffer
438
+ # here explicitly.
439
+ @source.ensure_buffer
365
440
  if @source.match("/", true)
366
- @nsstack.shift
441
+ @namespaces_restore_stack.pop
367
442
  last_tag = @tags.pop
368
- md = @source.match(CLOSE_PATTERN, true)
443
+ md = @source.match(Private::CLOSE_PATTERN, true)
369
444
  if md and !last_tag
370
445
  message = "Unexpected top-level end tag (got '#{md[1]}')"
371
446
  raise REXML::ParseException.new(message, @source)
@@ -384,12 +459,11 @@ module REXML
384
459
  if md[0][0] == ?-
385
460
  md = @source.match(/--(.*?)-->/um, true)
386
461
 
387
- case md[1]
388
- when /--/, /-\z/
462
+ if md.nil? || /--|-\z/.match?(md[1])
389
463
  raise REXML::ParseException.new("Malformed comment", @source)
390
464
  end
391
465
 
392
- return [ :comment, md[1] ] if md
466
+ return [ :comment, md[1] ]
393
467
  else
394
468
  md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
395
469
  return [ :cdata, md[1] ] if md
@@ -397,38 +471,54 @@ module REXML
397
471
  raise REXML::ParseException.new( "Declarations can only occur "+
398
472
  "in the doctype declaration.", @source)
399
473
  elsif @source.match("?", true)
400
- return process_instruction(start_position)
474
+ return process_instruction
401
475
  else
402
476
  # Get the next tag
403
- md = @source.match(TAG_PATTERN, true)
477
+ md = @source.match(Private::TAG_PATTERN, true)
404
478
  unless md
405
479
  @source.position = start_position
406
480
  raise REXML::ParseException.new("malformed XML: missing tag start", @source)
407
481
  end
408
482
  tag = md[1]
409
483
  @document_status = :in_element
410
- prefixes = Set.new
411
- prefixes << md[2] if md[2]
412
- @nsstack.unshift(curr_ns=Set.new)
413
- attributes, closed = parse_attributes(prefixes, curr_ns)
484
+ @prefixes.clear
485
+ @prefixes << md[2] if md[2]
486
+ push_namespaces_restore
487
+ attributes, closed = parse_attributes(@prefixes)
414
488
  # Verify that all of the prefixes have been defined
415
- for prefix in prefixes
416
- unless @nsstack.find{|k| k.member?(prefix)}
489
+ for prefix in @prefixes
490
+ unless @namespaces.key?(prefix)
417
491
  raise UndefinedNamespaceException.new(prefix,@source,self)
418
492
  end
419
493
  end
420
494
 
421
495
  if closed
422
496
  @closed = tag
423
- @nsstack.shift
497
+ pop_namespaces_restore
424
498
  else
499
+ if @tags.empty? and @have_root
500
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
501
+ end
425
502
  @tags.push( tag )
426
503
  end
504
+ @have_root = true
427
505
  return [ :start_element, tag, attributes ]
428
506
  end
429
507
  else
430
- md = @source.match(/([^<]*)/um, true)
431
- text = md[1]
508
+ text = @source.read_until("<")
509
+ if text.chomp!("<")
510
+ @source.position -= "<".bytesize
511
+ end
512
+ if @tags.empty?
513
+ unless /\A\s*\z/.match?(text)
514
+ if @have_root
515
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
516
+ else
517
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
518
+ end
519
+ end
520
+ return pull_event if @have_root
521
+ end
432
522
  return [ :text, text ]
433
523
  end
434
524
  rescue REXML::UndefinedNamespaceException
@@ -444,13 +534,13 @@ module REXML
444
534
  private :pull_event
445
535
 
446
536
  def entity( reference, entities )
447
- value = nil
448
- value = entities[ reference ] if entities
449
- if not value
450
- value = DEFAULT_ENTITIES[ reference ]
451
- value = value[2] if value
452
- end
453
- unnormalize( value, entities ) if value
537
+ return unless entities
538
+
539
+ value = entities[ reference ]
540
+ return if value.nil?
541
+
542
+ record_entity_expansion
543
+ unnormalize( value, entities )
454
544
  end
455
545
 
456
546
  # Escapes all possible entities
@@ -471,34 +561,87 @@ module REXML
471
561
 
472
562
  # Unescapes all possible entities
473
563
  def unnormalize( string, entities=nil, filter=nil )
474
- rv = string.gsub( /\r\n?/, "\n" )
564
+ if string.include?("\r")
565
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
566
+ else
567
+ rv = string.dup
568
+ end
475
569
  matches = rv.scan( REFERENCE_RE )
476
570
  return rv if matches.size == 0
477
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
571
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
478
572
  m=$1
479
- m = "0#{m}" if m[0] == ?x
480
- [Integer(m)].pack('U*')
573
+ if m.start_with?("x")
574
+ code_point = Integer(m[1..-1], 16)
575
+ else
576
+ code_point = Integer(m, 10)
577
+ end
578
+ [code_point].pack('U*')
481
579
  }
482
580
  matches.collect!{|x|x[0]}.compact!
581
+ if filter
582
+ matches.reject! do |entity_reference|
583
+ filter.include?(entity_reference)
584
+ end
585
+ end
483
586
  if matches.size > 0
484
- matches.each do |entity_reference|
485
- unless filter and filter.include?(entity_reference)
486
- entity_value = entity( entity_reference, entities )
487
- if entity_value
488
- re = /&#{entity_reference};/
489
- rv.gsub!( re, entity_value )
490
- else
491
- er = DEFAULT_ENTITIES[entity_reference]
492
- rv.gsub!( er[0], er[2] ) if er
587
+ matches.tally.each do |entity_reference, n|
588
+ entity_expansion_count_before = @entity_expansion_count
589
+ entity_value = entity( entity_reference, entities )
590
+ if entity_value
591
+ if n > 1
592
+ entity_expansion_count_delta =
593
+ @entity_expansion_count - entity_expansion_count_before
594
+ record_entity_expansion(entity_expansion_count_delta * (n - 1))
595
+ end
596
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
597
+ rv.gsub!( re, entity_value )
598
+ if rv.bytesize > @entity_expansion_text_limit
599
+ raise "entity expansion has grown too large"
493
600
  end
601
+ else
602
+ er = DEFAULT_ENTITIES[entity_reference]
603
+ rv.gsub!( er[0], er[2] ) if er
494
604
  end
495
605
  end
496
- rv.gsub!( /&amp;/, '&' )
606
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
497
607
  end
498
608
  rv
499
609
  end
500
610
 
501
611
  private
612
+ def add_namespace(prefix, uri)
613
+ @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
614
+ if uri.nil?
615
+ @namespaces.delete(prefix)
616
+ else
617
+ @namespaces[prefix] = uri
618
+ end
619
+ end
620
+
621
+ def push_namespaces_restore
622
+ namespaces_restore = {}
623
+ @namespaces_restore_stack.push(namespaces_restore)
624
+ namespaces_restore
625
+ end
626
+
627
+ def pop_namespaces_restore
628
+ namespaces_restore = @namespaces_restore_stack.pop
629
+ namespaces_restore.each do |prefix, uri|
630
+ if uri.nil?
631
+ @namespaces.delete(prefix)
632
+ else
633
+ @namespaces[prefix] = uri
634
+ end
635
+ end
636
+ end
637
+
638
+ def record_entity_expansion(delta=1)
639
+ @entity_expansion_count += delta
640
+ if @entity_expansion_count > @entity_expansion_limit
641
+ raise "number of entity expansions exceeded, processing aborted."
642
+ end
643
+ end
644
+
502
645
  def need_source_encoding_update?(xml_declaration_encoding)
503
646
  return false if xml_declaration_encoding.nil?
504
647
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -506,16 +649,16 @@ module REXML
506
649
  end
507
650
 
508
651
  def parse_name(base_error_message)
509
- md = @source.match(NAME_PATTERN, true)
652
+ md = @source.match(Private::NAME_PATTERN, true)
510
653
  unless md
511
- if @source.match(/\s*\S/um)
654
+ if @source.match(/\S/um)
512
655
  message = "#{base_error_message}: invalid name"
513
656
  else
514
657
  message = "#{base_error_message}: name is missing"
515
658
  end
516
659
  raise REXML::ParseException.new(message, @source)
517
660
  end
518
- md[1]
661
+ md[0]
519
662
  end
520
663
 
521
664
  def parse_id(base_error_message,
@@ -584,15 +727,24 @@ module REXML
584
727
  end
585
728
  end
586
729
 
587
- def process_instruction(start_position)
588
- match_data = @source.match(INSTRUCTION_END, true)
589
- unless match_data
590
- message = "Invalid processing instruction node"
591
- @source.position = start_position
592
- raise REXML::ParseException.new(message, @source)
730
+ def process_instruction
731
+ name = parse_name("Malformed XML: Invalid processing instruction node")
732
+ if @source.match(/\s+/um, true)
733
+ match_data = @source.match(/(.*?)\?>/um, true)
734
+ unless match_data
735
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
736
+ end
737
+ content = match_data[1]
738
+ else
739
+ content = nil
740
+ unless @source.match("?>", true)
741
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
742
+ end
593
743
  end
594
- if @document_status.nil? and match_data[1] == "xml"
595
- content = match_data[2]
744
+ if name == "xml"
745
+ if @document_status
746
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
747
+ end
596
748
  version = VERSION.match(content)
597
749
  version = version[1] unless version.nil?
598
750
  encoding = ENCODING.match(content)
@@ -607,11 +759,12 @@ module REXML
607
759
  standalone = standalone[1] unless standalone.nil?
608
760
  return [ :xmldecl, version, encoding, standalone ]
609
761
  end
610
- [:processing_instruction, match_data[1], match_data[2]]
762
+ [:processing_instruction, name, content]
611
763
  end
612
764
 
613
- def parse_attributes(prefixes, curr_ns)
765
+ def parse_attributes(prefixes)
614
766
  attributes = {}
767
+ expanded_names = {}
615
768
  closed = false
616
769
  while true
617
770
  if @source.match(">", true)
@@ -633,15 +786,17 @@ module REXML
633
786
  raise REXML::ParseException.new(message, @source)
634
787
  end
635
788
  quote = match[1]
789
+ start_position = @source.position
636
790
  value = @source.read_until(quote)
637
791
  unless value.chomp!(quote)
792
+ @source.position = start_position
638
793
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
639
794
  raise REXML::ParseException.new(message, @source)
640
795
  end
641
796
  @source.match(/\s*/um, true)
642
797
  if prefix == "xmlns"
643
798
  if local_part == "xml"
644
- if value != "http://www.w3.org/XML/1998/namespace"
799
+ if value != Private::XML_PREFIXED_NAMESPACE
645
800
  msg = "The 'xml' prefix must not be bound to any other namespace "+
646
801
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
647
802
  raise REXML::ParseException.new( msg, @source, self )
@@ -651,7 +806,7 @@ module REXML
651
806
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
652
807
  raise REXML::ParseException.new( msg, @source, self)
653
808
  end
654
- curr_ns << local_part
809
+ add_namespace(local_part, value)
655
810
  elsif prefix
656
811
  prefixes << prefix unless prefix == "xml"
657
812
  end
@@ -661,6 +816,20 @@ module REXML
661
816
  raise REXML::ParseException.new(msg, @source, self)
662
817
  end
663
818
 
819
+ unless prefix == "xmlns"
820
+ uri = @namespaces[prefix]
821
+ expanded_name = [uri, local_part]
822
+ existing_prefix = expanded_names[expanded_name]
823
+ if existing_prefix
824
+ message = "Namespace conflict in adding attribute " +
825
+ "\"#{local_part}\": " +
826
+ "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
827
+ "prefix \"#{prefix}\" = \"#{uri}\""
828
+ raise REXML::ParseException.new(message, @source, self)
829
+ end
830
+ expanded_names[expanded_name] = prefix
831
+ end
832
+
664
833
  attributes[name] = value
665
834
  else
666
835
  message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
@@ -47,6 +47,18 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
54
+ def entity_expansion_limit=( limit )
55
+ @parser.entity_expansion_limit = limit
56
+ end
57
+
58
+ def entity_expansion_text_limit=( limit )
59
+ @parser.entity_expansion_text_limit = limit
60
+ end
61
+
50
62
  def each
51
63
  while has_next?
52
64
  yield self.pull
@@ -22,6 +22,18 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
29
+ def entity_expansion_limit=( limit )
30
+ @parser.entity_expansion_limit = limit
31
+ end
32
+
33
+ def entity_expansion_text_limit=( limit )
34
+ @parser.entity_expansion_text_limit = limit
35
+ end
36
+
25
37
  def add_listener( listener )
26
38
  @parser.add_listener( listener )
27
39
  end
@@ -157,25 +169,8 @@ module REXML
157
169
  end
158
170
  end
159
171
  when :text
160
- #normalized = @parser.normalize( event[1] )
161
- #handle( :characters, normalized )
162
- copy = event[1].clone
163
-
164
- esub = proc { |match|
165
- if @entities.has_key?($1)
166
- @entities[$1].gsub(Text::REFERENCE, &esub)
167
- else
168
- match
169
- end
170
- }
171
-
172
- copy.gsub!( Text::REFERENCE, &esub )
173
- copy.gsub!( Text::NUMERICENTITY ) {|m|
174
- m=$1
175
- m = "0#{m}" if m[0] == ?x
176
- [Integer(m)].pack('U*')
177
- }
178
- handle( :characters, copy )
172
+ unnormalized = @parser.unnormalize( event[1], @entities )
173
+ handle( :characters, unnormalized )
179
174
  when :entitydecl
180
175
  handle_entitydecl( event )
181
176
  when :processing_instruction, :comment, :attlistdecl,
@@ -264,6 +259,8 @@ module REXML
264
259
  end
265
260
 
266
261
  def get_namespace( prefix )
262
+ return nil if @namespace_stack.empty?
263
+
267
264
  uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
268
265
  (@namespace_stack.find { |ns| not ns[nil].nil? })
269
266
  uris[-1][prefix] unless uris.nil? or 0 == uris.size
@@ -7,37 +7,42 @@ module REXML
7
7
  def initialize source, listener
8
8
  @listener = listener
9
9
  @parser = BaseParser.new( source )
10
- @tag_stack = []
10
+ @entities = {}
11
11
  end
12
12
 
13
13
  def add_listener( listener )
14
14
  @parser.add_listener( listener )
15
15
  end
16
16
 
17
+ def entity_expansion_count
18
+ @parser.entity_expansion_count
19
+ end
20
+
21
+ def entity_expansion_limit=( limit )
22
+ @parser.entity_expansion_limit = limit
23
+ end
24
+
25
+ def entity_expansion_text_limit=( limit )
26
+ @parser.entity_expansion_text_limit = limit
27
+ end
28
+
17
29
  def parse
18
30
  # entity string
19
31
  while true
20
32
  event = @parser.pull
21
33
  case event[0]
22
34
  when :end_document
23
- unless @tag_stack.empty?
24
- tag_path = "/" + @tag_stack.join("/")
25
- raise ParseException.new("Missing end tag for '#{tag_path}'",
26
- @parser.source)
27
- end
28
35
  return
29
36
  when :start_element
30
- @tag_stack << event[1]
31
37
  attrs = event[2].each do |n, v|
32
38
  event[2][n] = @parser.unnormalize( v )
33
39
  end
34
40
  @listener.tag_start( event[1], attrs )
35
41
  when :end_element
36
42
  @listener.tag_end( event[1] )
37
- @tag_stack.pop
38
43
  when :text
39
- normalized = @parser.unnormalize( event[1] )
40
- @listener.text( normalized )
44
+ unnormalized = @parser.unnormalize( event[1], @entities )
45
+ @listener.text( unnormalized )
41
46
  when :processing_instruction
42
47
  @listener.instruction( *event[1,2] )
43
48
  when :start_doctype
@@ -48,6 +53,7 @@ module REXML
48
53
  when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
49
54
  @listener.send( event[0].to_s, *event[1..-1] )
50
55
  when :entitydecl, :notationdecl
56
+ @entities[ event[1] ] = event[2] if event.size == 3
51
57
  @listener.send( event[0].to_s, event[1..-1] )
52
58
  when :externalentity
53
59
  entity_reference = event[1]