rexml 3.3.2 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
 - data/NEWS.md +200 -0
 - data/lib/rexml/attribute.rb +3 -2
 - data/lib/rexml/document.rb +5 -1
 - data/lib/rexml/element.rb +14 -16
 - data/lib/rexml/entity.rb +9 -48
 - data/lib/rexml/parsers/baseparser.rb +206 -101
 - data/lib/rexml/parsers/pullparser.rb +16 -0
 - data/lib/rexml/parsers/sax2parser.rb +14 -0
 - data/lib/rexml/parsers/streamparser.rb +15 -9
 - data/lib/rexml/parsers/treeparser.rb +0 -7
 - data/lib/rexml/rexml.rb +1 -1
 - data/lib/rexml/source.rb +63 -12
 - data/lib/rexml/text.rb +20 -43
 - metadata +8 -19
 
| 
         @@ -1,12 +1,29 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
       2 
2 
     | 
    
         
             
            require_relative '../parseexception'
         
     | 
| 
       3 
3 
     | 
    
         
             
            require_relative '../undefinednamespaceexception'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative '../security'
         
     | 
| 
       4 
5 
     | 
    
         
             
            require_relative '../source'
         
     | 
| 
       5 
6 
     | 
    
         
             
            require 'set'
         
     | 
| 
       6 
7 
     | 
    
         
             
            require "strscan"
         
     | 
| 
       7 
8 
     | 
    
         | 
| 
       8 
9 
     | 
    
         
             
            module REXML
         
     | 
| 
       9 
10 
     | 
    
         
             
              module Parsers
         
     | 
| 
      
 11 
     | 
    
         
            +
                unless [].respond_to?(:tally)
         
     | 
| 
      
 12 
     | 
    
         
            +
                  module EnumerableTally
         
     | 
| 
      
 13 
     | 
    
         
            +
                    refine Enumerable do
         
     | 
| 
      
 14 
     | 
    
         
            +
                      def tally
         
     | 
| 
      
 15 
     | 
    
         
            +
                        counts = {}
         
     | 
| 
      
 16 
     | 
    
         
            +
                        each do |item|
         
     | 
| 
      
 17 
     | 
    
         
            +
                          counts[item] ||= 0
         
     | 
| 
      
 18 
     | 
    
         
            +
                          counts[item] += 1
         
     | 
| 
      
 19 
     | 
    
         
            +
                        end
         
     | 
| 
      
 20 
     | 
    
         
            +
                        counts
         
     | 
| 
      
 21 
     | 
    
         
            +
                      end
         
     | 
| 
      
 22 
     | 
    
         
            +
                    end
         
     | 
| 
      
 23 
     | 
    
         
            +
                  end
         
     | 
| 
      
 24 
     | 
    
         
            +
                  using EnumerableTally
         
     | 
| 
      
 25 
     | 
    
         
            +
                end
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
       10 
27 
     | 
    
         
             
                if StringScanner::Version < "3.0.8"
         
     | 
| 
       11 
28 
     | 
    
         
             
                  module StringScannerCaptures
         
     | 
| 
       12 
29 
     | 
    
         
             
                    refine StringScanner do
         
     | 
| 
         @@ -124,29 +141,22 @@ module REXML 
     | 
|
| 
       124 
141 
     | 
    
         
             
                  }
         
     | 
| 
       125 
142 
     | 
    
         | 
| 
       126 
143 
     | 
    
         
             
                  module Private
         
     | 
| 
       127 
     | 
    
         
            -
                     
     | 
| 
       128 
     | 
    
         
            -
                    INSTRUCTION_TERM = "?>"
         
     | 
| 
       129 
     | 
    
         
            -
                    COMMENT_TERM = "-->"
         
     | 
| 
       130 
     | 
    
         
            -
                    CDATA_TERM = "]]>"
         
     | 
| 
       131 
     | 
    
         
            -
                    DOCTYPE_TERM = "]>"
         
     | 
| 
       132 
     | 
    
         
            -
                    # Read to the end of DOCTYPE because there is no proper ENTITY termination
         
     | 
| 
       133 
     | 
    
         
            -
                    ENTITY_TERM = DOCTYPE_TERM
         
     | 
| 
       134 
     | 
    
         
            -
             
     | 
| 
       135 
     | 
    
         
            -
                    INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
         
     | 
| 
      
 144 
     | 
    
         
            +
                    PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
         
     | 
| 
       136 
145 
     | 
    
         
             
                    TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
         
     | 
| 
       137 
146 
     | 
    
         
             
                    CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
         
     | 
| 
       138 
147 
     | 
    
         
             
                    ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
         
     | 
| 
       139 
     | 
    
         
            -
                    NAME_PATTERN =  
     | 
| 
      
 148 
     | 
    
         
            +
                    NAME_PATTERN = /#{NAME}/um
         
     | 
| 
       140 
149 
     | 
    
         
             
                    GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
         
     | 
| 
       141 
150 
     | 
    
         
             
                    PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
         
     | 
| 
       142 
151 
     | 
    
         
             
                    ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
         
     | 
| 
       143 
152 
     | 
    
         
             
                    CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
         
     | 
| 
       144 
     | 
    
         
            -
                    CHARACTER_REFERENCES = /&# 
     | 
| 
      
 153 
     | 
    
         
            +
                    CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
         
     | 
| 
       145 
154 
     | 
    
         
             
                    DEFAULT_ENTITIES_PATTERNS = {}
         
     | 
| 
       146 
155 
     | 
    
         
             
                    default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
         
     | 
| 
       147 
156 
     | 
    
         
             
                    default_entities.each do |term|
         
     | 
| 
       148 
157 
     | 
    
         
             
                      DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
         
     | 
| 
       149 
158 
     | 
    
         
             
                    end
         
     | 
| 
      
 159 
     | 
    
         
            +
                    XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
         
     | 
| 
       150 
160 
     | 
    
         
             
                  end
         
     | 
| 
       151 
161 
     | 
    
         
             
                  private_constant :Private
         
     | 
| 
       152 
162 
     | 
    
         | 
| 
         @@ -154,6 +164,10 @@ module REXML 
     | 
|
| 
       154 
164 
     | 
    
         
             
                    self.stream = source
         
     | 
| 
       155 
165 
     | 
    
         
             
                    @listeners = []
         
     | 
| 
       156 
166 
     | 
    
         
             
                    @prefixes = Set.new
         
     | 
| 
      
 167 
     | 
    
         
            +
                    @entity_expansion_count = 0
         
     | 
| 
      
 168 
     | 
    
         
            +
                    @entity_expansion_limit = Security.entity_expansion_limit
         
     | 
| 
      
 169 
     | 
    
         
            +
                    @entity_expansion_text_limit = Security.entity_expansion_text_limit
         
     | 
| 
      
 170 
     | 
    
         
            +
                    @source.ensure_buffer
         
     | 
| 
       157 
171 
     | 
    
         
             
                  end
         
     | 
| 
       158 
172 
     | 
    
         | 
| 
       159 
173 
     | 
    
         
             
                  def add_listener( listener )
         
     | 
| 
         @@ -161,16 +175,24 @@ module REXML 
     | 
|
| 
       161 
175 
     | 
    
         
             
                  end
         
     | 
| 
       162 
176 
     | 
    
         | 
| 
       163 
177 
     | 
    
         
             
                  attr_reader :source
         
     | 
| 
      
 178 
     | 
    
         
            +
                  attr_reader :entity_expansion_count
         
     | 
| 
      
 179 
     | 
    
         
            +
                  attr_writer :entity_expansion_limit
         
     | 
| 
      
 180 
     | 
    
         
            +
                  attr_writer :entity_expansion_text_limit
         
     | 
| 
       164 
181 
     | 
    
         | 
| 
       165 
182 
     | 
    
         
             
                  def stream=( source )
         
     | 
| 
       166 
183 
     | 
    
         
             
                    @source = SourceFactory.create_from( source )
         
     | 
| 
      
 184 
     | 
    
         
            +
                    reset
         
     | 
| 
      
 185 
     | 
    
         
            +
                  end
         
     | 
| 
      
 186 
     | 
    
         
            +
             
     | 
| 
      
 187 
     | 
    
         
            +
                  def reset
         
     | 
| 
       167 
188 
     | 
    
         
             
                    @closed = nil
         
     | 
| 
       168 
189 
     | 
    
         
             
                    @have_root = false
         
     | 
| 
       169 
190 
     | 
    
         
             
                    @document_status = nil
         
     | 
| 
       170 
191 
     | 
    
         
             
                    @tags = []
         
     | 
| 
       171 
192 
     | 
    
         
             
                    @stack = []
         
     | 
| 
       172 
193 
     | 
    
         
             
                    @entities = []
         
     | 
| 
       173 
     | 
    
         
            -
                    @ 
     | 
| 
      
 194 
     | 
    
         
            +
                    @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
         
     | 
| 
      
 195 
     | 
    
         
            +
                    @namespaces_restore_stack = []
         
     | 
| 
       174 
196 
     | 
    
         
             
                  end
         
     | 
| 
       175 
197 
     | 
    
         | 
| 
       176 
198 
     | 
    
         
             
                  def position
         
     | 
| 
         @@ -238,6 +260,10 @@ module REXML 
     | 
|
| 
       238 
260 
     | 
    
         
             
                      if @document_status == :in_doctype
         
     | 
| 
       239 
261 
     | 
    
         
             
                        raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
         
     | 
| 
       240 
262 
     | 
    
         
             
                      end
         
     | 
| 
      
 263 
     | 
    
         
            +
                      unless @tags.empty?
         
     | 
| 
      
 264 
     | 
    
         
            +
                        path = "/" + @tags.join("/")
         
     | 
| 
      
 265 
     | 
    
         
            +
                        raise ParseException.new("Missing end tag for '#{path}'", @source)
         
     | 
| 
      
 266 
     | 
    
         
            +
                      end
         
     | 
| 
       241 
267 
     | 
    
         
             
                      return [ :end_document ]
         
     | 
| 
       242 
268 
     | 
    
         
             
                    end
         
     | 
| 
       243 
269 
     | 
    
         
             
                    return @stack.shift if @stack.size > 0
         
     | 
| 
         @@ -247,11 +273,11 @@ module REXML 
     | 
|
| 
       247 
273 
     | 
    
         
             
                    @source.ensure_buffer
         
     | 
| 
       248 
274 
     | 
    
         
             
                    if @document_status == nil
         
     | 
| 
       249 
275 
     | 
    
         
             
                      start_position = @source.position
         
     | 
| 
       250 
     | 
    
         
            -
                      if @source.match("<?", true)
         
     | 
| 
       251 
     | 
    
         
            -
                        return process_instruction 
     | 
| 
       252 
     | 
    
         
            -
                      elsif @source.match("<!", true)
         
     | 
| 
       253 
     | 
    
         
            -
                        if @source.match("--", true)
         
     | 
| 
       254 
     | 
    
         
            -
                          md = @source.match(/(.*?)-->/um, true 
     | 
| 
      
 276 
     | 
    
         
            +
                      if @source.match?("<?", true)
         
     | 
| 
      
 277 
     | 
    
         
            +
                        return process_instruction
         
     | 
| 
      
 278 
     | 
    
         
            +
                      elsif @source.match?("<!", true)
         
     | 
| 
      
 279 
     | 
    
         
            +
                        if @source.match?("--", true)
         
     | 
| 
      
 280 
     | 
    
         
            +
                          md = @source.match(/(.*?)-->/um, true)
         
     | 
| 
       255 
281 
     | 
    
         
             
                          if md.nil?
         
     | 
| 
       256 
282 
     | 
    
         
             
                            raise REXML::ParseException.new("Unclosed comment", @source)
         
     | 
| 
       257 
283 
     | 
    
         
             
                          end
         
     | 
| 
         @@ -259,10 +285,10 @@ module REXML 
     | 
|
| 
       259 
285 
     | 
    
         
             
                            raise REXML::ParseException.new("Malformed comment", @source)
         
     | 
| 
       260 
286 
     | 
    
         
             
                          end
         
     | 
| 
       261 
287 
     | 
    
         
             
                          return [ :comment, md[1] ]
         
     | 
| 
       262 
     | 
    
         
            -
                        elsif @source.match("DOCTYPE", true)
         
     | 
| 
      
 288 
     | 
    
         
            +
                        elsif @source.match?("DOCTYPE", true)
         
     | 
| 
       263 
289 
     | 
    
         
             
                          base_error_message = "Malformed DOCTYPE"
         
     | 
| 
       264 
     | 
    
         
            -
                          unless @source.match(/\s+/um, true)
         
     | 
| 
       265 
     | 
    
         
            -
                            if @source.match(">")
         
     | 
| 
      
 290 
     | 
    
         
            +
                          unless @source.match?(/\s+/um, true)
         
     | 
| 
      
 291 
     | 
    
         
            +
                            if @source.match?(">")
         
     | 
| 
       266 
292 
     | 
    
         
             
                              message = "#{base_error_message}: name is missing"
         
     | 
| 
       267 
293 
     | 
    
         
             
                            else
         
     | 
| 
       268 
294 
     | 
    
         
             
                              message = "#{base_error_message}: invalid name"
         
     | 
| 
         @@ -270,12 +296,11 @@ module REXML 
     | 
|
| 
       270 
296 
     | 
    
         
             
                            @source.position = start_position
         
     | 
| 
       271 
297 
     | 
    
         
             
                            raise REXML::ParseException.new(message, @source)
         
     | 
| 
       272 
298 
     | 
    
         
             
                          end
         
     | 
| 
       273 
     | 
    
         
            -
                          @nsstack.unshift(Set.new)
         
     | 
| 
       274 
299 
     | 
    
         
             
                          name = parse_name(base_error_message)
         
     | 
| 
       275 
     | 
    
         
            -
                          if @source.match(/\s*\[/um, true)
         
     | 
| 
      
 300 
     | 
    
         
            +
                          if @source.match?(/\s*\[/um, true)
         
     | 
| 
       276 
301 
     | 
    
         
             
                            id = [nil, nil, nil]
         
     | 
| 
       277 
302 
     | 
    
         
             
                            @document_status = :in_doctype
         
     | 
| 
       278 
     | 
    
         
            -
                          elsif @source.match(/\s*>/um, true)
         
     | 
| 
      
 303 
     | 
    
         
            +
                          elsif @source.match?(/\s*>/um, true)
         
     | 
| 
       279 
304 
     | 
    
         
             
                            id = [nil, nil, nil]
         
     | 
| 
       280 
305 
     | 
    
         
             
                            @document_status = :after_doctype
         
     | 
| 
       281 
306 
     | 
    
         
             
                            @source.ensure_buffer
         
     | 
| 
         @@ -287,9 +312,9 @@ module REXML 
     | 
|
| 
       287 
312 
     | 
    
         
             
                              # For backward compatibility
         
     | 
| 
       288 
313 
     | 
    
         
             
                              id[1], id[2] = id[2], nil
         
     | 
| 
       289 
314 
     | 
    
         
             
                            end
         
     | 
| 
       290 
     | 
    
         
            -
                            if @source.match(/\s*\[/um, true)
         
     | 
| 
      
 315 
     | 
    
         
            +
                            if @source.match?(/\s*\[/um, true)
         
     | 
| 
       291 
316 
     | 
    
         
             
                              @document_status = :in_doctype
         
     | 
| 
       292 
     | 
    
         
            -
                            elsif @source.match(/\s*>/um, true)
         
     | 
| 
      
 317 
     | 
    
         
            +
                            elsif @source.match?(/\s*>/um, true)
         
     | 
| 
       293 
318 
     | 
    
         
             
                              @document_status = :after_doctype
         
     | 
| 
       294 
319 
     | 
    
         
             
                              @source.ensure_buffer
         
     | 
| 
       295 
320 
     | 
    
         
             
                            else
         
     | 
| 
         @@ -299,7 +324,7 @@ module REXML 
     | 
|
| 
       299 
324 
     | 
    
         
             
                          end
         
     | 
| 
       300 
325 
     | 
    
         
             
                          args = [:start_doctype, name, *id]
         
     | 
| 
       301 
326 
     | 
    
         
             
                          if @document_status == :after_doctype
         
     | 
| 
       302 
     | 
    
         
            -
                            @source.match(/\s*/um, true)
         
     | 
| 
      
 327 
     | 
    
         
            +
                            @source.match?(/\s*/um, true)
         
     | 
| 
       303 
328 
     | 
    
         
             
                            @stack << [ :end_doctype ]
         
     | 
| 
       304 
329 
     | 
    
         
             
                          end
         
     | 
| 
       305 
330 
     | 
    
         
             
                          return args
         
     | 
| 
         @@ -310,15 +335,19 @@ module REXML 
     | 
|
| 
       310 
335 
     | 
    
         
             
                      end
         
     | 
| 
       311 
336 
     | 
    
         
             
                    end
         
     | 
| 
       312 
337 
     | 
    
         
             
                    if @document_status == :in_doctype
         
     | 
| 
       313 
     | 
    
         
            -
                      @source.match(/\s*/um, true) # skip spaces
         
     | 
| 
      
 338 
     | 
    
         
            +
                      @source.match?(/\s*/um, true) # skip spaces
         
     | 
| 
       314 
339 
     | 
    
         
             
                      start_position = @source.position
         
     | 
| 
       315 
     | 
    
         
            -
                      if @source.match("<!", true)
         
     | 
| 
       316 
     | 
    
         
            -
                        if @source.match("ELEMENT", true)
         
     | 
| 
      
 340 
     | 
    
         
            +
                      if @source.match?("<!", true)
         
     | 
| 
      
 341 
     | 
    
         
            +
                        if @source.match?("ELEMENT", true)
         
     | 
| 
       317 
342 
     | 
    
         
             
                          md = @source.match(/(.*?)>/um, true)
         
     | 
| 
       318 
343 
     | 
    
         
             
                          raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
         
     | 
| 
       319 
344 
     | 
    
         
             
                          return [ :elementdecl, "<!ELEMENT" + md[1] ]
         
     | 
| 
       320 
     | 
    
         
            -
                        elsif @source.match("ENTITY", true)
         
     | 
| 
       321 
     | 
    
         
            -
                           
     | 
| 
      
 345 
     | 
    
         
            +
                        elsif @source.match?("ENTITY", true)
         
     | 
| 
      
 346 
     | 
    
         
            +
                          match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
         
     | 
| 
      
 347 
     | 
    
         
            +
                          unless match_data
         
     | 
| 
      
 348 
     | 
    
         
            +
                            raise REXML::ParseException.new("Malformed entity declaration", @source)
         
     | 
| 
      
 349 
     | 
    
         
            +
                          end
         
     | 
| 
      
 350 
     | 
    
         
            +
                          match = [:entitydecl, *match_data.captures.compact]
         
     | 
| 
       322 
351 
     | 
    
         
             
                          ref = false
         
     | 
| 
       323 
352 
     | 
    
         
             
                          if match[1] == '%'
         
     | 
| 
       324 
353 
     | 
    
         
             
                            ref = true
         
     | 
| 
         @@ -336,6 +365,8 @@ module REXML 
     | 
|
| 
       336 
365 
     | 
    
         
             
                            match[4] = match[4][1..-2] # HREF
         
     | 
| 
       337 
366 
     | 
    
         
             
                            match.delete_at(5) if match.size > 5 # Chop out NDATA decl
         
     | 
| 
       338 
367 
     | 
    
         
             
                            # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
         
     | 
| 
      
 368 
     | 
    
         
            +
                          elsif Private::PEREFERENCE_PATTERN.match?(match[2])
         
     | 
| 
      
 369 
     | 
    
         
            +
                            raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
         
     | 
| 
       339 
370 
     | 
    
         
             
                          else
         
     | 
| 
       340 
371 
     | 
    
         
             
                            match[2] = match[2][1..-2]
         
     | 
| 
       341 
372 
     | 
    
         
             
                            match.pop if match.size == 4
         
     | 
| 
         @@ -343,7 +374,7 @@ module REXML 
     | 
|
| 
       343 
374 
     | 
    
         
             
                          end
         
     | 
| 
       344 
375 
     | 
    
         
             
                          match << '%' if ref
         
     | 
| 
       345 
376 
     | 
    
         
             
                          return match
         
     | 
| 
       346 
     | 
    
         
            -
                        elsif @source.match("ATTLIST", true)
         
     | 
| 
      
 377 
     | 
    
         
            +
                        elsif @source.match?("ATTLIST", true)
         
     | 
| 
       347 
378 
     | 
    
         
             
                          md = @source.match(Private::ATTLISTDECL_END, true)
         
     | 
| 
       348 
379 
     | 
    
         
             
                          raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
         
     | 
| 
       349 
380 
     | 
    
         
             
                          element = md[1]
         
     | 
| 
         @@ -358,15 +389,15 @@ module REXML 
     | 
|
| 
       358 
389 
     | 
    
         
             
                              val = attdef[4] if val == "#FIXED "
         
     | 
| 
       359 
390 
     | 
    
         
             
                              pairs[attdef[0]] = val
         
     | 
| 
       360 
391 
     | 
    
         
             
                              if attdef[0] =~ /^xmlns:(.*)/
         
     | 
| 
       361 
     | 
    
         
            -
                                @ 
     | 
| 
      
 392 
     | 
    
         
            +
                                @namespaces[$1] = val
         
     | 
| 
       362 
393 
     | 
    
         
             
                              end
         
     | 
| 
       363 
394 
     | 
    
         
             
                            end
         
     | 
| 
       364 
395 
     | 
    
         
             
                          end
         
     | 
| 
       365 
396 
     | 
    
         
             
                          return [ :attlistdecl, element, pairs, contents ]
         
     | 
| 
       366 
     | 
    
         
            -
                        elsif @source.match("NOTATION", true)
         
     | 
| 
      
 397 
     | 
    
         
            +
                        elsif @source.match?("NOTATION", true)
         
     | 
| 
       367 
398 
     | 
    
         
             
                          base_error_message = "Malformed notation declaration"
         
     | 
| 
       368 
     | 
    
         
            -
                          unless @source.match(/\s+/um, true)
         
     | 
| 
       369 
     | 
    
         
            -
                            if @source.match(">")
         
     | 
| 
      
 399 
     | 
    
         
            +
                          unless @source.match?(/\s+/um, true)
         
     | 
| 
      
 400 
     | 
    
         
            +
                            if @source.match?(">")
         
     | 
| 
       370 
401 
     | 
    
         
             
                              message = "#{base_error_message}: name is missing"
         
     | 
| 
       371 
402 
     | 
    
         
             
                            else
         
     | 
| 
       372 
403 
     | 
    
         
             
                              message = "#{base_error_message}: invalid name"
         
     | 
| 
         @@ -378,21 +409,21 @@ module REXML 
     | 
|
| 
       378 
409 
     | 
    
         
             
                          id = parse_id(base_error_message,
         
     | 
| 
       379 
410 
     | 
    
         
             
                                        accept_external_id: true,
         
     | 
| 
       380 
411 
     | 
    
         
             
                                        accept_public_id: true)
         
     | 
| 
       381 
     | 
    
         
            -
                          unless @source.match(/\s*>/um, true)
         
     | 
| 
      
 412 
     | 
    
         
            +
                          unless @source.match?(/\s*>/um, true)
         
     | 
| 
       382 
413 
     | 
    
         
             
                            message = "#{base_error_message}: garbage before end >"
         
     | 
| 
       383 
414 
     | 
    
         
             
                            raise REXML::ParseException.new(message, @source)
         
     | 
| 
       384 
415 
     | 
    
         
             
                          end
         
     | 
| 
       385 
416 
     | 
    
         
             
                          return [:notationdecl, name, *id]
         
     | 
| 
       386 
     | 
    
         
            -
                        elsif md = @source.match(/--(.*?)-->/um, true 
     | 
| 
      
 417 
     | 
    
         
            +
                        elsif md = @source.match(/--(.*?)-->/um, true)
         
     | 
| 
       387 
418 
     | 
    
         
             
                          case md[1]
         
     | 
| 
       388 
419 
     | 
    
         
             
                          when /--/, /-\z/
         
     | 
| 
       389 
420 
     | 
    
         
             
                            raise REXML::ParseException.new("Malformed comment", @source)
         
     | 
| 
       390 
421 
     | 
    
         
             
                          end
         
     | 
| 
       391 
422 
     | 
    
         
             
                          return [ :comment, md[1] ] if md
         
     | 
| 
       392 
423 
     | 
    
         
             
                        end
         
     | 
| 
       393 
     | 
    
         
            -
                      elsif match = @source.match(/(%.*?;)\s*/um, true 
     | 
| 
      
 424 
     | 
    
         
            +
                      elsif match = @source.match(/(%.*?;)\s*/um, true)
         
     | 
| 
       394 
425 
     | 
    
         
             
                        return [ :externalentity, match[1] ]
         
     | 
| 
       395 
     | 
    
         
            -
                      elsif @source.match(/\]\s*>/um, true)
         
     | 
| 
      
 426 
     | 
    
         
            +
                      elsif @source.match?(/\]\s*>/um, true)
         
     | 
| 
       396 
427 
     | 
    
         
             
                        @document_status = :after_doctype
         
     | 
| 
       397 
428 
     | 
    
         
             
                        return [ :end_doctype ]
         
     | 
| 
       398 
429 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -401,17 +432,17 @@ module REXML 
     | 
|
| 
       401 
432 
     | 
    
         
             
                      end
         
     | 
| 
       402 
433 
     | 
    
         
             
                    end
         
     | 
| 
       403 
434 
     | 
    
         
             
                    if @document_status == :after_doctype
         
     | 
| 
       404 
     | 
    
         
            -
                      @source.match(/\s*/um, true)
         
     | 
| 
      
 435 
     | 
    
         
            +
                      @source.match?(/\s*/um, true)
         
     | 
| 
       405 
436 
     | 
    
         
             
                    end
         
     | 
| 
       406 
437 
     | 
    
         
             
                    begin
         
     | 
| 
       407 
438 
     | 
    
         
             
                      start_position = @source.position
         
     | 
| 
       408 
     | 
    
         
            -
                      if @source.match("<", true)
         
     | 
| 
      
 439 
     | 
    
         
            +
                      if @source.match?("<", true)
         
     | 
| 
       409 
440 
     | 
    
         
             
                        # :text's read_until may remain only "<" in buffer. In the
         
     | 
| 
       410 
441 
     | 
    
         
             
                        # case, buffer is empty here. So we need to fill buffer
         
     | 
| 
       411 
442 
     | 
    
         
             
                        # here explicitly.
         
     | 
| 
       412 
443 
     | 
    
         
             
                        @source.ensure_buffer
         
     | 
| 
       413 
     | 
    
         
            -
                        if @source.match("/", true)
         
     | 
| 
       414 
     | 
    
         
            -
                          @ 
     | 
| 
      
 444 
     | 
    
         
            +
                        if @source.match?("/", true)
         
     | 
| 
      
 445 
     | 
    
         
            +
                          @namespaces_restore_stack.pop
         
     | 
| 
       415 
446 
     | 
    
         
             
                          last_tag = @tags.pop
         
     | 
| 
       416 
447 
     | 
    
         
             
                          md = @source.match(Private::CLOSE_PATTERN, true)
         
     | 
| 
       417 
448 
     | 
    
         
             
                          if md and !last_tag
         
     | 
| 
         @@ -425,12 +456,12 @@ module REXML 
     | 
|
| 
       425 
456 
     | 
    
         
             
                            raise REXML::ParseException.new(message, @source)
         
     | 
| 
       426 
457 
     | 
    
         
             
                          end
         
     | 
| 
       427 
458 
     | 
    
         
             
                          return [ :end_element, last_tag ]
         
     | 
| 
       428 
     | 
    
         
            -
                        elsif @source.match("!", true)
         
     | 
| 
      
 459 
     | 
    
         
            +
                        elsif @source.match?("!", true)
         
     | 
| 
       429 
460 
     | 
    
         
             
                          md = @source.match(/([^>]*>)/um)
         
     | 
| 
       430 
461 
     | 
    
         
             
                          #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
         
     | 
| 
       431 
462 
     | 
    
         
             
                          raise REXML::ParseException.new("Malformed node", @source) unless md
         
     | 
| 
       432 
463 
     | 
    
         
             
                          if md[0][0] == ?-
         
     | 
| 
       433 
     | 
    
         
            -
                            md = @source.match(/--(.*?)-->/um, true 
     | 
| 
      
 464 
     | 
    
         
            +
                            md = @source.match(/--(.*?)-->/um, true)
         
     | 
| 
       434 
465 
     | 
    
         | 
| 
       435 
466 
     | 
    
         
             
                            if md.nil? || /--|-\z/.match?(md[1])
         
     | 
| 
       436 
467 
     | 
    
         
             
                              raise REXML::ParseException.new("Malformed comment", @source)
         
     | 
| 
         @@ -438,13 +469,13 @@ module REXML 
     | 
|
| 
       438 
469 
     | 
    
         | 
| 
       439 
470 
     | 
    
         
             
                            return [ :comment, md[1] ]
         
     | 
| 
       440 
471 
     | 
    
         
             
                          else
         
     | 
| 
       441 
     | 
    
         
            -
                            md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true 
     | 
| 
      
 472 
     | 
    
         
            +
                            md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
         
     | 
| 
       442 
473 
     | 
    
         
             
                            return [ :cdata, md[1] ] if md
         
     | 
| 
       443 
474 
     | 
    
         
             
                          end
         
     | 
| 
       444 
475 
     | 
    
         
             
                          raise REXML::ParseException.new( "Declarations can only occur "+
         
     | 
| 
       445 
476 
     | 
    
         
             
                            "in the doctype declaration.", @source)
         
     | 
| 
       446 
     | 
    
         
            -
                        elsif @source.match("?", true)
         
     | 
| 
       447 
     | 
    
         
            -
                          return process_instruction 
     | 
| 
      
 477 
     | 
    
         
            +
                        elsif @source.match?("?", true)
         
     | 
| 
      
 478 
     | 
    
         
            +
                          return process_instruction
         
     | 
| 
       448 
479 
     | 
    
         
             
                        else
         
     | 
| 
       449 
480 
     | 
    
         
             
                          # Get the next tag
         
     | 
| 
       450 
481 
     | 
    
         
             
                          md = @source.match(Private::TAG_PATTERN, true)
         
     | 
| 
         @@ -456,18 +487,18 @@ module REXML 
     | 
|
| 
       456 
487 
     | 
    
         
             
                          @document_status = :in_element
         
     | 
| 
       457 
488 
     | 
    
         
             
                          @prefixes.clear
         
     | 
| 
       458 
489 
     | 
    
         
             
                          @prefixes << md[2] if md[2]
         
     | 
| 
       459 
     | 
    
         
            -
                           
     | 
| 
       460 
     | 
    
         
            -
                          attributes, closed = parse_attributes(@prefixes 
     | 
| 
      
 490 
     | 
    
         
            +
                          push_namespaces_restore
         
     | 
| 
      
 491 
     | 
    
         
            +
                          attributes, closed = parse_attributes(@prefixes)
         
     | 
| 
       461 
492 
     | 
    
         
             
                          # Verify that all of the prefixes have been defined
         
     | 
| 
       462 
493 
     | 
    
         
             
                          for prefix in @prefixes
         
     | 
| 
       463 
     | 
    
         
            -
                            unless @ 
     | 
| 
      
 494 
     | 
    
         
            +
                            unless @namespaces.key?(prefix)
         
     | 
| 
       464 
495 
     | 
    
         
             
                              raise UndefinedNamespaceException.new(prefix,@source,self)
         
     | 
| 
       465 
496 
     | 
    
         
             
                            end
         
     | 
| 
       466 
497 
     | 
    
         
             
                          end
         
     | 
| 
       467 
498 
     | 
    
         | 
| 
       468 
499 
     | 
    
         
             
                          if closed
         
     | 
| 
       469 
500 
     | 
    
         
             
                            @closed = tag
         
     | 
| 
       470 
     | 
    
         
            -
                             
     | 
| 
      
 501 
     | 
    
         
            +
                            pop_namespaces_restore
         
     | 
| 
       471 
502 
     | 
    
         
             
                          else
         
     | 
| 
       472 
503 
     | 
    
         
             
                            if @tags.empty? and @have_root
         
     | 
| 
       473 
504 
     | 
    
         
             
                              raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
         
     | 
| 
         @@ -482,11 +513,15 @@ module REXML 
     | 
|
| 
       482 
513 
     | 
    
         
             
                        if text.chomp!("<")
         
     | 
| 
       483 
514 
     | 
    
         
             
                          @source.position -= "<".bytesize
         
     | 
| 
       484 
515 
     | 
    
         
             
                        end
         
     | 
| 
       485 
     | 
    
         
            -
                        if @tags.empty? 
     | 
| 
      
 516 
     | 
    
         
            +
                        if @tags.empty?
         
     | 
| 
       486 
517 
     | 
    
         
             
                          unless /\A\s*\z/.match?(text)
         
     | 
| 
       487 
     | 
    
         
            -
                             
     | 
| 
      
 518 
     | 
    
         
            +
                            if @have_root
         
     | 
| 
      
 519 
     | 
    
         
            +
                              raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
         
     | 
| 
      
 520 
     | 
    
         
            +
                            else
         
     | 
| 
      
 521 
     | 
    
         
            +
                              raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
         
     | 
| 
      
 522 
     | 
    
         
            +
                            end
         
     | 
| 
       488 
523 
     | 
    
         
             
                          end
         
     | 
| 
       489 
     | 
    
         
            -
                          return pull_event
         
     | 
| 
      
 524 
     | 
    
         
            +
                          return pull_event if @have_root
         
     | 
| 
       490 
525 
     | 
    
         
             
                        end
         
     | 
| 
       491 
526 
     | 
    
         
             
                        return [ :text, text ]
         
     | 
| 
       492 
527 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -503,13 +538,13 @@ module REXML 
     | 
|
| 
       503 
538 
     | 
    
         
             
                  private :pull_event
         
     | 
| 
       504 
539 
     | 
    
         | 
| 
       505 
540 
     | 
    
         
             
                  def entity( reference, entities )
         
     | 
| 
       506 
     | 
    
         
            -
                     
     | 
| 
       507 
     | 
    
         
            -
             
     | 
| 
       508 
     | 
    
         
            -
                     
     | 
| 
       509 
     | 
    
         
            -
             
     | 
| 
       510 
     | 
    
         
            -
             
     | 
| 
       511 
     | 
    
         
            -
                     
     | 
| 
       512 
     | 
    
         
            -
                    unnormalize( value, entities ) 
     | 
| 
      
 541 
     | 
    
         
            +
                    return unless entities
         
     | 
| 
      
 542 
     | 
    
         
            +
             
     | 
| 
      
 543 
     | 
    
         
            +
                    value = entities[ reference ]
         
     | 
| 
      
 544 
     | 
    
         
            +
                    return if value.nil?
         
     | 
| 
      
 545 
     | 
    
         
            +
             
     | 
| 
      
 546 
     | 
    
         
            +
                    record_entity_expansion
         
     | 
| 
      
 547 
     | 
    
         
            +
                    unnormalize( value, entities )
         
     | 
| 
       513 
548 
     | 
    
         
             
                  end
         
     | 
| 
       514 
549 
     | 
    
         | 
| 
       515 
550 
     | 
    
         
             
                  # Escapes all possible entities
         
     | 
| 
         @@ -539,21 +574,37 @@ module REXML 
     | 
|
| 
       539 
574 
     | 
    
         
             
                    return rv if matches.size == 0
         
     | 
| 
       540 
575 
     | 
    
         
             
                    rv.gsub!( Private::CHARACTER_REFERENCES ) {
         
     | 
| 
       541 
576 
     | 
    
         
             
                      m=$1
         
     | 
| 
       542 
     | 
    
         
            -
                       
     | 
| 
       543 
     | 
    
         
            -
             
     | 
| 
      
 577 
     | 
    
         
            +
                      if m.start_with?("x")
         
     | 
| 
      
 578 
     | 
    
         
            +
                        code_point = Integer(m[1..-1], 16)
         
     | 
| 
      
 579 
     | 
    
         
            +
                      else
         
     | 
| 
      
 580 
     | 
    
         
            +
                        code_point = Integer(m, 10)
         
     | 
| 
      
 581 
     | 
    
         
            +
                      end
         
     | 
| 
      
 582 
     | 
    
         
            +
                      [code_point].pack('U*')
         
     | 
| 
       544 
583 
     | 
    
         
             
                    }
         
     | 
| 
       545 
584 
     | 
    
         
             
                    matches.collect!{|x|x[0]}.compact!
         
     | 
| 
      
 585 
     | 
    
         
            +
                    if filter
         
     | 
| 
      
 586 
     | 
    
         
            +
                      matches.reject! do |entity_reference|
         
     | 
| 
      
 587 
     | 
    
         
            +
                        filter.include?(entity_reference)
         
     | 
| 
      
 588 
     | 
    
         
            +
                      end
         
     | 
| 
      
 589 
     | 
    
         
            +
                    end
         
     | 
| 
       546 
590 
     | 
    
         
             
                    if matches.size > 0
         
     | 
| 
       547 
     | 
    
         
            -
                      matches.each do |entity_reference|
         
     | 
| 
       548 
     | 
    
         
            -
                         
     | 
| 
       549 
     | 
    
         
            -
             
     | 
| 
       550 
     | 
    
         
            -
             
     | 
| 
       551 
     | 
    
         
            -
             
     | 
| 
       552 
     | 
    
         
            -
                             
     | 
| 
       553 
     | 
    
         
            -
             
     | 
| 
       554 
     | 
    
         
            -
                             
     | 
| 
       555 
     | 
    
         
            -
             
     | 
| 
      
 591 
     | 
    
         
            +
                      matches.tally.each do |entity_reference, n|
         
     | 
| 
      
 592 
     | 
    
         
            +
                        entity_expansion_count_before = @entity_expansion_count
         
     | 
| 
      
 593 
     | 
    
         
            +
                        entity_value = entity( entity_reference, entities )
         
     | 
| 
      
 594 
     | 
    
         
            +
                        if entity_value
         
     | 
| 
      
 595 
     | 
    
         
            +
                          if n > 1
         
     | 
| 
      
 596 
     | 
    
         
            +
                            entity_expansion_count_delta =
         
     | 
| 
      
 597 
     | 
    
         
            +
                              @entity_expansion_count - entity_expansion_count_before
         
     | 
| 
      
 598 
     | 
    
         
            +
                            record_entity_expansion(entity_expansion_count_delta * (n - 1))
         
     | 
| 
      
 599 
     | 
    
         
            +
                          end
         
     | 
| 
      
 600 
     | 
    
         
            +
                          re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
         
     | 
| 
      
 601 
     | 
    
         
            +
                          rv.gsub!( re, entity_value )
         
     | 
| 
      
 602 
     | 
    
         
            +
                          if rv.bytesize > @entity_expansion_text_limit
         
     | 
| 
      
 603 
     | 
    
         
            +
                            raise "entity expansion has grown too large"
         
     | 
| 
       556 
604 
     | 
    
         
             
                          end
         
     | 
| 
      
 605 
     | 
    
         
            +
                        else
         
     | 
| 
      
 606 
     | 
    
         
            +
                          er = DEFAULT_ENTITIES[entity_reference]
         
     | 
| 
      
 607 
     | 
    
         
            +
                          rv.gsub!( er[0], er[2] ) if er
         
     | 
| 
       557 
608 
     | 
    
         
             
                        end
         
     | 
| 
       558 
609 
     | 
    
         
             
                      end
         
     | 
| 
       559 
610 
     | 
    
         
             
                      rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
         
     | 
| 
         @@ -562,6 +613,39 @@ module REXML 
     | 
|
| 
       562 
613 
     | 
    
         
             
                  end
         
     | 
| 
       563 
614 
     | 
    
         | 
| 
       564 
615 
     | 
    
         
             
                  private
         
     | 
| 
      
 616 
     | 
    
         
            +
                  def add_namespace(prefix, uri)
         
     | 
| 
      
 617 
     | 
    
         
            +
                    @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
         
     | 
| 
      
 618 
     | 
    
         
            +
                    if uri.nil?
         
     | 
| 
      
 619 
     | 
    
         
            +
                      @namespaces.delete(prefix)
         
     | 
| 
      
 620 
     | 
    
         
            +
                    else
         
     | 
| 
      
 621 
     | 
    
         
            +
                      @namespaces[prefix] = uri
         
     | 
| 
      
 622 
     | 
    
         
            +
                    end
         
     | 
| 
      
 623 
     | 
    
         
            +
                  end
         
     | 
| 
      
 624 
     | 
    
         
            +
             
     | 
| 
      
 625 
     | 
    
         
            +
                  def push_namespaces_restore
         
     | 
| 
      
 626 
     | 
    
         
            +
                    namespaces_restore = {}
         
     | 
| 
      
 627 
     | 
    
         
            +
                    @namespaces_restore_stack.push(namespaces_restore)
         
     | 
| 
      
 628 
     | 
    
         
            +
                    namespaces_restore
         
     | 
| 
      
 629 
     | 
    
         
            +
                  end
         
     | 
| 
      
 630 
     | 
    
         
            +
             
     | 
| 
      
 631 
     | 
    
         
            +
                  def pop_namespaces_restore
         
     | 
| 
      
 632 
     | 
    
         
            +
                    namespaces_restore = @namespaces_restore_stack.pop
         
     | 
| 
      
 633 
     | 
    
         
            +
                    namespaces_restore.each do |prefix, uri|
         
     | 
| 
      
 634 
     | 
    
         
            +
                      if uri.nil?
         
     | 
| 
      
 635 
     | 
    
         
            +
                        @namespaces.delete(prefix)
         
     | 
| 
      
 636 
     | 
    
         
            +
                      else
         
     | 
| 
      
 637 
     | 
    
         
            +
                        @namespaces[prefix] = uri
         
     | 
| 
      
 638 
     | 
    
         
            +
                      end
         
     | 
| 
      
 639 
     | 
    
         
            +
                    end
         
     | 
| 
      
 640 
     | 
    
         
            +
                  end
         
     | 
| 
      
 641 
     | 
    
         
            +
             
     | 
| 
      
 642 
     | 
    
         
            +
                  def record_entity_expansion(delta=1)
         
     | 
| 
      
 643 
     | 
    
         
            +
                    @entity_expansion_count += delta
         
     | 
| 
      
 644 
     | 
    
         
            +
                    if @entity_expansion_count > @entity_expansion_limit
         
     | 
| 
      
 645 
     | 
    
         
            +
                      raise "number of entity expansions exceeded, processing aborted."
         
     | 
| 
      
 646 
     | 
    
         
            +
                    end
         
     | 
| 
      
 647 
     | 
    
         
            +
                  end
         
     | 
| 
      
 648 
     | 
    
         
            +
             
     | 
| 
       565 
649 
     | 
    
         
             
                  def need_source_encoding_update?(xml_declaration_encoding)
         
     | 
| 
       566 
650 
     | 
    
         
             
                    return false if xml_declaration_encoding.nil?
         
     | 
| 
       567 
651 
     | 
    
         
             
                    return false if /\AUTF-16\z/i =~ xml_declaration_encoding
         
     | 
| 
         @@ -571,14 +655,14 @@ module REXML 
     | 
|
| 
       571 
655 
     | 
    
         
             
                  def parse_name(base_error_message)
         
     | 
| 
       572 
656 
     | 
    
         
             
                    md = @source.match(Private::NAME_PATTERN, true)
         
     | 
| 
       573 
657 
     | 
    
         
             
                    unless md
         
     | 
| 
       574 
     | 
    
         
            -
                      if @source.match(/\ 
     | 
| 
      
 658 
     | 
    
         
            +
                      if @source.match?(/\S/um)
         
     | 
| 
       575 
659 
     | 
    
         
             
                        message = "#{base_error_message}: invalid name"
         
     | 
| 
       576 
660 
     | 
    
         
             
                      else
         
     | 
| 
       577 
661 
     | 
    
         
             
                        message = "#{base_error_message}: name is missing"
         
     | 
| 
       578 
662 
     | 
    
         
             
                      end
         
     | 
| 
       579 
663 
     | 
    
         
             
                      raise REXML::ParseException.new(message, @source)
         
     | 
| 
       580 
664 
     | 
    
         
             
                    end
         
     | 
| 
       581 
     | 
    
         
            -
                    md[ 
     | 
| 
      
 665 
     | 
    
         
            +
                    md[0]
         
     | 
| 
       582 
666 
     | 
    
         
             
                  end
         
     | 
| 
       583 
667 
     | 
    
         | 
| 
       584 
668 
     | 
    
         
             
                  def parse_id(base_error_message,
         
     | 
| 
         @@ -613,52 +697,58 @@ module REXML 
     | 
|
| 
       613 
697 
     | 
    
         
             
                                               accept_public_id:)
         
     | 
| 
       614 
698 
     | 
    
         
             
                    public = /\A\s*PUBLIC/um
         
     | 
| 
       615 
699 
     | 
    
         
             
                    system = /\A\s*SYSTEM/um
         
     | 
| 
       616 
     | 
    
         
            -
                    if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
         
     | 
| 
       617 
     | 
    
         
            -
                      if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
         
     | 
| 
      
 700 
     | 
    
         
            +
                    if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
         
     | 
| 
      
 701 
     | 
    
         
            +
                      if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
         
     | 
| 
       618 
702 
     | 
    
         
             
                        return "public ID literal is missing"
         
     | 
| 
       619 
703 
     | 
    
         
             
                      end
         
     | 
| 
       620 
     | 
    
         
            -
                      unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
         
     | 
| 
      
 704 
     | 
    
         
            +
                      unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
         
     | 
| 
       621 
705 
     | 
    
         
             
                        return "invalid public ID literal"
         
     | 
| 
       622 
706 
     | 
    
         
             
                      end
         
     | 
| 
       623 
707 
     | 
    
         
             
                      if accept_public_id
         
     | 
| 
       624 
     | 
    
         
            -
                        if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
         
     | 
| 
      
 708 
     | 
    
         
            +
                        if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
         
     | 
| 
       625 
709 
     | 
    
         
             
                          return "system ID literal is missing"
         
     | 
| 
       626 
710 
     | 
    
         
             
                        end
         
     | 
| 
       627 
     | 
    
         
            -
                        unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
         
     | 
| 
      
 711 
     | 
    
         
            +
                        unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
         
     | 
| 
       628 
712 
     | 
    
         
             
                          return "invalid system literal"
         
     | 
| 
       629 
713 
     | 
    
         
             
                        end
         
     | 
| 
       630 
714 
     | 
    
         
             
                        "garbage after system literal"
         
     | 
| 
       631 
715 
     | 
    
         
             
                      else
         
     | 
| 
       632 
716 
     | 
    
         
             
                        "garbage after public ID literal"
         
     | 
| 
       633 
717 
     | 
    
         
             
                      end
         
     | 
| 
       634 
     | 
    
         
            -
                    elsif accept_external_id and @source.match(/#{system}/um)
         
     | 
| 
       635 
     | 
    
         
            -
                      if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
         
     | 
| 
      
 718 
     | 
    
         
            +
                    elsif accept_external_id and @source.match?(/#{system}/um)
         
     | 
| 
      
 719 
     | 
    
         
            +
                      if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
         
     | 
| 
       636 
720 
     | 
    
         
             
                        return "system literal is missing"
         
     | 
| 
       637 
721 
     | 
    
         
             
                      end
         
     | 
| 
       638 
     | 
    
         
            -
                      unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
         
     | 
| 
      
 722 
     | 
    
         
            +
                      unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
         
     | 
| 
       639 
723 
     | 
    
         
             
                        return "invalid system literal"
         
     | 
| 
       640 
724 
     | 
    
         
             
                      end
         
     | 
| 
       641 
725 
     | 
    
         
             
                      "garbage after system literal"
         
     | 
| 
       642 
726 
     | 
    
         
             
                    else
         
     | 
| 
       643 
     | 
    
         
            -
                      unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
         
     | 
| 
      
 727 
     | 
    
         
            +
                      unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
         
     | 
| 
       644 
728 
     | 
    
         
             
                        return "invalid ID type"
         
     | 
| 
       645 
729 
     | 
    
         
             
                      end
         
     | 
| 
       646 
730 
     | 
    
         
             
                      "ID type is missing"
         
     | 
| 
       647 
731 
     | 
    
         
             
                    end
         
     | 
| 
       648 
732 
     | 
    
         
             
                  end
         
     | 
| 
       649 
733 
     | 
    
         | 
| 
       650 
     | 
    
         
            -
                  def process_instruction 
     | 
| 
       651 
     | 
    
         
            -
                     
     | 
| 
       652 
     | 
    
         
            -
                     
     | 
| 
       653 
     | 
    
         
            -
                       
     | 
| 
       654 
     | 
    
         
            -
                       
     | 
| 
       655 
     | 
    
         
            -
             
     | 
| 
      
 734 
     | 
    
         
            +
                  def process_instruction
         
     | 
| 
      
 735 
     | 
    
         
            +
                    name = parse_name("Malformed XML: Invalid processing instruction node")
         
     | 
| 
      
 736 
     | 
    
         
            +
                    if @source.match?(/\s+/um, true)
         
     | 
| 
      
 737 
     | 
    
         
            +
                      match_data = @source.match(/(.*?)\?>/um, true)
         
     | 
| 
      
 738 
     | 
    
         
            +
                      unless match_data
         
     | 
| 
      
 739 
     | 
    
         
            +
                        raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
         
     | 
| 
      
 740 
     | 
    
         
            +
                      end
         
     | 
| 
      
 741 
     | 
    
         
            +
                      content = match_data[1]
         
     | 
| 
      
 742 
     | 
    
         
            +
                    else
         
     | 
| 
      
 743 
     | 
    
         
            +
                      content = nil
         
     | 
| 
      
 744 
     | 
    
         
            +
                      unless @source.match?("?>", true)
         
     | 
| 
      
 745 
     | 
    
         
            +
                        raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
         
     | 
| 
      
 746 
     | 
    
         
            +
                      end
         
     | 
| 
       656 
747 
     | 
    
         
             
                    end
         
     | 
| 
       657 
     | 
    
         
            -
                    if  
     | 
| 
      
 748 
     | 
    
         
            +
                    if name == "xml"
         
     | 
| 
       658 
749 
     | 
    
         
             
                      if @document_status
         
     | 
| 
       659 
750 
     | 
    
         
             
                        raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
         
     | 
| 
       660 
751 
     | 
    
         
             
                      end
         
     | 
| 
       661 
     | 
    
         
            -
                      content = match_data[2]
         
     | 
| 
       662 
752 
     | 
    
         
             
                      version = VERSION.match(content)
         
     | 
| 
       663 
753 
     | 
    
         
             
                      version = version[1] unless version.nil?
         
     | 
| 
       664 
754 
     | 
    
         
             
                      encoding = ENCODING.match(content)
         
     | 
| 
         @@ -673,16 +763,17 @@ module REXML 
     | 
|
| 
       673 
763 
     | 
    
         
             
                      standalone = standalone[1] unless standalone.nil?
         
     | 
| 
       674 
764 
     | 
    
         
             
                      return [ :xmldecl, version, encoding, standalone ]
         
     | 
| 
       675 
765 
     | 
    
         
             
                    end
         
     | 
| 
       676 
     | 
    
         
            -
                    [:processing_instruction,  
     | 
| 
      
 766 
     | 
    
         
            +
                    [:processing_instruction, name, content]
         
     | 
| 
       677 
767 
     | 
    
         
             
                  end
         
     | 
| 
       678 
768 
     | 
    
         | 
| 
       679 
     | 
    
         
            -
                  def parse_attributes(prefixes 
     | 
| 
      
 769 
     | 
    
         
            +
                  def parse_attributes(prefixes)
         
     | 
| 
       680 
770 
     | 
    
         
             
                    attributes = {}
         
     | 
| 
      
 771 
     | 
    
         
            +
                    expanded_names = {}
         
     | 
| 
       681 
772 
     | 
    
         
             
                    closed = false
         
     | 
| 
       682 
773 
     | 
    
         
             
                    while true
         
     | 
| 
       683 
     | 
    
         
            -
                      if @source.match(">", true)
         
     | 
| 
      
 774 
     | 
    
         
            +
                      if @source.match?(">", true)
         
     | 
| 
       684 
775 
     | 
    
         
             
                        return attributes, closed
         
     | 
| 
       685 
     | 
    
         
            -
                      elsif @source.match("/>", true)
         
     | 
| 
      
 776 
     | 
    
         
            +
                      elsif @source.match?("/>", true)
         
     | 
| 
       686 
777 
     | 
    
         
             
                        closed = true
         
     | 
| 
       687 
778 
     | 
    
         
             
                        return attributes, closed
         
     | 
| 
       688 
779 
     | 
    
         
             
                      elsif match = @source.match(QNAME, true)
         
     | 
| 
         @@ -690,7 +781,7 @@ module REXML 
     | 
|
| 
       690 
781 
     | 
    
         
             
                        prefix = match[2]
         
     | 
| 
       691 
782 
     | 
    
         
             
                        local_part = match[3]
         
     | 
| 
       692 
783 
     | 
    
         | 
| 
       693 
     | 
    
         
            -
                        unless @source.match(/\s*=\s*/um, true)
         
     | 
| 
      
 784 
     | 
    
         
            +
                        unless @source.match?(/\s*=\s*/um, true)
         
     | 
| 
       694 
785 
     | 
    
         
             
                          message = "Missing attribute equal: <#{name}>"
         
     | 
| 
       695 
786 
     | 
    
         
             
                          raise REXML::ParseException.new(message, @source)
         
     | 
| 
       696 
787 
     | 
    
         
             
                        end
         
     | 
| 
         @@ -706,10 +797,10 @@ module REXML 
     | 
|
| 
       706 
797 
     | 
    
         
             
                          message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
         
     | 
| 
       707 
798 
     | 
    
         
             
                          raise REXML::ParseException.new(message, @source)
         
     | 
| 
       708 
799 
     | 
    
         
             
                        end
         
     | 
| 
       709 
     | 
    
         
            -
                        @source.match(/\s*/um, true)
         
     | 
| 
      
 800 
     | 
    
         
            +
                        @source.match?(/\s*/um, true)
         
     | 
| 
       710 
801 
     | 
    
         
             
                        if prefix == "xmlns"
         
     | 
| 
       711 
802 
     | 
    
         
             
                          if local_part == "xml"
         
     | 
| 
       712 
     | 
    
         
            -
                            if value !=  
     | 
| 
      
 803 
     | 
    
         
            +
                            if value != Private::XML_PREFIXED_NAMESPACE
         
     | 
| 
       713 
804 
     | 
    
         
             
                              msg = "The 'xml' prefix must not be bound to any other namespace "+
         
     | 
| 
       714 
805 
     | 
    
         
             
                                "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
         
     | 
| 
       715 
806 
     | 
    
         
             
                              raise REXML::ParseException.new( msg, @source, self )
         
     | 
| 
         @@ -719,7 +810,7 @@ module REXML 
     | 
|
| 
       719 
810 
     | 
    
         
             
                              "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
         
     | 
| 
       720 
811 
     | 
    
         
             
                            raise REXML::ParseException.new( msg, @source, self)
         
     | 
| 
       721 
812 
     | 
    
         
             
                          end
         
     | 
| 
       722 
     | 
    
         
            -
                           
     | 
| 
      
 813 
     | 
    
         
            +
                          add_namespace(local_part, value)
         
     | 
| 
       723 
814 
     | 
    
         
             
                        elsif prefix
         
     | 
| 
       724 
815 
     | 
    
         
             
                          prefixes << prefix unless prefix == "xml"
         
     | 
| 
       725 
816 
     | 
    
         
             
                        end
         
     | 
| 
         @@ -729,6 +820,20 @@ module REXML 
     | 
|
| 
       729 
820 
     | 
    
         
             
                          raise REXML::ParseException.new(msg, @source, self)
         
     | 
| 
       730 
821 
     | 
    
         
             
                        end
         
     | 
| 
       731 
822 
     | 
    
         | 
| 
      
 823 
     | 
    
         
            +
                        unless prefix == "xmlns"
         
     | 
| 
      
 824 
     | 
    
         
            +
                          uri = @namespaces[prefix]
         
     | 
| 
      
 825 
     | 
    
         
            +
                          expanded_name = [uri, local_part]
         
     | 
| 
      
 826 
     | 
    
         
            +
                          existing_prefix = expanded_names[expanded_name]
         
     | 
| 
      
 827 
     | 
    
         
            +
                          if existing_prefix
         
     | 
| 
      
 828 
     | 
    
         
            +
                            message = "Namespace conflict in adding attribute " +
         
     | 
| 
      
 829 
     | 
    
         
            +
                                      "\"#{local_part}\": " +
         
     | 
| 
      
 830 
     | 
    
         
            +
                                      "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
         
     | 
| 
      
 831 
     | 
    
         
            +
                                      "prefix \"#{prefix}\" = \"#{uri}\""
         
     | 
| 
      
 832 
     | 
    
         
            +
                            raise REXML::ParseException.new(message, @source, self)
         
     | 
| 
      
 833 
     | 
    
         
            +
                          end
         
     | 
| 
      
 834 
     | 
    
         
            +
                          expanded_names[expanded_name] = prefix
         
     | 
| 
      
 835 
     | 
    
         
            +
                        end
         
     | 
| 
      
 836 
     | 
    
         
            +
             
     | 
| 
       732 
837 
     | 
    
         
             
                        attributes[name] = value
         
     | 
| 
       733 
838 
     | 
    
         
             
                      else
         
     | 
| 
       734 
839 
     | 
    
         
             
                        message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
         
     | 
| 
         @@ -47,6 +47,18 @@ module REXML 
     | 
|
| 
       47 
47 
     | 
    
         
             
                    @listeners << listener
         
     | 
| 
       48 
48 
     | 
    
         
             
                  end
         
     | 
| 
       49 
49 
     | 
    
         | 
| 
      
 50 
     | 
    
         
            +
                  def entity_expansion_count
         
     | 
| 
      
 51 
     | 
    
         
            +
                    @parser.entity_expansion_count
         
     | 
| 
      
 52 
     | 
    
         
            +
                  end
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
                  def entity_expansion_limit=( limit )
         
     | 
| 
      
 55 
     | 
    
         
            +
                    @parser.entity_expansion_limit = limit
         
     | 
| 
      
 56 
     | 
    
         
            +
                  end
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
                  def entity_expansion_text_limit=( limit )
         
     | 
| 
      
 59 
     | 
    
         
            +
                    @parser.entity_expansion_text_limit = limit
         
     | 
| 
      
 60 
     | 
    
         
            +
                  end
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
       50 
62 
     | 
    
         
             
                  def each
         
     | 
| 
       51 
63 
     | 
    
         
             
                    while has_next?
         
     | 
| 
       52 
64 
     | 
    
         
             
                      yield self.pull
         
     | 
| 
         @@ -81,6 +93,10 @@ module REXML 
     | 
|
| 
       81 
93 
     | 
    
         
             
                  def unshift token
         
     | 
| 
       82 
94 
     | 
    
         
             
                    @my_stack.unshift token
         
     | 
| 
       83 
95 
     | 
    
         
             
                  end
         
     | 
| 
      
 96 
     | 
    
         
            +
             
     | 
| 
      
 97 
     | 
    
         
            +
                  def reset
         
     | 
| 
      
 98 
     | 
    
         
            +
                    @parser.reset
         
     | 
| 
      
 99 
     | 
    
         
            +
                  end
         
     | 
| 
       84 
100 
     | 
    
         
             
                end
         
     | 
| 
       85 
101 
     | 
    
         | 
| 
       86 
102 
     | 
    
         
             
                # A parsing event.  The contents of the event are accessed as an +Array?,
         
     |