rexml 3.3.2 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +200 -0
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +14 -16
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/parsers/baseparser.rb +206 -101
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +14 -0
- data/lib/rexml/parsers/streamparser.rb +15 -9
- data/lib/rexml/parsers/treeparser.rb +0 -7
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +63 -12
- data/lib/rexml/text.rb +20 -43
- metadata +8 -19
| @@ -1,12 +1,29 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 2 | 
             
            require_relative '../parseexception'
         | 
| 3 3 | 
             
            require_relative '../undefinednamespaceexception'
         | 
| 4 | 
            +
            require_relative '../security'
         | 
| 4 5 | 
             
            require_relative '../source'
         | 
| 5 6 | 
             
            require 'set'
         | 
| 6 7 | 
             
            require "strscan"
         | 
| 7 8 |  | 
| 8 9 | 
             
            module REXML
         | 
| 9 10 | 
             
              module Parsers
         | 
| 11 | 
            +
                unless [].respond_to?(:tally)
         | 
| 12 | 
            +
                  module EnumerableTally
         | 
| 13 | 
            +
                    refine Enumerable do
         | 
| 14 | 
            +
                      def tally
         | 
| 15 | 
            +
                        counts = {}
         | 
| 16 | 
            +
                        each do |item|
         | 
| 17 | 
            +
                          counts[item] ||= 0
         | 
| 18 | 
            +
                          counts[item] += 1
         | 
| 19 | 
            +
                        end
         | 
| 20 | 
            +
                        counts
         | 
| 21 | 
            +
                      end
         | 
| 22 | 
            +
                    end
         | 
| 23 | 
            +
                  end
         | 
| 24 | 
            +
                  using EnumerableTally
         | 
| 25 | 
            +
                end
         | 
| 26 | 
            +
             | 
| 10 27 | 
             
                if StringScanner::Version < "3.0.8"
         | 
| 11 28 | 
             
                  module StringScannerCaptures
         | 
| 12 29 | 
             
                    refine StringScanner do
         | 
| @@ -124,29 +141,22 @@ module REXML | |
| 124 141 | 
             
                  }
         | 
| 125 142 |  | 
| 126 143 | 
             
                  module Private
         | 
| 127 | 
            -
                     | 
| 128 | 
            -
                    INSTRUCTION_TERM = "?>"
         | 
| 129 | 
            -
                    COMMENT_TERM = "-->"
         | 
| 130 | 
            -
                    CDATA_TERM = "]]>"
         | 
| 131 | 
            -
                    DOCTYPE_TERM = "]>"
         | 
| 132 | 
            -
                    # Read to the end of DOCTYPE because there is no proper ENTITY termination
         | 
| 133 | 
            -
                    ENTITY_TERM = DOCTYPE_TERM
         | 
| 134 | 
            -
             | 
| 135 | 
            -
                    INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
         | 
| 144 | 
            +
                    PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
         | 
| 136 145 | 
             
                    TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
         | 
| 137 146 | 
             
                    CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
         | 
| 138 147 | 
             
                    ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
         | 
| 139 | 
            -
                    NAME_PATTERN =  | 
| 148 | 
            +
                    NAME_PATTERN = /#{NAME}/um
         | 
| 140 149 | 
             
                    GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
         | 
| 141 150 | 
             
                    PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
         | 
| 142 151 | 
             
                    ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
         | 
| 143 152 | 
             
                    CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
         | 
| 144 | 
            -
                    CHARACTER_REFERENCES = /&# | 
| 153 | 
            +
                    CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
         | 
| 145 154 | 
             
                    DEFAULT_ENTITIES_PATTERNS = {}
         | 
| 146 155 | 
             
                    default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
         | 
| 147 156 | 
             
                    default_entities.each do |term|
         | 
| 148 157 | 
             
                      DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
         | 
| 149 158 | 
             
                    end
         | 
| 159 | 
            +
                    XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
         | 
| 150 160 | 
             
                  end
         | 
| 151 161 | 
             
                  private_constant :Private
         | 
| 152 162 |  | 
| @@ -154,6 +164,10 @@ module REXML | |
| 154 164 | 
             
                    self.stream = source
         | 
| 155 165 | 
             
                    @listeners = []
         | 
| 156 166 | 
             
                    @prefixes = Set.new
         | 
| 167 | 
            +
                    @entity_expansion_count = 0
         | 
| 168 | 
            +
                    @entity_expansion_limit = Security.entity_expansion_limit
         | 
| 169 | 
            +
                    @entity_expansion_text_limit = Security.entity_expansion_text_limit
         | 
| 170 | 
            +
                    @source.ensure_buffer
         | 
| 157 171 | 
             
                  end
         | 
| 158 172 |  | 
| 159 173 | 
             
                  def add_listener( listener )
         | 
| @@ -161,16 +175,24 @@ module REXML | |
| 161 175 | 
             
                  end
         | 
| 162 176 |  | 
| 163 177 | 
             
                  attr_reader :source
         | 
| 178 | 
            +
                  attr_reader :entity_expansion_count
         | 
| 179 | 
            +
                  attr_writer :entity_expansion_limit
         | 
| 180 | 
            +
                  attr_writer :entity_expansion_text_limit
         | 
| 164 181 |  | 
| 165 182 | 
             
                  def stream=( source )
         | 
| 166 183 | 
             
                    @source = SourceFactory.create_from( source )
         | 
| 184 | 
            +
                    reset
         | 
| 185 | 
            +
                  end
         | 
| 186 | 
            +
             | 
| 187 | 
            +
                  def reset
         | 
| 167 188 | 
             
                    @closed = nil
         | 
| 168 189 | 
             
                    @have_root = false
         | 
| 169 190 | 
             
                    @document_status = nil
         | 
| 170 191 | 
             
                    @tags = []
         | 
| 171 192 | 
             
                    @stack = []
         | 
| 172 193 | 
             
                    @entities = []
         | 
| 173 | 
            -
                    @ | 
| 194 | 
            +
                    @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
         | 
| 195 | 
            +
                    @namespaces_restore_stack = []
         | 
| 174 196 | 
             
                  end
         | 
| 175 197 |  | 
| 176 198 | 
             
                  def position
         | 
| @@ -238,6 +260,10 @@ module REXML | |
| 238 260 | 
             
                      if @document_status == :in_doctype
         | 
| 239 261 | 
             
                        raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
         | 
| 240 262 | 
             
                      end
         | 
| 263 | 
            +
                      unless @tags.empty?
         | 
| 264 | 
            +
                        path = "/" + @tags.join("/")
         | 
| 265 | 
            +
                        raise ParseException.new("Missing end tag for '#{path}'", @source)
         | 
| 266 | 
            +
                      end
         | 
| 241 267 | 
             
                      return [ :end_document ]
         | 
| 242 268 | 
             
                    end
         | 
| 243 269 | 
             
                    return @stack.shift if @stack.size > 0
         | 
| @@ -247,11 +273,11 @@ module REXML | |
| 247 273 | 
             
                    @source.ensure_buffer
         | 
| 248 274 | 
             
                    if @document_status == nil
         | 
| 249 275 | 
             
                      start_position = @source.position
         | 
| 250 | 
            -
                      if @source.match("<?", true)
         | 
| 251 | 
            -
                        return process_instruction | 
| 252 | 
            -
                      elsif @source.match("<!", true)
         | 
| 253 | 
            -
                        if @source.match("--", true)
         | 
| 254 | 
            -
                          md = @source.match(/(.*?)-->/um, true | 
| 276 | 
            +
                      if @source.match?("<?", true)
         | 
| 277 | 
            +
                        return process_instruction
         | 
| 278 | 
            +
                      elsif @source.match?("<!", true)
         | 
| 279 | 
            +
                        if @source.match?("--", true)
         | 
| 280 | 
            +
                          md = @source.match(/(.*?)-->/um, true)
         | 
| 255 281 | 
             
                          if md.nil?
         | 
| 256 282 | 
             
                            raise REXML::ParseException.new("Unclosed comment", @source)
         | 
| 257 283 | 
             
                          end
         | 
| @@ -259,10 +285,10 @@ module REXML | |
| 259 285 | 
             
                            raise REXML::ParseException.new("Malformed comment", @source)
         | 
| 260 286 | 
             
                          end
         | 
| 261 287 | 
             
                          return [ :comment, md[1] ]
         | 
| 262 | 
            -
                        elsif @source.match("DOCTYPE", true)
         | 
| 288 | 
            +
                        elsif @source.match?("DOCTYPE", true)
         | 
| 263 289 | 
             
                          base_error_message = "Malformed DOCTYPE"
         | 
| 264 | 
            -
                          unless @source.match(/\s+/um, true)
         | 
| 265 | 
            -
                            if @source.match(">")
         | 
| 290 | 
            +
                          unless @source.match?(/\s+/um, true)
         | 
| 291 | 
            +
                            if @source.match?(">")
         | 
| 266 292 | 
             
                              message = "#{base_error_message}: name is missing"
         | 
| 267 293 | 
             
                            else
         | 
| 268 294 | 
             
                              message = "#{base_error_message}: invalid name"
         | 
| @@ -270,12 +296,11 @@ module REXML | |
| 270 296 | 
             
                            @source.position = start_position
         | 
| 271 297 | 
             
                            raise REXML::ParseException.new(message, @source)
         | 
| 272 298 | 
             
                          end
         | 
| 273 | 
            -
                          @nsstack.unshift(Set.new)
         | 
| 274 299 | 
             
                          name = parse_name(base_error_message)
         | 
| 275 | 
            -
                          if @source.match(/\s*\[/um, true)
         | 
| 300 | 
            +
                          if @source.match?(/\s*\[/um, true)
         | 
| 276 301 | 
             
                            id = [nil, nil, nil]
         | 
| 277 302 | 
             
                            @document_status = :in_doctype
         | 
| 278 | 
            -
                          elsif @source.match(/\s*>/um, true)
         | 
| 303 | 
            +
                          elsif @source.match?(/\s*>/um, true)
         | 
| 279 304 | 
             
                            id = [nil, nil, nil]
         | 
| 280 305 | 
             
                            @document_status = :after_doctype
         | 
| 281 306 | 
             
                            @source.ensure_buffer
         | 
| @@ -287,9 +312,9 @@ module REXML | |
| 287 312 | 
             
                              # For backward compatibility
         | 
| 288 313 | 
             
                              id[1], id[2] = id[2], nil
         | 
| 289 314 | 
             
                            end
         | 
| 290 | 
            -
                            if @source.match(/\s*\[/um, true)
         | 
| 315 | 
            +
                            if @source.match?(/\s*\[/um, true)
         | 
| 291 316 | 
             
                              @document_status = :in_doctype
         | 
| 292 | 
            -
                            elsif @source.match(/\s*>/um, true)
         | 
| 317 | 
            +
                            elsif @source.match?(/\s*>/um, true)
         | 
| 293 318 | 
             
                              @document_status = :after_doctype
         | 
| 294 319 | 
             
                              @source.ensure_buffer
         | 
| 295 320 | 
             
                            else
         | 
| @@ -299,7 +324,7 @@ module REXML | |
| 299 324 | 
             
                          end
         | 
| 300 325 | 
             
                          args = [:start_doctype, name, *id]
         | 
| 301 326 | 
             
                          if @document_status == :after_doctype
         | 
| 302 | 
            -
                            @source.match(/\s*/um, true)
         | 
| 327 | 
            +
                            @source.match?(/\s*/um, true)
         | 
| 303 328 | 
             
                            @stack << [ :end_doctype ]
         | 
| 304 329 | 
             
                          end
         | 
| 305 330 | 
             
                          return args
         | 
| @@ -310,15 +335,19 @@ module REXML | |
| 310 335 | 
             
                      end
         | 
| 311 336 | 
             
                    end
         | 
| 312 337 | 
             
                    if @document_status == :in_doctype
         | 
| 313 | 
            -
                      @source.match(/\s*/um, true) # skip spaces
         | 
| 338 | 
            +
                      @source.match?(/\s*/um, true) # skip spaces
         | 
| 314 339 | 
             
                      start_position = @source.position
         | 
| 315 | 
            -
                      if @source.match("<!", true)
         | 
| 316 | 
            -
                        if @source.match("ELEMENT", true)
         | 
| 340 | 
            +
                      if @source.match?("<!", true)
         | 
| 341 | 
            +
                        if @source.match?("ELEMENT", true)
         | 
| 317 342 | 
             
                          md = @source.match(/(.*?)>/um, true)
         | 
| 318 343 | 
             
                          raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
         | 
| 319 344 | 
             
                          return [ :elementdecl, "<!ELEMENT" + md[1] ]
         | 
| 320 | 
            -
                        elsif @source.match("ENTITY", true)
         | 
| 321 | 
            -
                           | 
| 345 | 
            +
                        elsif @source.match?("ENTITY", true)
         | 
| 346 | 
            +
                          match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
         | 
| 347 | 
            +
                          unless match_data
         | 
| 348 | 
            +
                            raise REXML::ParseException.new("Malformed entity declaration", @source)
         | 
| 349 | 
            +
                          end
         | 
| 350 | 
            +
                          match = [:entitydecl, *match_data.captures.compact]
         | 
| 322 351 | 
             
                          ref = false
         | 
| 323 352 | 
             
                          if match[1] == '%'
         | 
| 324 353 | 
             
                            ref = true
         | 
| @@ -336,6 +365,8 @@ module REXML | |
| 336 365 | 
             
                            match[4] = match[4][1..-2] # HREF
         | 
| 337 366 | 
             
                            match.delete_at(5) if match.size > 5 # Chop out NDATA decl
         | 
| 338 367 | 
             
                            # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
         | 
| 368 | 
            +
                          elsif Private::PEREFERENCE_PATTERN.match?(match[2])
         | 
| 369 | 
            +
                            raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
         | 
| 339 370 | 
             
                          else
         | 
| 340 371 | 
             
                            match[2] = match[2][1..-2]
         | 
| 341 372 | 
             
                            match.pop if match.size == 4
         | 
| @@ -343,7 +374,7 @@ module REXML | |
| 343 374 | 
             
                          end
         | 
| 344 375 | 
             
                          match << '%' if ref
         | 
| 345 376 | 
             
                          return match
         | 
| 346 | 
            -
                        elsif @source.match("ATTLIST", true)
         | 
| 377 | 
            +
                        elsif @source.match?("ATTLIST", true)
         | 
| 347 378 | 
             
                          md = @source.match(Private::ATTLISTDECL_END, true)
         | 
| 348 379 | 
             
                          raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
         | 
| 349 380 | 
             
                          element = md[1]
         | 
| @@ -358,15 +389,15 @@ module REXML | |
| 358 389 | 
             
                              val = attdef[4] if val == "#FIXED "
         | 
| 359 390 | 
             
                              pairs[attdef[0]] = val
         | 
| 360 391 | 
             
                              if attdef[0] =~ /^xmlns:(.*)/
         | 
| 361 | 
            -
                                @ | 
| 392 | 
            +
                                @namespaces[$1] = val
         | 
| 362 393 | 
             
                              end
         | 
| 363 394 | 
             
                            end
         | 
| 364 395 | 
             
                          end
         | 
| 365 396 | 
             
                          return [ :attlistdecl, element, pairs, contents ]
         | 
| 366 | 
            -
                        elsif @source.match("NOTATION", true)
         | 
| 397 | 
            +
                        elsif @source.match?("NOTATION", true)
         | 
| 367 398 | 
             
                          base_error_message = "Malformed notation declaration"
         | 
| 368 | 
            -
                          unless @source.match(/\s+/um, true)
         | 
| 369 | 
            -
                            if @source.match(">")
         | 
| 399 | 
            +
                          unless @source.match?(/\s+/um, true)
         | 
| 400 | 
            +
                            if @source.match?(">")
         | 
| 370 401 | 
             
                              message = "#{base_error_message}: name is missing"
         | 
| 371 402 | 
             
                            else
         | 
| 372 403 | 
             
                              message = "#{base_error_message}: invalid name"
         | 
| @@ -378,21 +409,21 @@ module REXML | |
| 378 409 | 
             
                          id = parse_id(base_error_message,
         | 
| 379 410 | 
             
                                        accept_external_id: true,
         | 
| 380 411 | 
             
                                        accept_public_id: true)
         | 
| 381 | 
            -
                          unless @source.match(/\s*>/um, true)
         | 
| 412 | 
            +
                          unless @source.match?(/\s*>/um, true)
         | 
| 382 413 | 
             
                            message = "#{base_error_message}: garbage before end >"
         | 
| 383 414 | 
             
                            raise REXML::ParseException.new(message, @source)
         | 
| 384 415 | 
             
                          end
         | 
| 385 416 | 
             
                          return [:notationdecl, name, *id]
         | 
| 386 | 
            -
                        elsif md = @source.match(/--(.*?)-->/um, true | 
| 417 | 
            +
                        elsif md = @source.match(/--(.*?)-->/um, true)
         | 
| 387 418 | 
             
                          case md[1]
         | 
| 388 419 | 
             
                          when /--/, /-\z/
         | 
| 389 420 | 
             
                            raise REXML::ParseException.new("Malformed comment", @source)
         | 
| 390 421 | 
             
                          end
         | 
| 391 422 | 
             
                          return [ :comment, md[1] ] if md
         | 
| 392 423 | 
             
                        end
         | 
| 393 | 
            -
                      elsif match = @source.match(/(%.*?;)\s*/um, true | 
| 424 | 
            +
                      elsif match = @source.match(/(%.*?;)\s*/um, true)
         | 
| 394 425 | 
             
                        return [ :externalentity, match[1] ]
         | 
| 395 | 
            -
                      elsif @source.match(/\]\s*>/um, true)
         | 
| 426 | 
            +
                      elsif @source.match?(/\]\s*>/um, true)
         | 
| 396 427 | 
             
                        @document_status = :after_doctype
         | 
| 397 428 | 
             
                        return [ :end_doctype ]
         | 
| 398 429 | 
             
                      end
         | 
| @@ -401,17 +432,17 @@ module REXML | |
| 401 432 | 
             
                      end
         | 
| 402 433 | 
             
                    end
         | 
| 403 434 | 
             
                    if @document_status == :after_doctype
         | 
| 404 | 
            -
                      @source.match(/\s*/um, true)
         | 
| 435 | 
            +
                      @source.match?(/\s*/um, true)
         | 
| 405 436 | 
             
                    end
         | 
| 406 437 | 
             
                    begin
         | 
| 407 438 | 
             
                      start_position = @source.position
         | 
| 408 | 
            -
                      if @source.match("<", true)
         | 
| 439 | 
            +
                      if @source.match?("<", true)
         | 
| 409 440 | 
             
                        # :text's read_until may remain only "<" in buffer. In the
         | 
| 410 441 | 
             
                        # case, buffer is empty here. So we need to fill buffer
         | 
| 411 442 | 
             
                        # here explicitly.
         | 
| 412 443 | 
             
                        @source.ensure_buffer
         | 
| 413 | 
            -
                        if @source.match("/", true)
         | 
| 414 | 
            -
                          @ | 
| 444 | 
            +
                        if @source.match?("/", true)
         | 
| 445 | 
            +
                          @namespaces_restore_stack.pop
         | 
| 415 446 | 
             
                          last_tag = @tags.pop
         | 
| 416 447 | 
             
                          md = @source.match(Private::CLOSE_PATTERN, true)
         | 
| 417 448 | 
             
                          if md and !last_tag
         | 
| @@ -425,12 +456,12 @@ module REXML | |
| 425 456 | 
             
                            raise REXML::ParseException.new(message, @source)
         | 
| 426 457 | 
             
                          end
         | 
| 427 458 | 
             
                          return [ :end_element, last_tag ]
         | 
| 428 | 
            -
                        elsif @source.match("!", true)
         | 
| 459 | 
            +
                        elsif @source.match?("!", true)
         | 
| 429 460 | 
             
                          md = @source.match(/([^>]*>)/um)
         | 
| 430 461 | 
             
                          #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
         | 
| 431 462 | 
             
                          raise REXML::ParseException.new("Malformed node", @source) unless md
         | 
| 432 463 | 
             
                          if md[0][0] == ?-
         | 
| 433 | 
            -
                            md = @source.match(/--(.*?)-->/um, true | 
| 464 | 
            +
                            md = @source.match(/--(.*?)-->/um, true)
         | 
| 434 465 |  | 
| 435 466 | 
             
                            if md.nil? || /--|-\z/.match?(md[1])
         | 
| 436 467 | 
             
                              raise REXML::ParseException.new("Malformed comment", @source)
         | 
| @@ -438,13 +469,13 @@ module REXML | |
| 438 469 |  | 
| 439 470 | 
             
                            return [ :comment, md[1] ]
         | 
| 440 471 | 
             
                          else
         | 
| 441 | 
            -
                            md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true | 
| 472 | 
            +
                            md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
         | 
| 442 473 | 
             
                            return [ :cdata, md[1] ] if md
         | 
| 443 474 | 
             
                          end
         | 
| 444 475 | 
             
                          raise REXML::ParseException.new( "Declarations can only occur "+
         | 
| 445 476 | 
             
                            "in the doctype declaration.", @source)
         | 
| 446 | 
            -
                        elsif @source.match("?", true)
         | 
| 447 | 
            -
                          return process_instruction | 
| 477 | 
            +
                        elsif @source.match?("?", true)
         | 
| 478 | 
            +
                          return process_instruction
         | 
| 448 479 | 
             
                        else
         | 
| 449 480 | 
             
                          # Get the next tag
         | 
| 450 481 | 
             
                          md = @source.match(Private::TAG_PATTERN, true)
         | 
| @@ -456,18 +487,18 @@ module REXML | |
| 456 487 | 
             
                          @document_status = :in_element
         | 
| 457 488 | 
             
                          @prefixes.clear
         | 
| 458 489 | 
             
                          @prefixes << md[2] if md[2]
         | 
| 459 | 
            -
                           | 
| 460 | 
            -
                          attributes, closed = parse_attributes(@prefixes | 
| 490 | 
            +
                          push_namespaces_restore
         | 
| 491 | 
            +
                          attributes, closed = parse_attributes(@prefixes)
         | 
| 461 492 | 
             
                          # Verify that all of the prefixes have been defined
         | 
| 462 493 | 
             
                          for prefix in @prefixes
         | 
| 463 | 
            -
                            unless @ | 
| 494 | 
            +
                            unless @namespaces.key?(prefix)
         | 
| 464 495 | 
             
                              raise UndefinedNamespaceException.new(prefix,@source,self)
         | 
| 465 496 | 
             
                            end
         | 
| 466 497 | 
             
                          end
         | 
| 467 498 |  | 
| 468 499 | 
             
                          if closed
         | 
| 469 500 | 
             
                            @closed = tag
         | 
| 470 | 
            -
                             | 
| 501 | 
            +
                            pop_namespaces_restore
         | 
| 471 502 | 
             
                          else
         | 
| 472 503 | 
             
                            if @tags.empty? and @have_root
         | 
| 473 504 | 
             
                              raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
         | 
| @@ -482,11 +513,15 @@ module REXML | |
| 482 513 | 
             
                        if text.chomp!("<")
         | 
| 483 514 | 
             
                          @source.position -= "<".bytesize
         | 
| 484 515 | 
             
                        end
         | 
| 485 | 
            -
                        if @tags.empty? | 
| 516 | 
            +
                        if @tags.empty?
         | 
| 486 517 | 
             
                          unless /\A\s*\z/.match?(text)
         | 
| 487 | 
            -
                             | 
| 518 | 
            +
                            if @have_root
         | 
| 519 | 
            +
                              raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
         | 
| 520 | 
            +
                            else
         | 
| 521 | 
            +
                              raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
         | 
| 522 | 
            +
                            end
         | 
| 488 523 | 
             
                          end
         | 
| 489 | 
            -
                          return pull_event
         | 
| 524 | 
            +
                          return pull_event if @have_root
         | 
| 490 525 | 
             
                        end
         | 
| 491 526 | 
             
                        return [ :text, text ]
         | 
| 492 527 | 
             
                      end
         | 
| @@ -503,13 +538,13 @@ module REXML | |
| 503 538 | 
             
                  private :pull_event
         | 
| 504 539 |  | 
| 505 540 | 
             
                  def entity( reference, entities )
         | 
| 506 | 
            -
                     | 
| 507 | 
            -
             | 
| 508 | 
            -
                     | 
| 509 | 
            -
             | 
| 510 | 
            -
             | 
| 511 | 
            -
                     | 
| 512 | 
            -
                    unnormalize( value, entities ) | 
| 541 | 
            +
                    return unless entities
         | 
| 542 | 
            +
             | 
| 543 | 
            +
                    value = entities[ reference ]
         | 
| 544 | 
            +
                    return if value.nil?
         | 
| 545 | 
            +
             | 
| 546 | 
            +
                    record_entity_expansion
         | 
| 547 | 
            +
                    unnormalize( value, entities )
         | 
| 513 548 | 
             
                  end
         | 
| 514 549 |  | 
| 515 550 | 
             
                  # Escapes all possible entities
         | 
| @@ -539,21 +574,37 @@ module REXML | |
| 539 574 | 
             
                    return rv if matches.size == 0
         | 
| 540 575 | 
             
                    rv.gsub!( Private::CHARACTER_REFERENCES ) {
         | 
| 541 576 | 
             
                      m=$1
         | 
| 542 | 
            -
                       | 
| 543 | 
            -
             | 
| 577 | 
            +
                      if m.start_with?("x")
         | 
| 578 | 
            +
                        code_point = Integer(m[1..-1], 16)
         | 
| 579 | 
            +
                      else
         | 
| 580 | 
            +
                        code_point = Integer(m, 10)
         | 
| 581 | 
            +
                      end
         | 
| 582 | 
            +
                      [code_point].pack('U*')
         | 
| 544 583 | 
             
                    }
         | 
| 545 584 | 
             
                    matches.collect!{|x|x[0]}.compact!
         | 
| 585 | 
            +
                    if filter
         | 
| 586 | 
            +
                      matches.reject! do |entity_reference|
         | 
| 587 | 
            +
                        filter.include?(entity_reference)
         | 
| 588 | 
            +
                      end
         | 
| 589 | 
            +
                    end
         | 
| 546 590 | 
             
                    if matches.size > 0
         | 
| 547 | 
            -
                      matches.each do |entity_reference|
         | 
| 548 | 
            -
                         | 
| 549 | 
            -
             | 
| 550 | 
            -
             | 
| 551 | 
            -
             | 
| 552 | 
            -
                             | 
| 553 | 
            -
             | 
| 554 | 
            -
                             | 
| 555 | 
            -
             | 
| 591 | 
            +
                      matches.tally.each do |entity_reference, n|
         | 
| 592 | 
            +
                        entity_expansion_count_before = @entity_expansion_count
         | 
| 593 | 
            +
                        entity_value = entity( entity_reference, entities )
         | 
| 594 | 
            +
                        if entity_value
         | 
| 595 | 
            +
                          if n > 1
         | 
| 596 | 
            +
                            entity_expansion_count_delta =
         | 
| 597 | 
            +
                              @entity_expansion_count - entity_expansion_count_before
         | 
| 598 | 
            +
                            record_entity_expansion(entity_expansion_count_delta * (n - 1))
         | 
| 599 | 
            +
                          end
         | 
| 600 | 
            +
                          re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
         | 
| 601 | 
            +
                          rv.gsub!( re, entity_value )
         | 
| 602 | 
            +
                          if rv.bytesize > @entity_expansion_text_limit
         | 
| 603 | 
            +
                            raise "entity expansion has grown too large"
         | 
| 556 604 | 
             
                          end
         | 
| 605 | 
            +
                        else
         | 
| 606 | 
            +
                          er = DEFAULT_ENTITIES[entity_reference]
         | 
| 607 | 
            +
                          rv.gsub!( er[0], er[2] ) if er
         | 
| 557 608 | 
             
                        end
         | 
| 558 609 | 
             
                      end
         | 
| 559 610 | 
             
                      rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
         | 
| @@ -562,6 +613,39 @@ module REXML | |
| 562 613 | 
             
                  end
         | 
| 563 614 |  | 
| 564 615 | 
             
                  private
         | 
| 616 | 
            +
                  def add_namespace(prefix, uri)
         | 
| 617 | 
            +
                    @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
         | 
| 618 | 
            +
                    if uri.nil?
         | 
| 619 | 
            +
                      @namespaces.delete(prefix)
         | 
| 620 | 
            +
                    else
         | 
| 621 | 
            +
                      @namespaces[prefix] = uri
         | 
| 622 | 
            +
                    end
         | 
| 623 | 
            +
                  end
         | 
| 624 | 
            +
             | 
| 625 | 
            +
                  def push_namespaces_restore
         | 
| 626 | 
            +
                    namespaces_restore = {}
         | 
| 627 | 
            +
                    @namespaces_restore_stack.push(namespaces_restore)
         | 
| 628 | 
            +
                    namespaces_restore
         | 
| 629 | 
            +
                  end
         | 
| 630 | 
            +
             | 
| 631 | 
            +
                  def pop_namespaces_restore
         | 
| 632 | 
            +
                    namespaces_restore = @namespaces_restore_stack.pop
         | 
| 633 | 
            +
                    namespaces_restore.each do |prefix, uri|
         | 
| 634 | 
            +
                      if uri.nil?
         | 
| 635 | 
            +
                        @namespaces.delete(prefix)
         | 
| 636 | 
            +
                      else
         | 
| 637 | 
            +
                        @namespaces[prefix] = uri
         | 
| 638 | 
            +
                      end
         | 
| 639 | 
            +
                    end
         | 
| 640 | 
            +
                  end
         | 
| 641 | 
            +
             | 
| 642 | 
            +
                  def record_entity_expansion(delta=1)
         | 
| 643 | 
            +
                    @entity_expansion_count += delta
         | 
| 644 | 
            +
                    if @entity_expansion_count > @entity_expansion_limit
         | 
| 645 | 
            +
                      raise "number of entity expansions exceeded, processing aborted."
         | 
| 646 | 
            +
                    end
         | 
| 647 | 
            +
                  end
         | 
| 648 | 
            +
             | 
| 565 649 | 
             
                  def need_source_encoding_update?(xml_declaration_encoding)
         | 
| 566 650 | 
             
                    return false if xml_declaration_encoding.nil?
         | 
| 567 651 | 
             
                    return false if /\AUTF-16\z/i =~ xml_declaration_encoding
         | 
| @@ -571,14 +655,14 @@ module REXML | |
| 571 655 | 
             
                  def parse_name(base_error_message)
         | 
| 572 656 | 
             
                    md = @source.match(Private::NAME_PATTERN, true)
         | 
| 573 657 | 
             
                    unless md
         | 
| 574 | 
            -
                      if @source.match(/\ | 
| 658 | 
            +
                      if @source.match?(/\S/um)
         | 
| 575 659 | 
             
                        message = "#{base_error_message}: invalid name"
         | 
| 576 660 | 
             
                      else
         | 
| 577 661 | 
             
                        message = "#{base_error_message}: name is missing"
         | 
| 578 662 | 
             
                      end
         | 
| 579 663 | 
             
                      raise REXML::ParseException.new(message, @source)
         | 
| 580 664 | 
             
                    end
         | 
| 581 | 
            -
                    md[ | 
| 665 | 
            +
                    md[0]
         | 
| 582 666 | 
             
                  end
         | 
| 583 667 |  | 
| 584 668 | 
             
                  def parse_id(base_error_message,
         | 
| @@ -613,52 +697,58 @@ module REXML | |
| 613 697 | 
             
                                               accept_public_id:)
         | 
| 614 698 | 
             
                    public = /\A\s*PUBLIC/um
         | 
| 615 699 | 
             
                    system = /\A\s*SYSTEM/um
         | 
| 616 | 
            -
                    if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
         | 
| 617 | 
            -
                      if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
         | 
| 700 | 
            +
                    if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
         | 
| 701 | 
            +
                      if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
         | 
| 618 702 | 
             
                        return "public ID literal is missing"
         | 
| 619 703 | 
             
                      end
         | 
| 620 | 
            -
                      unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
         | 
| 704 | 
            +
                      unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
         | 
| 621 705 | 
             
                        return "invalid public ID literal"
         | 
| 622 706 | 
             
                      end
         | 
| 623 707 | 
             
                      if accept_public_id
         | 
| 624 | 
            -
                        if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
         | 
| 708 | 
            +
                        if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
         | 
| 625 709 | 
             
                          return "system ID literal is missing"
         | 
| 626 710 | 
             
                        end
         | 
| 627 | 
            -
                        unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
         | 
| 711 | 
            +
                        unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
         | 
| 628 712 | 
             
                          return "invalid system literal"
         | 
| 629 713 | 
             
                        end
         | 
| 630 714 | 
             
                        "garbage after system literal"
         | 
| 631 715 | 
             
                      else
         | 
| 632 716 | 
             
                        "garbage after public ID literal"
         | 
| 633 717 | 
             
                      end
         | 
| 634 | 
            -
                    elsif accept_external_id and @source.match(/#{system}/um)
         | 
| 635 | 
            -
                      if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
         | 
| 718 | 
            +
                    elsif accept_external_id and @source.match?(/#{system}/um)
         | 
| 719 | 
            +
                      if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
         | 
| 636 720 | 
             
                        return "system literal is missing"
         | 
| 637 721 | 
             
                      end
         | 
| 638 | 
            -
                      unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
         | 
| 722 | 
            +
                      unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
         | 
| 639 723 | 
             
                        return "invalid system literal"
         | 
| 640 724 | 
             
                      end
         | 
| 641 725 | 
             
                      "garbage after system literal"
         | 
| 642 726 | 
             
                    else
         | 
| 643 | 
            -
                      unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
         | 
| 727 | 
            +
                      unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
         | 
| 644 728 | 
             
                        return "invalid ID type"
         | 
| 645 729 | 
             
                      end
         | 
| 646 730 | 
             
                      "ID type is missing"
         | 
| 647 731 | 
             
                    end
         | 
| 648 732 | 
             
                  end
         | 
| 649 733 |  | 
| 650 | 
            -
                  def process_instruction | 
| 651 | 
            -
                     | 
| 652 | 
            -
                     | 
| 653 | 
            -
                       | 
| 654 | 
            -
                       | 
| 655 | 
            -
             | 
| 734 | 
            +
                  def process_instruction
         | 
| 735 | 
            +
                    name = parse_name("Malformed XML: Invalid processing instruction node")
         | 
| 736 | 
            +
                    if @source.match?(/\s+/um, true)
         | 
| 737 | 
            +
                      match_data = @source.match(/(.*?)\?>/um, true)
         | 
| 738 | 
            +
                      unless match_data
         | 
| 739 | 
            +
                        raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
         | 
| 740 | 
            +
                      end
         | 
| 741 | 
            +
                      content = match_data[1]
         | 
| 742 | 
            +
                    else
         | 
| 743 | 
            +
                      content = nil
         | 
| 744 | 
            +
                      unless @source.match?("?>", true)
         | 
| 745 | 
            +
                        raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
         | 
| 746 | 
            +
                      end
         | 
| 656 747 | 
             
                    end
         | 
| 657 | 
            -
                    if  | 
| 748 | 
            +
                    if name == "xml"
         | 
| 658 749 | 
             
                      if @document_status
         | 
| 659 750 | 
             
                        raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
         | 
| 660 751 | 
             
                      end
         | 
| 661 | 
            -
                      content = match_data[2]
         | 
| 662 752 | 
             
                      version = VERSION.match(content)
         | 
| 663 753 | 
             
                      version = version[1] unless version.nil?
         | 
| 664 754 | 
             
                      encoding = ENCODING.match(content)
         | 
| @@ -673,16 +763,17 @@ module REXML | |
| 673 763 | 
             
                      standalone = standalone[1] unless standalone.nil?
         | 
| 674 764 | 
             
                      return [ :xmldecl, version, encoding, standalone ]
         | 
| 675 765 | 
             
                    end
         | 
| 676 | 
            -
                    [:processing_instruction,  | 
| 766 | 
            +
                    [:processing_instruction, name, content]
         | 
| 677 767 | 
             
                  end
         | 
| 678 768 |  | 
| 679 | 
            -
                  def parse_attributes(prefixes | 
| 769 | 
            +
                  def parse_attributes(prefixes)
         | 
| 680 770 | 
             
                    attributes = {}
         | 
| 771 | 
            +
                    expanded_names = {}
         | 
| 681 772 | 
             
                    closed = false
         | 
| 682 773 | 
             
                    while true
         | 
| 683 | 
            -
                      if @source.match(">", true)
         | 
| 774 | 
            +
                      if @source.match?(">", true)
         | 
| 684 775 | 
             
                        return attributes, closed
         | 
| 685 | 
            -
                      elsif @source.match("/>", true)
         | 
| 776 | 
            +
                      elsif @source.match?("/>", true)
         | 
| 686 777 | 
             
                        closed = true
         | 
| 687 778 | 
             
                        return attributes, closed
         | 
| 688 779 | 
             
                      elsif match = @source.match(QNAME, true)
         | 
| @@ -690,7 +781,7 @@ module REXML | |
| 690 781 | 
             
                        prefix = match[2]
         | 
| 691 782 | 
             
                        local_part = match[3]
         | 
| 692 783 |  | 
| 693 | 
            -
                        unless @source.match(/\s*=\s*/um, true)
         | 
| 784 | 
            +
                        unless @source.match?(/\s*=\s*/um, true)
         | 
| 694 785 | 
             
                          message = "Missing attribute equal: <#{name}>"
         | 
| 695 786 | 
             
                          raise REXML::ParseException.new(message, @source)
         | 
| 696 787 | 
             
                        end
         | 
| @@ -706,10 +797,10 @@ module REXML | |
| 706 797 | 
             
                          message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
         | 
| 707 798 | 
             
                          raise REXML::ParseException.new(message, @source)
         | 
| 708 799 | 
             
                        end
         | 
| 709 | 
            -
                        @source.match(/\s*/um, true)
         | 
| 800 | 
            +
                        @source.match?(/\s*/um, true)
         | 
| 710 801 | 
             
                        if prefix == "xmlns"
         | 
| 711 802 | 
             
                          if local_part == "xml"
         | 
| 712 | 
            -
                            if value !=  | 
| 803 | 
            +
                            if value != Private::XML_PREFIXED_NAMESPACE
         | 
| 713 804 | 
             
                              msg = "The 'xml' prefix must not be bound to any other namespace "+
         | 
| 714 805 | 
             
                                "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
         | 
| 715 806 | 
             
                              raise REXML::ParseException.new( msg, @source, self )
         | 
| @@ -719,7 +810,7 @@ module REXML | |
| 719 810 | 
             
                              "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
         | 
| 720 811 | 
             
                            raise REXML::ParseException.new( msg, @source, self)
         | 
| 721 812 | 
             
                          end
         | 
| 722 | 
            -
                           | 
| 813 | 
            +
                          add_namespace(local_part, value)
         | 
| 723 814 | 
             
                        elsif prefix
         | 
| 724 815 | 
             
                          prefixes << prefix unless prefix == "xml"
         | 
| 725 816 | 
             
                        end
         | 
| @@ -729,6 +820,20 @@ module REXML | |
| 729 820 | 
             
                          raise REXML::ParseException.new(msg, @source, self)
         | 
| 730 821 | 
             
                        end
         | 
| 731 822 |  | 
| 823 | 
            +
                        unless prefix == "xmlns"
         | 
| 824 | 
            +
                          uri = @namespaces[prefix]
         | 
| 825 | 
            +
                          expanded_name = [uri, local_part]
         | 
| 826 | 
            +
                          existing_prefix = expanded_names[expanded_name]
         | 
| 827 | 
            +
                          if existing_prefix
         | 
| 828 | 
            +
                            message = "Namespace conflict in adding attribute " +
         | 
| 829 | 
            +
                                      "\"#{local_part}\": " +
         | 
| 830 | 
            +
                                      "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
         | 
| 831 | 
            +
                                      "prefix \"#{prefix}\" = \"#{uri}\""
         | 
| 832 | 
            +
                            raise REXML::ParseException.new(message, @source, self)
         | 
| 833 | 
            +
                          end
         | 
| 834 | 
            +
                          expanded_names[expanded_name] = prefix
         | 
| 835 | 
            +
                        end
         | 
| 836 | 
            +
             | 
| 732 837 | 
             
                        attributes[name] = value
         | 
| 733 838 | 
             
                      else
         | 
| 734 839 | 
             
                        message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
         | 
| @@ -47,6 +47,18 @@ module REXML | |
| 47 47 | 
             
                    @listeners << listener
         | 
| 48 48 | 
             
                  end
         | 
| 49 49 |  | 
| 50 | 
            +
                  def entity_expansion_count
         | 
| 51 | 
            +
                    @parser.entity_expansion_count
         | 
| 52 | 
            +
                  end
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                  def entity_expansion_limit=( limit )
         | 
| 55 | 
            +
                    @parser.entity_expansion_limit = limit
         | 
| 56 | 
            +
                  end
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                  def entity_expansion_text_limit=( limit )
         | 
| 59 | 
            +
                    @parser.entity_expansion_text_limit = limit
         | 
| 60 | 
            +
                  end
         | 
| 61 | 
            +
             | 
| 50 62 | 
             
                  def each
         | 
| 51 63 | 
             
                    while has_next?
         | 
| 52 64 | 
             
                      yield self.pull
         | 
| @@ -81,6 +93,10 @@ module REXML | |
| 81 93 | 
             
                  def unshift token
         | 
| 82 94 | 
             
                    @my_stack.unshift token
         | 
| 83 95 | 
             
                  end
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                  def reset
         | 
| 98 | 
            +
                    @parser.reset
         | 
| 99 | 
            +
                  end
         | 
| 84 100 | 
             
                end
         | 
| 85 101 |  | 
| 86 102 | 
             
                # A parsing event.  The contents of the event are accessed as an +Array?,
         |