rexml 3.3.9 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +53 -0
- data/lib/rexml/parsers/baseparser.rb +70 -45
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +55 -6
- data/lib/rexml/text.rb +15 -40
- metadata +4 -4
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: a9dc6a26dcc5ba93c112d65fa910e49ca970108c726cdce28324d7771a0831a3
         | 
| 4 | 
            +
              data.tar.gz: b03ad34d3180aeeaa1ecc7ab21bf5ffe5f2845107a2c35ca3198653f80b932fa
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: c0d493943fab795f3c8fc8490a40750382e3c4cf38c73532b1f850612384795c2bb916afc70ebff0bd26e9e2f304ea6a22299a0481523bd0322d5655df05edbd
         | 
| 7 | 
            +
              data.tar.gz: bfb02a2bfadb24cbdeed951e06e113e17b123015271cabfffacc3ecc4bbb1bd7c7f56e358d42173feb8b333309f725d57b76f155fea814d70c6decae3b791165
         | 
    
        data/NEWS.md
    CHANGED
    
    | @@ -1,5 +1,58 @@ | |
| 1 1 | 
             
            # News
         | 
| 2 2 |  | 
| 3 | 
            +
            ## 3.4.1 - 2025-02-16 {#version-3-4-1}
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            ### Improvement
         | 
| 6 | 
            +
             | 
| 7 | 
            +
              * Improved performance.
         | 
| 8 | 
            +
                * GH-226
         | 
| 9 | 
            +
                * GH-227
         | 
| 10 | 
            +
                * GH-237
         | 
| 11 | 
            +
                * Patch by NAITOH Jun
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            ### Fixes
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              * Fix serialization of ATTLIST is incorrect
         | 
| 16 | 
            +
                * GH-233
         | 
| 17 | 
            +
                * GH-234
         | 
| 18 | 
            +
                * Patch by OlofKalufs
         | 
| 19 | 
            +
                * Reported by OlofKalufs
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            ### Thanks
         | 
| 22 | 
            +
             | 
| 23 | 
            +
              * NAITOH Jun
         | 
| 24 | 
            +
             | 
| 25 | 
            +
              * OlofKalufs
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            ## 3.4.0 - 2024-12-15 {#version-3-4-0}
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            ### Improvement
         | 
| 30 | 
            +
             | 
| 31 | 
            +
              * Improved performance.
         | 
| 32 | 
            +
                * GH-216
         | 
| 33 | 
            +
                * Patch by NAITOH Jun
         | 
| 34 | 
            +
             | 
| 35 | 
            +
              * JRuby: Improved parse performance.
         | 
| 36 | 
            +
                * GH-219
         | 
| 37 | 
            +
                * Patch by João Duarte
         | 
| 38 | 
            +
             | 
| 39 | 
            +
              * Added support for reusing pull parser.
         | 
| 40 | 
            +
                * GH-214
         | 
| 41 | 
            +
                * GH-220
         | 
| 42 | 
            +
                * Patch by Dmitry Pogrebnoy
         | 
| 43 | 
            +
             | 
| 44 | 
            +
              * Improved error handling when source is `IO`.
         | 
| 45 | 
            +
                * GH-221
         | 
| 46 | 
            +
                * Patch by NAITOH Jun
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            ### Thanks
         | 
| 49 | 
            +
             | 
| 50 | 
            +
              * NAITOH Jun
         | 
| 51 | 
            +
             | 
| 52 | 
            +
              * João Duarte
         | 
| 53 | 
            +
             | 
| 54 | 
            +
              * Dmitry Pogrebnoy
         | 
| 55 | 
            +
             | 
| 3 56 | 
             
            ## 3.3.9 - 2024-10-24 {#version-3-3-9}
         | 
| 4 57 |  | 
| 5 58 | 
             
            ### Improvements
         | 
    
        data/lib/rexml/parsers/baseparser.rb
    CHANGED
    
| @@ -181,6 +181,10 @@ module REXML | |
| 181 181 |  | 
| 182 182 | 
             
                  def stream=( source )
         | 
| 183 183 | 
             
                    @source = SourceFactory.create_from( source )
         | 
| 184 | 
            +
                    reset
         | 
| 185 | 
            +
                  end
         | 
| 186 | 
            +
             | 
| 187 | 
            +
                  def reset
         | 
| 184 188 | 
             
                    @closed = nil
         | 
| 185 189 | 
             
                    @have_root = false
         | 
| 186 190 | 
             
                    @document_status = nil
         | 
| @@ -269,10 +273,10 @@ module REXML | |
| 269 273 | 
             
                    @source.ensure_buffer
         | 
| 270 274 | 
             
                    if @document_status == nil
         | 
| 271 275 | 
             
                      start_position = @source.position
         | 
| 272 | 
            -
                      if @source.match("<?", true)
         | 
| 276 | 
            +
                      if @source.match?("<?", true)
         | 
| 273 277 | 
             
                        return process_instruction
         | 
| 274 | 
            -
                      elsif @source.match("<!", true)
         | 
| 275 | 
            -
                        if @source.match("--", true)
         | 
| 278 | 
            +
                      elsif @source.match?("<!", true)
         | 
| 279 | 
            +
                        if @source.match?("--", true)
         | 
| 276 280 | 
             
                          md = @source.match(/(.*?)-->/um, true)
         | 
| 277 281 | 
             
                          if md.nil?
         | 
| 278 282 | 
             
                            raise REXML::ParseException.new("Unclosed comment", @source)
         | 
| @@ -281,10 +285,10 @@ module REXML | |
| 281 285 | 
             
                            raise REXML::ParseException.new("Malformed comment", @source)
         | 
| 282 286 | 
             
                          end
         | 
| 283 287 | 
             
                          return [ :comment, md[1] ]
         | 
| 284 | 
            -
                        elsif @source.match("DOCTYPE", true)
         | 
| 288 | 
            +
                        elsif @source.match?("DOCTYPE", true)
         | 
| 285 289 | 
             
                          base_error_message = "Malformed DOCTYPE"
         | 
| 286 | 
            -
                          unless @source.match(/\s+/um, true)
         | 
| 287 | 
            -
                            if @source.match(">")
         | 
| 290 | 
            +
                          unless @source.match?(/\s+/um, true)
         | 
| 291 | 
            +
                            if @source.match?(">")
         | 
| 288 292 | 
             
                              message = "#{base_error_message}: name is missing"
         | 
| 289 293 | 
             
                            else
         | 
| 290 294 | 
             
                              message = "#{base_error_message}: invalid name"
         | 
| @@ -293,10 +297,11 @@ module REXML | |
| 293 297 | 
             
                            raise REXML::ParseException.new(message, @source)
         | 
| 294 298 | 
             
                          end
         | 
| 295 299 | 
             
                          name = parse_name(base_error_message)
         | 
| 296 | 
            -
                          if @source.match(/\s*\[/um, true)
         | 
| 300 | 
            +
                          @source.match?(/\s*/um, true) # skip spaces
         | 
| 301 | 
            +
                          if @source.match?("[", true)
         | 
| 297 302 | 
             
                            id = [nil, nil, nil]
         | 
| 298 303 | 
             
                            @document_status = :in_doctype
         | 
| 299 | 
            -
                          elsif @source.match(/\s*>/um, true)
         | 
| 304 | 
            +
                          elsif @source.match?(">", true)
         | 
| 300 305 | 
             
                            id = [nil, nil, nil]
         | 
| 301 306 | 
             
                            @document_status = :after_doctype
         | 
| 302 307 | 
             
                            @source.ensure_buffer
         | 
| @@ -308,9 +313,10 @@ module REXML | |
| 308 313 | 
             
                              # For backward compatibility
         | 
| 309 314 | 
             
                              id[1], id[2] = id[2], nil
         | 
| 310 315 | 
             
                            end
         | 
| 311 | 
            -
                            if @source.match(/\s*\[/um, true)
         | 
| 316 | 
            +
                            @source.match?(/\s*/um, true) # skip spaces
         | 
| 317 | 
            +
                            if @source.match?("[", true)
         | 
| 312 318 | 
             
                              @document_status = :in_doctype
         | 
| 313 | 
            -
                            elsif @source.match(/\s*>/um, true)
         | 
| 319 | 
            +
                            elsif @source.match?(">", true)
         | 
| 314 320 | 
             
                              @document_status = :after_doctype
         | 
| 315 321 | 
             
                              @source.ensure_buffer
         | 
| 316 322 | 
             
                            else
         | 
| @@ -320,7 +326,7 @@ module REXML | |
| 320 326 | 
             
                          end
         | 
| 321 327 | 
             
                          args = [:start_doctype, name, *id]
         | 
| 322 328 | 
             
                          if @document_status == :after_doctype
         | 
| 323 | 
            -
                            @source.match(/\s*/um, true)
         | 
| 329 | 
            +
                            @source.match?(/\s*/um, true)
         | 
| 324 330 | 
             
                            @stack << [ :end_doctype ]
         | 
| 325 331 | 
             
                          end
         | 
| 326 332 | 
             
                          return args
         | 
| @@ -331,14 +337,14 @@ module REXML | |
| 331 337 | 
             
                      end
         | 
| 332 338 | 
             
                    end
         | 
| 333 339 | 
             
                    if @document_status == :in_doctype
         | 
| 334 | 
            -
                      @source.match(/\s*/um, true) # skip spaces
         | 
| 340 | 
            +
                      @source.match?(/\s*/um, true) # skip spaces
         | 
| 335 341 | 
             
                      start_position = @source.position
         | 
| 336 | 
            -
                      if @source.match("<!", true)
         | 
| 337 | 
            -
                        if @source.match("ELEMENT", true)
         | 
| 342 | 
            +
                      if @source.match?("<!", true)
         | 
| 343 | 
            +
                        if @source.match?("ELEMENT", true)
         | 
| 338 344 | 
             
                          md = @source.match(/(.*?)>/um, true)
         | 
| 339 345 | 
             
                          raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
         | 
| 340 346 | 
             
                          return [ :elementdecl, "<!ELEMENT" + md[1] ]
         | 
| 341 | 
            -
                        elsif @source.match("ENTITY", true)
         | 
| 347 | 
            +
                        elsif @source.match?("ENTITY", true)
         | 
| 342 348 | 
             
                          match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
         | 
| 343 349 | 
             
                          unless match_data
         | 
| 344 350 | 
             
                            raise REXML::ParseException.new("Malformed entity declaration", @source)
         | 
| @@ -370,11 +376,11 @@ module REXML | |
| 370 376 | 
             
                          end
         | 
| 371 377 | 
             
                          match << '%' if ref
         | 
| 372 378 | 
             
                          return match
         | 
| 373 | 
            -
                        elsif @source.match("ATTLIST", true)
         | 
| 379 | 
            +
                        elsif @source.match?("ATTLIST", true)
         | 
| 374 380 | 
             
                          md = @source.match(Private::ATTLISTDECL_END, true)
         | 
| 375 381 | 
             
                          raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
         | 
| 376 382 | 
             
                          element = md[1]
         | 
| 377 | 
            -
                          contents = md[0]
         | 
| 383 | 
            +
                          contents = "<!ATTLIST" + md[0]
         | 
| 378 384 |  | 
| 379 385 | 
             
                          pairs = {}
         | 
| 380 386 | 
             
                          values = md[0].strip.scan( ATTDEF_RE )
         | 
| @@ -390,10 +396,10 @@ module REXML | |
| 390 396 | 
             
                            end
         | 
| 391 397 | 
             
                          end
         | 
| 392 398 | 
             
                          return [ :attlistdecl, element, pairs, contents ]
         | 
| 393 | 
            -
                        elsif @source.match("NOTATION", true)
         | 
| 399 | 
            +
                        elsif @source.match?("NOTATION", true)
         | 
| 394 400 | 
             
                          base_error_message = "Malformed notation declaration"
         | 
| 395 | 
            -
                          unless @source.match(/\s+/um, true)
         | 
| 396 | 
            -
                            if @source.match(">")
         | 
| 401 | 
            +
                          unless @source.match?(/\s+/um, true)
         | 
| 402 | 
            +
                            if @source.match?(">")
         | 
| 397 403 | 
             
                              message = "#{base_error_message}: name is missing"
         | 
| 398 404 | 
             
                            else
         | 
| 399 405 | 
             
                              message = "#{base_error_message}: invalid name"
         | 
| @@ -405,7 +411,8 @@ module REXML | |
| 405 411 | 
             
                          id = parse_id(base_error_message,
         | 
| 406 412 | 
             
                                        accept_external_id: true,
         | 
| 407 413 | 
             
                                        accept_public_id: true)
         | 
| 408 | 
            -
                          unless @source.match(/\s*>/um, true)
         | 
| 414 | 
            +
                          @source.match?(/\s*/um, true) # skip spaces
         | 
| 415 | 
            +
                          unless @source.match?(">", true)
         | 
| 409 416 | 
             
                            message = "#{base_error_message}: garbage before end >"
         | 
| 410 417 | 
             
                            raise REXML::ParseException.new(message, @source)
         | 
| 411 418 | 
             
                          end
         | 
| @@ -419,7 +426,7 @@ module REXML | |
| 419 426 | 
             
                        end
         | 
| 420 427 | 
             
                      elsif match = @source.match(/(%.*?;)\s*/um, true)
         | 
| 421 428 | 
             
                        return [ :externalentity, match[1] ]
         | 
| 422 | 
            -
                      elsif @source.match(/\]\s*>/um, true)
         | 
| 429 | 
            +
                      elsif @source.match?(/\]\s*>/um, true)
         | 
| 423 430 | 
             
                        @document_status = :after_doctype
         | 
| 424 431 | 
             
                        return [ :end_doctype ]
         | 
| 425 432 | 
             
                      end
         | 
| @@ -428,16 +435,16 @@ module REXML | |
| 428 435 | 
             
                      end
         | 
| 429 436 | 
             
                    end
         | 
| 430 437 | 
             
                    if @document_status == :after_doctype
         | 
| 431 | 
            -
                      @source.match(/\s*/um, true)
         | 
| 438 | 
            +
                      @source.match?(/\s*/um, true)
         | 
| 432 439 | 
             
                    end
         | 
| 433 440 | 
             
                    begin
         | 
| 434 441 | 
             
                      start_position = @source.position
         | 
| 435 | 
            -
                      if @source.match("<", true)
         | 
| 442 | 
            +
                      if @source.match?("<", true)
         | 
| 436 443 | 
             
                        # :text's read_until may remain only "<" in buffer. In the
         | 
| 437 444 | 
             
                        # case, buffer is empty here. So we need to fill buffer
         | 
| 438 445 | 
             
                        # here explicitly.
         | 
| 439 446 | 
             
                        @source.ensure_buffer
         | 
| 440 | 
            -
                        if @source.match("/", true)
         | 
| 447 | 
            +
                        if @source.match?("/", true)
         | 
| 441 448 | 
             
                          @namespaces_restore_stack.pop
         | 
| 442 449 | 
             
                          last_tag = @tags.pop
         | 
| 443 450 | 
             
                          md = @source.match(Private::CLOSE_PATTERN, true)
         | 
| @@ -452,7 +459,7 @@ module REXML | |
| 452 459 | 
             
                            raise REXML::ParseException.new(message, @source)
         | 
| 453 460 | 
             
                          end
         | 
| 454 461 | 
             
                          return [ :end_element, last_tag ]
         | 
| 455 | 
            -
                        elsif @source.match("!", true)
         | 
| 462 | 
            +
                        elsif @source.match?("!", true)
         | 
| 456 463 | 
             
                          md = @source.match(/([^>]*>)/um)
         | 
| 457 464 | 
             
                          #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
         | 
| 458 465 | 
             
                          raise REXML::ParseException.new("Malformed node", @source) unless md
         | 
| @@ -470,7 +477,7 @@ module REXML | |
| 470 477 | 
             
                          end
         | 
| 471 478 | 
             
                          raise REXML::ParseException.new( "Declarations can only occur "+
         | 
| 472 479 | 
             
                            "in the doctype declaration.", @source)
         | 
| 473 | 
            -
                        elsif @source.match("?", true)
         | 
| 480 | 
            +
                        elsif @source.match?("?", true)
         | 
| 474 481 | 
             
                          return process_instruction
         | 
| 475 482 | 
             
                        else
         | 
| 476 483 | 
             
                          # Get the next tag
         | 
| @@ -651,7 +658,7 @@ module REXML | |
| 651 658 | 
             
                  def parse_name(base_error_message)
         | 
| 652 659 | 
             
                    md = @source.match(Private::NAME_PATTERN, true)
         | 
| 653 660 | 
             
                    unless md
         | 
| 654 | 
            -
                      if @source.match(/\S/um)
         | 
| 661 | 
            +
                      if @source.match?(/\S/um)
         | 
| 655 662 | 
             
                        message = "#{base_error_message}: invalid name"
         | 
| 656 663 | 
             
                      else
         | 
| 657 664 | 
             
                        message = "#{base_error_message}: name is missing"
         | 
| @@ -693,34 +700,34 @@ module REXML | |
| 693 700 | 
             
                                               accept_public_id:)
         | 
| 694 701 | 
             
                    public = /\A\s*PUBLIC/um
         | 
| 695 702 | 
             
                    system = /\A\s*SYSTEM/um
         | 
| 696 | 
            -
                    if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
         | 
| 697 | 
            -
                      if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
         | 
| 703 | 
            +
                    if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
         | 
| 704 | 
            +
                      if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
         | 
| 698 705 | 
             
                        return "public ID literal is missing"
         | 
| 699 706 | 
             
                      end
         | 
| 700 | 
            -
                      unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
         | 
| 707 | 
            +
                      unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
         | 
| 701 708 | 
             
                        return "invalid public ID literal"
         | 
| 702 709 | 
             
                      end
         | 
| 703 710 | 
             
                      if accept_public_id
         | 
| 704 | 
            -
                        if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
         | 
| 711 | 
            +
                        if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
         | 
| 705 712 | 
             
                          return "system ID literal is missing"
         | 
| 706 713 | 
             
                        end
         | 
| 707 | 
            -
                        unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
         | 
| 714 | 
            +
                        unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
         | 
| 708 715 | 
             
                          return "invalid system literal"
         | 
| 709 716 | 
             
                        end
         | 
| 710 717 | 
             
                        "garbage after system literal"
         | 
| 711 718 | 
             
                      else
         | 
| 712 719 | 
             
                        "garbage after public ID literal"
         | 
| 713 720 | 
             
                      end
         | 
| 714 | 
            -
                    elsif accept_external_id and @source.match(/#{system}/um)
         | 
| 715 | 
            -
                      if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
         | 
| 721 | 
            +
                    elsif accept_external_id and @source.match?(/#{system}/um)
         | 
| 722 | 
            +
                      if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
         | 
| 716 723 | 
             
                        return "system literal is missing"
         | 
| 717 724 | 
             
                      end
         | 
| 718 | 
            -
                      unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
         | 
| 725 | 
            +
                      unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
         | 
| 719 726 | 
             
                        return "invalid system literal"
         | 
| 720 727 | 
             
                      end
         | 
| 721 728 | 
             
                      "garbage after system literal"
         | 
| 722 729 | 
             
                    else
         | 
| 723 | 
            -
                      unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
         | 
| 730 | 
            +
                      unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
         | 
| 724 731 | 
             
                        return "invalid ID type"
         | 
| 725 732 | 
             
                      end
         | 
| 726 733 | 
             
                      "ID type is missing"
         | 
| @@ -729,7 +736,7 @@ module REXML | |
| 729 736 |  | 
| 730 737 | 
             
                  def process_instruction
         | 
| 731 738 | 
             
                    name = parse_name("Malformed XML: Invalid processing instruction node")
         | 
| 732 | 
            -
                    if @source.match(/\s+/um, true)
         | 
| 739 | 
            +
                    if @source.match?(/\s+/um, true)
         | 
| 733 740 | 
             
                      match_data = @source.match(/(.*?)\?>/um, true)
         | 
| 734 741 | 
             
                      unless match_data
         | 
| 735 742 | 
             
                        raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
         | 
| @@ -737,7 +744,7 @@ module REXML | |
| 737 744 | 
             
                      content = match_data[1]
         | 
| 738 745 | 
             
                    else
         | 
| 739 746 | 
             
                      content = nil
         | 
| 740 | 
            -
                      unless @source.match("?>", true)
         | 
| 747 | 
            +
                      unless @source.match?("?>", true)
         | 
| 741 748 | 
             
                        raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
         | 
| 742 749 | 
             
                      end
         | 
| 743 750 | 
             
                    end
         | 
| @@ -762,14 +769,33 @@ module REXML | |
| 762 769 | 
             
                    [:processing_instruction, name, content]
         | 
| 763 770 | 
             
                  end
         | 
| 764 771 |  | 
| 772 | 
            +
                  if StringScanner::Version < "3.1.1"
         | 
| 773 | 
            +
                    def scan_quote
         | 
| 774 | 
            +
                      @source.match(/(['"])/, true)&.[](1)
         | 
| 775 | 
            +
                    end
         | 
| 776 | 
            +
                  else
         | 
| 777 | 
            +
                    def scan_quote
         | 
| 778 | 
            +
                      case @source.peek_byte
         | 
| 779 | 
            +
                      when 34 # '"'.ord
         | 
| 780 | 
            +
                        @source.scan_byte
         | 
| 781 | 
            +
                        '"'
         | 
| 782 | 
            +
                      when 39 # "'".ord
         | 
| 783 | 
            +
                        @source.scan_byte
         | 
| 784 | 
            +
                        "'"
         | 
| 785 | 
            +
                      else
         | 
| 786 | 
            +
                        nil
         | 
| 787 | 
            +
                      end
         | 
| 788 | 
            +
                    end
         | 
| 789 | 
            +
                  end
         | 
| 790 | 
            +
             | 
| 765 791 | 
             
                  def parse_attributes(prefixes)
         | 
| 766 792 | 
             
                    attributes = {}
         | 
| 767 793 | 
             
                    expanded_names = {}
         | 
| 768 794 | 
             
                    closed = false
         | 
| 769 795 | 
             
                    while true
         | 
| 770 | 
            -
                      if @source.match(">", true)
         | 
| 796 | 
            +
                      if @source.match?(">", true)
         | 
| 771 797 | 
             
                        return attributes, closed
         | 
| 772 | 
            -
                      elsif @source.match("/>", true)
         | 
| 798 | 
            +
                      elsif @source.match?("/>", true)
         | 
| 773 799 | 
             
                        closed = true
         | 
| 774 800 | 
             
                        return attributes, closed
         | 
| 775 801 | 
             
                      elsif match = @source.match(QNAME, true)
         | 
| @@ -777,15 +803,14 @@ module REXML | |
| 777 803 | 
             
                        prefix = match[2]
         | 
| 778 804 | 
             
                        local_part = match[3]
         | 
| 779 805 |  | 
| 780 | 
            -
                        unless @source.match(/\s*=\s*/um, true)
         | 
| 806 | 
            +
                        unless @source.match?(/\s*=\s*/um, true)
         | 
| 781 807 | 
             
                          message = "Missing attribute equal: <#{name}>"
         | 
| 782 808 | 
             
                          raise REXML::ParseException.new(message, @source)
         | 
| 783 809 | 
             
                        end
         | 
| 784 | 
            -
                        unless match = @source.match(/(['"])/, true)
         | 
| 810 | 
            +
                        unless quote = scan_quote
         | 
| 785 811 | 
             
                          message = "Missing attribute value start quote: <#{name}>"
         | 
| 786 812 | 
             
                          raise REXML::ParseException.new(message, @source)
         | 
| 787 813 | 
             
                        end
         | 
| 788 | 
            -
                        quote = match[1]
         | 
| 789 814 | 
             
                        start_position = @source.position
         | 
| 790 815 | 
             
                        value = @source.read_until(quote)
         | 
| 791 816 | 
             
                        unless value.chomp!(quote)
         | 
| @@ -793,7 +818,7 @@ module REXML | |
| 793 818 | 
             
                          message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
         | 
| 794 819 | 
             
                          raise REXML::ParseException.new(message, @source)
         | 
| 795 820 | 
             
                        end
         | 
| 796 | 
            -
                        @source.match(/\s*/um, true)
         | 
| 821 | 
            +
                        @source.match?(/\s*/um, true)
         | 
| 797 822 | 
             
                        if prefix == "xmlns"
         | 
| 798 823 | 
             
                          if local_part == "xml"
         | 
| 799 824 | 
             
                            if value != Private::XML_PREFIXED_NAMESPACE
         | 
    
        data/lib/rexml/rexml.rb
    CHANGED
    
    
    
        data/lib/rexml/source.rb
    CHANGED
    
    | @@ -1,6 +1,7 @@ | |
| 1 1 | 
             
            # coding: US-ASCII
         | 
| 2 2 | 
             
            # frozen_string_literal: false
         | 
| 3 3 |  | 
| 4 | 
            +
            require "stringio"
         | 
| 4 5 | 
             
            require "strscan"
         | 
| 5 6 |  | 
| 6 7 | 
             
            require_relative 'encoding'
         | 
| @@ -18,6 +19,16 @@ module REXML | |
| 18 19 | 
             
                      pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
         | 
| 19 20 | 
             
                      super(pattern)
         | 
| 20 21 | 
             
                    end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                    def match?(pattern)
         | 
| 24 | 
            +
                      pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
         | 
| 25 | 
            +
                      super(pattern)
         | 
| 26 | 
            +
                    end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                    def skip(pattern)
         | 
| 29 | 
            +
                      pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
         | 
| 30 | 
            +
                      super(pattern)
         | 
| 31 | 
            +
                    end
         | 
| 21 32 | 
             
                  end
         | 
| 22 33 | 
             
                end
         | 
| 23 34 | 
             
                using StringScannerCheckScanString
         | 
| @@ -35,7 +46,6 @@ module REXML | |
| 35 46 | 
             
                      arg.respond_to? :eof?
         | 
| 36 47 | 
             
                    IOSource.new(arg)
         | 
| 37 48 | 
             
                  elsif arg.respond_to? :to_str
         | 
| 38 | 
            -
                    require 'stringio'
         | 
| 39 49 | 
             
                    IOSource.new(StringIO.new(arg))
         | 
| 40 50 | 
             
                  elsif arg.kind_of? Source
         | 
| 41 51 | 
             
                    arg
         | 
| @@ -58,8 +68,14 @@ module REXML | |
| 58 68 | 
             
                  SCANNER_RESET_SIZE = 100000
         | 
| 59 69 | 
             
                  PRE_DEFINED_TERM_PATTERNS = {}
         | 
| 60 70 | 
             
                  pre_defined_terms = ["'", '"', "<"]
         | 
| 61 | 
            -
                  pre_defined_terms.each do |term|
         | 
| 62 | 
            -
                    PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
         | 
| 71 | 
            +
                  if StringScanner::Version < "3.1.1"
         | 
| 72 | 
            +
                    pre_defined_terms.each do |term|
         | 
| 73 | 
            +
                      PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
         | 
| 74 | 
            +
                    end
         | 
| 75 | 
            +
                  else
         | 
| 76 | 
            +
                    pre_defined_terms.each do |term|
         | 
| 77 | 
            +
                      PRE_DEFINED_TERM_PATTERNS[term] = term
         | 
| 78 | 
            +
                    end
         | 
| 63 79 | 
             
                  end
         | 
| 64 80 | 
             
                end
         | 
| 65 81 | 
             
                private_constant :Private
         | 
| @@ -77,7 +93,7 @@ module REXML | |
| 77 93 | 
             
                    detect_encoding
         | 
| 78 94 | 
             
                  end
         | 
| 79 95 | 
             
                  @line = 0
         | 
| 80 | 
            -
                  @term_encord = {}
         | 
| 96 | 
            +
                  @encoded_terms = {}
         | 
| 81 97 | 
             
                end
         | 
| 82 98 |  | 
| 83 99 | 
             
                # The current buffer (what we're going to read next)
         | 
| @@ -126,6 +142,14 @@ module REXML | |
| 126 142 | 
             
                  end
         | 
| 127 143 | 
             
                end
         | 
| 128 144 |  | 
| 145 | 
            +
                def match?(pattern, cons=false)
         | 
| 146 | 
            +
                  if cons
         | 
| 147 | 
            +
                    !@scanner.skip(pattern).nil?
         | 
| 148 | 
            +
                  else
         | 
| 149 | 
            +
                    !@scanner.match?(pattern).nil?
         | 
| 150 | 
            +
                  end
         | 
| 151 | 
            +
                end
         | 
| 152 | 
            +
             | 
| 129 153 | 
             
                def position
         | 
| 130 154 | 
             
                  @scanner.pos
         | 
| 131 155 | 
             
                end
         | 
| @@ -134,6 +158,14 @@ module REXML | |
| 134 158 | 
             
                  @scanner.pos = pos
         | 
| 135 159 | 
             
                end
         | 
| 136 160 |  | 
| 161 | 
            +
                def peek_byte
         | 
| 162 | 
            +
                  @scanner.peek_byte
         | 
| 163 | 
            +
                end
         | 
| 164 | 
            +
             | 
| 165 | 
            +
                def scan_byte
         | 
| 166 | 
            +
                  @scanner.scan_byte
         | 
| 167 | 
            +
                end
         | 
| 168 | 
            +
             | 
| 137 169 | 
             
                # @return true if the Source is exhausted
         | 
| 138 170 | 
             
                def empty?
         | 
| 139 171 | 
             
                  @scanner.eos?
         | 
| @@ -228,7 +260,7 @@ module REXML | |
| 228 260 |  | 
| 229 261 | 
             
                def read_until(term)
         | 
| 230 262 | 
             
                  pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
         | 
| 231 | 
            -
                  term = @term_encord[term] ||= encode(term)
         | 
| 263 | 
            +
                  term = @encoded_terms[term] ||= encode(term)
         | 
| 232 264 | 
             
                  until str = @scanner.scan_until(pattern)
         | 
| 233 265 | 
             
                    break if @source.nil?
         | 
| 234 266 | 
             
                    break if @source.eof?
         | 
| @@ -267,6 +299,23 @@ module REXML | |
| 267 299 | 
             
                  md.nil? ? nil : @scanner
         | 
| 268 300 | 
             
                end
         | 
| 269 301 |  | 
| 302 | 
            +
                def match?( pattern, cons=false )
         | 
| 303 | 
            +
                  # To avoid performance issue, we need to increase bytes to read per scan
         | 
| 304 | 
            +
                  min_bytes = 1
         | 
| 305 | 
            +
                  while true
         | 
| 306 | 
            +
                    if cons
         | 
| 307 | 
            +
                      n_matched_bytes = @scanner.skip(pattern)
         | 
| 308 | 
            +
                    else
         | 
| 309 | 
            +
                      n_matched_bytes = @scanner.match?(pattern)
         | 
| 310 | 
            +
                    end
         | 
| 311 | 
            +
                    return true if n_matched_bytes
         | 
| 312 | 
            +
                    return false if pattern.is_a?(String)
         | 
| 313 | 
            +
                    return false if @source.nil?
         | 
| 314 | 
            +
                    return false unless read(nil, min_bytes)
         | 
| 315 | 
            +
                    min_bytes *= 2
         | 
| 316 | 
            +
                  end
         | 
| 317 | 
            +
                end
         | 
| 318 | 
            +
             | 
| 270 319 | 
             
                def empty?
         | 
| 271 320 | 
             
                  super and ( @source.nil? || @source.eof? )
         | 
| 272 321 | 
             
                end
         | 
| @@ -286,7 +335,7 @@ module REXML | |
| 286 335 | 
             
                    rescue
         | 
| 287 336 | 
             
                    end
         | 
| 288 337 | 
             
                    @er_source.seek(pos)
         | 
| 289 | 
            -
                  rescue IOError
         | 
| 338 | 
            +
                  rescue IOError, SystemCallError
         | 
| 290 339 | 
             
                    pos = -1
         | 
| 291 340 | 
             
                    line = -1
         | 
| 292 341 | 
             
                  end
         | 
    
        data/lib/rexml/text.rb
    CHANGED
    
    | @@ -29,31 +29,16 @@ module REXML | |
| 29 29 | 
             
                  (0x10000..0x10FFFF)
         | 
| 30 30 | 
             
                ]
         | 
| 31 31 |  | 
| 32 | 
            -
                 | 
| 33 | 
            -
                   | 
| 34 | 
            -
                     | 
| 35 | 
            -
             | 
| 36 | 
            -
                       | 
| 37 | 
            -
             | 
| 38 | 
            -
                       | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
                  ']*$')
         | 
| 43 | 
            -
                else
         | 
| 44 | 
            -
                  VALID_XML_CHARS = /^(
         | 
| 45 | 
            -
                       [\x09\x0A\x0D\x20-\x7E]            # ASCII
         | 
| 46 | 
            -
                     | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
         | 
| 47 | 
            -
                     |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
         | 
| 48 | 
            -
                     | [\xE1-\xEC\xEE][\x80-\xBF]{2}      # straight 3-byte
         | 
| 49 | 
            -
                     |  \xEF[\x80-\xBE]{2}                #
         | 
| 50 | 
            -
                     |  \xEF\xBF[\x80-\xBD]               # excluding U+fffe and U+ffff
         | 
| 51 | 
            -
                     |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
         | 
| 52 | 
            -
                     |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
         | 
| 53 | 
            -
                     | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
         | 
| 54 | 
            -
                     |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
         | 
| 55 | 
            -
                   )*$/nx;
         | 
| 56 | 
            -
                end
         | 
| 32 | 
            +
                VALID_XML_CHARS = Regexp.new('^['+
         | 
| 33 | 
            +
                  VALID_CHAR.map { |item|
         | 
| 34 | 
            +
                    case item
         | 
| 35 | 
            +
                    when Integer
         | 
| 36 | 
            +
                      [item].pack('U').force_encoding('utf-8')
         | 
| 37 | 
            +
                    when Range
         | 
| 38 | 
            +
                      [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
         | 
| 39 | 
            +
                    end
         | 
| 40 | 
            +
                  }.join +
         | 
| 41 | 
            +
                ']*$')
         | 
| 57 42 |  | 
| 58 43 | 
             
                # Constructor
         | 
| 59 44 | 
             
                # +arg+ if a String, the content is set to the String.  If a Text,
         | 
| @@ -132,21 +117,11 @@ module REXML | |
| 132 117 |  | 
| 133 118 | 
             
                  # illegal anywhere
         | 
| 134 119 | 
             
                  if !string.match?(VALID_XML_CHARS)
         | 
| 135 | 
            -
                     | 
| 136 | 
            -
                       | 
| 137 | 
            -
             | 
| 138 | 
            -
             | 
| 139 | 
            -
                         | 
| 140 | 
            -
                          raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
         | 
| 141 | 
            -
                        end
         | 
| 142 | 
            -
                      end
         | 
| 143 | 
            -
                    else
         | 
| 144 | 
            -
                      string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
         | 
| 145 | 
            -
                        case c.unpack('U')
         | 
| 146 | 
            -
                        when *VALID_CHAR
         | 
| 147 | 
            -
                        else
         | 
| 148 | 
            -
                          raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
         | 
| 149 | 
            -
                        end
         | 
| 120 | 
            +
                    string.chars.each do |c|
         | 
| 121 | 
            +
                      case c.ord
         | 
| 122 | 
            +
                      when *VALID_CHAR
         | 
| 123 | 
            +
                      else
         | 
| 124 | 
            +
                        raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
         | 
| 150 125 | 
             
                      end
         | 
| 151 126 | 
             
                    end
         | 
| 152 127 | 
             
                  end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,13 +1,13 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: rexml
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 3.3.9
         | 
| 4 | 
            +
              version: 3.4.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Kouhei Sutou
         | 
| 8 8 | 
             
            bindir: bin
         | 
| 9 9 | 
             
            cert_chain: []
         | 
| 10 | 
            -
            date: 2024-10-24 00:00:00.000000000 Z
         | 
| 10 | 
            +
            date: 2025-02-16 00:00:00.000000000 Z
         | 
| 11 11 | 
             
            dependencies: []
         | 
| 12 12 | 
             
            description: An XML toolkit for Ruby
         | 
| 13 13 | 
             
            email:
         | 
| @@ -102,7 +102,7 @@ homepage: https://github.com/ruby/rexml | |
| 102 102 | 
             
            licenses:
         | 
| 103 103 | 
             
            - BSD-2-Clause
         | 
| 104 104 | 
             
            metadata:
         | 
| 105 | 
            -
              changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.9
         | 
| 105 | 
            +
              changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.1
         | 
| 106 106 | 
             
            rdoc_options:
         | 
| 107 107 | 
             
            - "--main"
         | 
| 108 108 | 
             
            - README.md
         | 
| @@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 119 119 | 
             
                - !ruby/object:Gem::Version
         | 
| 120 120 | 
             
                  version: '0'
         | 
| 121 121 | 
             
            requirements: []
         | 
| 122 | 
            -
            rubygems_version: 3.6. | 
| 122 | 
            +
            rubygems_version: 3.6.2
         | 
| 123 123 | 
             
            specification_version: 4
         | 
| 124 124 | 
             
            summary: An XML toolkit for Ruby
         | 
| 125 125 | 
             
            test_files: []
         |