rexml 3.2.6 → 3.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +306 -0
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +5 -47
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +421 -263
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +6 -19
- data/lib/rexml/parsers/streamparser.rb +8 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +128 -98
- data/lib/rexml/text.rb +34 -14
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +7 -37
| @@ -22,6 +22,10 @@ module REXML | |
| 22 22 | 
             
                    @parser.source
         | 
| 23 23 | 
             
                  end
         | 
| 24 24 |  | 
| 25 | 
            +
                  def entity_expansion_count
         | 
| 26 | 
            +
                    @parser.entity_expansion_count
         | 
| 27 | 
            +
                  end
         | 
| 28 | 
            +
             | 
| 25 29 | 
             
                  def add_listener( listener )
         | 
| 26 30 | 
             
                    @parser.add_listener( listener )
         | 
| 27 31 | 
             
                  end
         | 
| @@ -157,25 +161,8 @@ module REXML | |
| 157 161 | 
             
                          end
         | 
| 158 162 | 
             
                        end
         | 
| 159 163 | 
             
                      when :text
         | 
| 160 | 
            -
                         | 
| 161 | 
            -
                         | 
| 162 | 
            -
                        copy = event[1].clone
         | 
| 163 | 
            -
             | 
| 164 | 
            -
                        esub = proc { |match|
         | 
| 165 | 
            -
                          if @entities.has_key?($1)
         | 
| 166 | 
            -
                            @entities[$1].gsub(Text::REFERENCE, &esub)
         | 
| 167 | 
            -
                          else
         | 
| 168 | 
            -
                            match
         | 
| 169 | 
            -
                          end
         | 
| 170 | 
            -
                        }
         | 
| 171 | 
            -
             | 
| 172 | 
            -
                        copy.gsub!( Text::REFERENCE, &esub )
         | 
| 173 | 
            -
                        copy.gsub!( Text::NUMERICENTITY ) {|m|
         | 
| 174 | 
            -
                          m=$1
         | 
| 175 | 
            -
                          m = "0#{m}" if m[0] == ?x
         | 
| 176 | 
            -
                          [Integer(m)].pack('U*')
         | 
| 177 | 
            -
                        }
         | 
| 178 | 
            -
                        handle( :characters, copy )
         | 
| 164 | 
            +
                        unnormalized = @parser.unnormalize( event[1], @entities )
         | 
| 165 | 
            +
                        handle( :characters, unnormalized )
         | 
| 179 166 | 
             
                      when :entitydecl
         | 
| 180 167 | 
             
                        handle_entitydecl( event )
         | 
| 181 168 | 
             
                      when :processing_instruction, :comment, :attlistdecl,
         | 
| @@ -7,37 +7,34 @@ module REXML | |
| 7 7 | 
             
                  def initialize source, listener
         | 
| 8 8 | 
             
                    @listener = listener
         | 
| 9 9 | 
             
                    @parser = BaseParser.new( source )
         | 
| 10 | 
            -
                    @ | 
| 10 | 
            +
                    @entities = {}
         | 
| 11 11 | 
             
                  end
         | 
| 12 12 |  | 
| 13 13 | 
             
                  def add_listener( listener )
         | 
| 14 14 | 
             
                    @parser.add_listener( listener )
         | 
| 15 15 | 
             
                  end
         | 
| 16 16 |  | 
| 17 | 
            +
                  def entity_expansion_count
         | 
| 18 | 
            +
                    @parser.entity_expansion_count
         | 
| 19 | 
            +
                  end
         | 
| 20 | 
            +
             | 
| 17 21 | 
             
                  def parse
         | 
| 18 22 | 
             
                    # entity string
         | 
| 19 23 | 
             
                    while true
         | 
| 20 24 | 
             
                      event = @parser.pull
         | 
| 21 25 | 
             
                      case event[0]
         | 
| 22 26 | 
             
                      when :end_document
         | 
| 23 | 
            -
                        unless @tag_stack.empty?
         | 
| 24 | 
            -
                          tag_path = "/" + @tag_stack.join("/")
         | 
| 25 | 
            -
                          raise ParseException.new("Missing end tag for '#{tag_path}'",
         | 
| 26 | 
            -
                                                   @parser.source)
         | 
| 27 | 
            -
                        end
         | 
| 28 27 | 
             
                        return
         | 
| 29 28 | 
             
                      when :start_element
         | 
| 30 | 
            -
                        @tag_stack << event[1]
         | 
| 31 29 | 
             
                        attrs = event[2].each do |n, v|
         | 
| 32 30 | 
             
                          event[2][n] = @parser.unnormalize( v )
         | 
| 33 31 | 
             
                        end
         | 
| 34 32 | 
             
                        @listener.tag_start( event[1], attrs )
         | 
| 35 33 | 
             
                      when :end_element
         | 
| 36 34 | 
             
                        @listener.tag_end( event[1] )
         | 
| 37 | 
            -
                        @tag_stack.pop
         | 
| 38 35 | 
             
                      when :text
         | 
| 39 | 
            -
                         | 
| 40 | 
            -
                        @listener.text(  | 
| 36 | 
            +
                        unnormalized = @parser.unnormalize( event[1], @entities )
         | 
| 37 | 
            +
                        @listener.text( unnormalized )
         | 
| 41 38 | 
             
                      when :processing_instruction
         | 
| 42 39 | 
             
                        @listener.instruction( *event[1,2] )
         | 
| 43 40 | 
             
                      when :start_doctype
         | 
| @@ -48,6 +45,7 @@ module REXML | |
| 48 45 | 
             
                      when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
         | 
| 49 46 | 
             
                        @listener.send( event[0].to_s, *event[1..-1] )
         | 
| 50 47 | 
             
                      when :entitydecl, :notationdecl
         | 
| 48 | 
            +
                        @entities[ event[1] ] = event[2] if event.size == 3
         | 
| 51 49 | 
             
                        @listener.send( event[0].to_s, event[1..-1] )
         | 
| 52 50 | 
             
                      when :externalentity
         | 
| 53 51 | 
             
                        entity_reference = event[1]
         | 
| @@ -15,8 +15,6 @@ module REXML | |
| 15 15 | 
             
                  end
         | 
| 16 16 |  | 
| 17 17 | 
             
                  def parse
         | 
| 18 | 
            -
                    tag_stack = []
         | 
| 19 | 
            -
                    in_doctype = false
         | 
| 20 18 | 
             
                    entities = nil
         | 
| 21 19 | 
             
                    begin
         | 
| 22 20 | 
             
                      while true
         | 
| @@ -24,32 +22,24 @@ module REXML | |
| 24 22 | 
             
                        #STDERR.puts "TREEPARSER GOT #{event.inspect}"
         | 
| 25 23 | 
             
                        case event[0]
         | 
| 26 24 | 
             
                        when :end_document
         | 
| 27 | 
            -
                          unless tag_stack.empty?
         | 
| 28 | 
            -
                            raise ParseException.new("No close tag for #{@build_context.xpath}",
         | 
| 29 | 
            -
                                                     @parser.source, @parser)
         | 
| 30 | 
            -
                          end
         | 
| 31 25 | 
             
                          return
         | 
| 32 26 | 
             
                        when :start_element
         | 
| 33 | 
            -
                          tag_stack.push(event[1])
         | 
| 34 27 | 
             
                          el = @build_context = @build_context.add_element( event[1] )
         | 
| 35 28 | 
             
                          event[2].each do |key, value|
         | 
| 36 29 | 
             
                            el.attributes[key]=Attribute.new(key,value,self)
         | 
| 37 30 | 
             
                          end
         | 
| 38 31 | 
             
                        when :end_element
         | 
| 39 | 
            -
                          tag_stack.pop
         | 
| 40 32 | 
             
                          @build_context = @build_context.parent
         | 
| 41 33 | 
             
                        when :text
         | 
| 42 | 
            -
                          if  | 
| 43 | 
            -
                             | 
| 44 | 
            -
             | 
| 45 | 
            -
                             | 
| 46 | 
            -
                              @build_context. | 
| 47 | 
            -
             | 
| 48 | 
            -
                               | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
                              )
         | 
| 52 | 
            -
                            end
         | 
| 34 | 
            +
                          if @build_context[-1].instance_of? Text
         | 
| 35 | 
            +
                            @build_context[-1] << event[1]
         | 
| 36 | 
            +
                          else
         | 
| 37 | 
            +
                            @build_context.add(
         | 
| 38 | 
            +
                              Text.new(event[1], @build_context.whitespace, nil, true)
         | 
| 39 | 
            +
                            ) unless (
         | 
| 40 | 
            +
                              @build_context.ignore_whitespace_nodes and
         | 
| 41 | 
            +
                              event[1].strip.size==0
         | 
| 42 | 
            +
                            )
         | 
| 53 43 | 
             
                          end
         | 
| 54 44 | 
             
                        when :comment
         | 
| 55 45 | 
             
                          c = Comment.new( event[1] )
         | 
| @@ -60,14 +50,12 @@ module REXML | |
| 60 50 | 
             
                        when :processing_instruction
         | 
| 61 51 | 
             
                          @build_context.add( Instruction.new( event[1], event[2] ) )
         | 
| 62 52 | 
             
                        when :end_doctype
         | 
| 63 | 
            -
                          in_doctype = false
         | 
| 64 53 | 
             
                          entities.each { |k,v| entities[k] = @build_context.entities[k].value }
         | 
| 65 54 | 
             
                          @build_context = @build_context.parent
         | 
| 66 55 | 
             
                        when :start_doctype
         | 
| 67 56 | 
             
                          doctype = DocType.new( event[1..-1], @build_context )
         | 
| 68 57 | 
             
                          @build_context = doctype
         | 
| 69 58 | 
             
                          entities = {}
         | 
| 70 | 
            -
                          in_doctype = true
         | 
| 71 59 | 
             
                        when :attlistdecl
         | 
| 72 60 | 
             
                          n = AttlistDecl.new( event[1..-1] )
         | 
| 73 61 | 
             
                          @build_context.add( n )
         | 
    
        data/lib/rexml/rexml.rb
    CHANGED
    
    
    
        data/lib/rexml/source.rb
    CHANGED
    
    | @@ -1,8 +1,28 @@ | |
| 1 1 | 
             
            # coding: US-ASCII
         | 
| 2 2 | 
             
            # frozen_string_literal: false
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require "strscan"
         | 
| 5 | 
            +
             | 
| 3 6 | 
             
            require_relative 'encoding'
         | 
| 4 7 |  | 
| 5 8 | 
             
            module REXML
         | 
| 9 | 
            +
              if StringScanner::Version < "1.0.0"
         | 
| 10 | 
            +
                module StringScannerCheckScanString
         | 
| 11 | 
            +
                  refine StringScanner do
         | 
| 12 | 
            +
                    def check(pattern)
         | 
| 13 | 
            +
                      pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
         | 
| 14 | 
            +
                      super(pattern)
         | 
| 15 | 
            +
                    end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                    def scan(pattern)
         | 
| 18 | 
            +
                      pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
         | 
| 19 | 
            +
                      super(pattern)
         | 
| 20 | 
            +
                    end
         | 
| 21 | 
            +
                  end
         | 
| 22 | 
            +
                end
         | 
| 23 | 
            +
                using StringScannerCheckScanString
         | 
| 24 | 
            +
              end
         | 
| 25 | 
            +
             | 
| 6 26 | 
             
              # Generates Source-s.  USE THIS CLASS.
         | 
| 7 27 | 
             
              class SourceFactory
         | 
| 8 28 | 
             
                # Generates a Source object
         | 
| @@ -30,18 +50,27 @@ module REXML | |
| 30 50 | 
             
              # objects and provides consumption of text
         | 
| 31 51 | 
             
              class Source
         | 
| 32 52 | 
             
                include Encoding
         | 
| 33 | 
            -
                # The current buffer (what we're going to read next)
         | 
| 34 | 
            -
                attr_reader :buffer
         | 
| 35 53 | 
             
                # The line number of the last consumed text
         | 
| 36 54 | 
             
                attr_reader :line
         | 
| 37 55 | 
             
                attr_reader :encoding
         | 
| 38 56 |  | 
| 57 | 
            +
                module Private
         | 
| 58 | 
            +
                  SCANNER_RESET_SIZE = 100000
         | 
| 59 | 
            +
                  PRE_DEFINED_TERM_PATTERNS = {}
         | 
| 60 | 
            +
                  pre_defined_terms = ["'", '"', "<"]
         | 
| 61 | 
            +
                  pre_defined_terms.each do |term|
         | 
| 62 | 
            +
                    PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
         | 
| 63 | 
            +
                  end
         | 
| 64 | 
            +
                end
         | 
| 65 | 
            +
                private_constant :Private
         | 
| 66 | 
            +
             | 
| 39 67 | 
             
                # Constructor
         | 
| 40 68 | 
             
                # @param arg must be a String, and should be a valid XML document
         | 
| 41 69 | 
             
                # @param encoding if non-null, sets the encoding of the source to this
         | 
| 42 70 | 
             
                # value, overriding all encoding detection
         | 
| 43 71 | 
             
                def initialize(arg, encoding=nil)
         | 
| 44 | 
            -
                  @orig =  | 
| 72 | 
            +
                  @orig = arg
         | 
| 73 | 
            +
                  @scanner = StringScanner.new(@orig)
         | 
| 45 74 | 
             
                  if encoding
         | 
| 46 75 | 
             
                    self.encoding = encoding
         | 
| 47 76 | 
             
                  else
         | 
| @@ -50,6 +79,20 @@ module REXML | |
| 50 79 | 
             
                  @line = 0
         | 
| 51 80 | 
             
                end
         | 
| 52 81 |  | 
| 82 | 
            +
                # The current buffer (what we're going to read next)
         | 
| 83 | 
            +
                def buffer
         | 
| 84 | 
            +
                  @scanner.rest
         | 
| 85 | 
            +
                end
         | 
| 86 | 
            +
             | 
| 87 | 
            +
                def drop_parsed_content
         | 
| 88 | 
            +
                  if @scanner.pos > Private::SCANNER_RESET_SIZE
         | 
| 89 | 
            +
                    @scanner.string = @scanner.rest
         | 
| 90 | 
            +
                  end
         | 
| 91 | 
            +
                end
         | 
| 92 | 
            +
             | 
| 93 | 
            +
                def buffer_encoding=(encoding)
         | 
| 94 | 
            +
                  @scanner.string.force_encoding(encoding)
         | 
| 95 | 
            +
                end
         | 
| 53 96 |  | 
| 54 97 | 
             
                # Inherited from Encoding
         | 
| 55 98 | 
             
                # Overridden to support optimized en/decoding
         | 
| @@ -58,98 +101,78 @@ module REXML | |
| 58 101 | 
             
                  encoding_updated
         | 
| 59 102 | 
             
                end
         | 
| 60 103 |  | 
| 61 | 
            -
                 | 
| 62 | 
            -
                # usual scan() method.  For one thing, the pattern argument has some
         | 
| 63 | 
            -
                # requirements; for another, the source can be consumed.  You can easily
         | 
| 64 | 
            -
                # confuse this method.  Originally, the patterns were easier
         | 
| 65 | 
            -
                # to construct and this method more robust, because this method
         | 
| 66 | 
            -
                # generated search regexps on the fly; however, this was
         | 
| 67 | 
            -
                # computationally expensive and slowed down the entire REXML package
         | 
| 68 | 
            -
                # considerably, since this is by far the most commonly called method.
         | 
| 69 | 
            -
                # @param pattern must be a Regexp, and must be in the form of
         | 
| 70 | 
            -
                # /^\s*(#{your pattern, with no groups})(.*)/.  The first group
         | 
| 71 | 
            -
                # will be returned; the second group is used if the consume flag is
         | 
| 72 | 
            -
                # set.
         | 
| 73 | 
            -
                # @param consume if true, the pattern returned will be consumed, leaving
         | 
| 74 | 
            -
                # everything after it in the Source.
         | 
| 75 | 
            -
                # @return the pattern, if found, or nil if the Source is empty or the
         | 
| 76 | 
            -
                # pattern is not found.
         | 
| 77 | 
            -
                def scan(pattern, cons=false)
         | 
| 78 | 
            -
                  return nil if @buffer.nil?
         | 
| 79 | 
            -
                  rv = @buffer.scan(pattern)
         | 
| 80 | 
            -
                  @buffer = $' if cons and rv.size>0
         | 
| 81 | 
            -
                  rv
         | 
| 104 | 
            +
                def read(term = nil)
         | 
| 82 105 | 
             
                end
         | 
| 83 106 |  | 
| 84 | 
            -
                def  | 
| 107 | 
            +
                def read_until(term)
         | 
| 108 | 
            +
                  pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
         | 
| 109 | 
            +
                  data = @scanner.scan_until(pattern)
         | 
| 110 | 
            +
                  unless data
         | 
| 111 | 
            +
                    data = @scanner.rest
         | 
| 112 | 
            +
                    @scanner.pos = @scanner.string.bytesize
         | 
| 113 | 
            +
                  end
         | 
| 114 | 
            +
                  data
         | 
| 85 115 | 
             
                end
         | 
| 86 116 |  | 
| 87 | 
            -
                def  | 
| 88 | 
            -
                  @buffer = $' if pattern.match( @buffer )
         | 
| 117 | 
            +
                def ensure_buffer
         | 
| 89 118 | 
             
                end
         | 
| 90 119 |  | 
| 91 | 
            -
                def  | 
| 92 | 
            -
                   | 
| 120 | 
            +
                def match(pattern, cons=false)
         | 
| 121 | 
            +
                  if cons
         | 
| 122 | 
            +
                    @scanner.scan(pattern).nil? ? nil : @scanner
         | 
| 123 | 
            +
                  else
         | 
| 124 | 
            +
                    @scanner.check(pattern).nil? ? nil : @scanner
         | 
| 125 | 
            +
                  end
         | 
| 93 126 | 
             
                end
         | 
| 94 127 |  | 
| 95 | 
            -
                def  | 
| 96 | 
            -
                   | 
| 97 | 
            -
                  @buffer = $'
         | 
| 98 | 
            -
                  return md
         | 
| 128 | 
            +
                def position
         | 
| 129 | 
            +
                  @scanner.pos
         | 
| 99 130 | 
             
                end
         | 
| 100 131 |  | 
| 101 | 
            -
                def  | 
| 102 | 
            -
                   | 
| 103 | 
            -
                  @buffer = $' if cons and md
         | 
| 104 | 
            -
                  return md
         | 
| 132 | 
            +
                def position=(pos)
         | 
| 133 | 
            +
                  @scanner.pos = pos
         | 
| 105 134 | 
             
                end
         | 
| 106 135 |  | 
| 107 136 | 
             
                # @return true if the Source is exhausted
         | 
| 108 137 | 
             
                def empty?
         | 
| 109 | 
            -
                  @ | 
| 110 | 
            -
                end
         | 
| 111 | 
            -
             | 
| 112 | 
            -
                def position
         | 
| 113 | 
            -
                  @orig.index( @buffer )
         | 
| 138 | 
            +
                  @scanner.eos?
         | 
| 114 139 | 
             
                end
         | 
| 115 140 |  | 
| 116 141 | 
             
                # @return the current line in the source
         | 
| 117 142 | 
             
                def current_line
         | 
| 118 143 | 
             
                  lines = @orig.split
         | 
| 119 | 
            -
                  res = lines.grep @ | 
| 144 | 
            +
                  res = lines.grep @scanner.rest[0..30]
         | 
| 120 145 | 
             
                  res = res[-1] if res.kind_of? Array
         | 
| 121 146 | 
             
                  lines.index( res ) if res
         | 
| 122 147 | 
             
                end
         | 
| 123 148 |  | 
| 124 149 | 
             
                private
         | 
| 150 | 
            +
             | 
| 125 151 | 
             
                def detect_encoding
         | 
| 126 | 
            -
                   | 
| 152 | 
            +
                  scanner_encoding = @scanner.rest.encoding
         | 
| 127 153 | 
             
                  detected_encoding = "UTF-8"
         | 
| 128 154 | 
             
                  begin
         | 
| 129 | 
            -
                    @ | 
| 130 | 
            -
                    if @ | 
| 131 | 
            -
                      @buffer[0, 2] = ""
         | 
| 155 | 
            +
                    @scanner.string.force_encoding("ASCII-8BIT")
         | 
| 156 | 
            +
                    if @scanner.scan(/\xfe\xff/n)
         | 
| 132 157 | 
             
                      detected_encoding = "UTF-16BE"
         | 
| 133 | 
            -
                    elsif @ | 
| 134 | 
            -
                      @buffer[0, 2] = ""
         | 
| 158 | 
            +
                    elsif @scanner.scan(/\xff\xfe/n)
         | 
| 135 159 | 
             
                      detected_encoding = "UTF-16LE"
         | 
| 136 | 
            -
                    elsif @ | 
| 137 | 
            -
                      @buffer[0, 3] = ""
         | 
| 160 | 
            +
                    elsif @scanner.scan(/\xef\xbb\xbf/n)
         | 
| 138 161 | 
             
                      detected_encoding = "UTF-8"
         | 
| 139 162 | 
             
                    end
         | 
| 140 163 | 
             
                  ensure
         | 
| 141 | 
            -
                    @ | 
| 164 | 
            +
                    @scanner.string.force_encoding(scanner_encoding)
         | 
| 142 165 | 
             
                  end
         | 
| 143 166 | 
             
                  self.encoding = detected_encoding
         | 
| 144 167 | 
             
                end
         | 
| 145 168 |  | 
| 146 169 | 
             
                def encoding_updated
         | 
| 147 170 | 
             
                  if @encoding != 'UTF-8'
         | 
| 148 | 
            -
                    @ | 
| 171 | 
            +
                    @scanner.string = decode(@scanner.rest)
         | 
| 149 172 | 
             
                    @to_utf = true
         | 
| 150 173 | 
             
                  else
         | 
| 151 174 | 
             
                    @to_utf = false
         | 
| 152 | 
            -
                    @ | 
| 175 | 
            +
                    @scanner.string.force_encoding(::Encoding::UTF_8)
         | 
| 153 176 | 
             
                  end
         | 
| 154 177 | 
             
                end
         | 
| 155 178 | 
             
              end
         | 
| @@ -172,7 +195,7 @@ module REXML | |
| 172 195 | 
             
                  end
         | 
| 173 196 |  | 
| 174 197 | 
             
                  if !@to_utf and
         | 
| 175 | 
            -
                      @ | 
| 198 | 
            +
                      @orig.respond_to?(:force_encoding) and
         | 
| 176 199 | 
             
                      @source.respond_to?(:external_encoding) and
         | 
| 177 200 | 
             
                      @source.external_encoding != ::Encoding::UTF_8
         | 
| 178 201 | 
             
                    @force_utf8 = true
         | 
| @@ -181,65 +204,72 @@ module REXML | |
| 181 204 | 
             
                  end
         | 
| 182 205 | 
             
                end
         | 
| 183 206 |  | 
| 184 | 
            -
                def  | 
| 185 | 
            -
                   | 
| 186 | 
            -
                   | 
| 187 | 
            -
             | 
| 188 | 
            -
             | 
| 189 | 
            -
             | 
| 190 | 
            -
             | 
| 191 | 
            -
             | 
| 192 | 
            -
             | 
| 193 | 
            -
             | 
| 194 | 
            -
                         | 
| 195 | 
            -
                      rescue Iconv::IllegalSequence
         | 
| 196 | 
            -
                        raise
         | 
| 197 | 
            -
                      rescue
         | 
| 198 | 
            -
                        @source = nil
         | 
| 207 | 
            +
                def read(term = nil, min_bytes = 1)
         | 
| 208 | 
            +
                  term = encode(term) if term
         | 
| 209 | 
            +
                  begin
         | 
| 210 | 
            +
                    str = readline(term)
         | 
| 211 | 
            +
                    @scanner << str
         | 
| 212 | 
            +
                    read_bytes = str.bytesize
         | 
| 213 | 
            +
                    begin
         | 
| 214 | 
            +
                      while read_bytes < min_bytes
         | 
| 215 | 
            +
                        str = readline(term)
         | 
| 216 | 
            +
                        @scanner << str
         | 
| 217 | 
            +
                        read_bytes += str.bytesize
         | 
| 199 218 | 
             
                      end
         | 
| 219 | 
            +
                    rescue IOError
         | 
| 200 220 | 
             
                    end
         | 
| 201 | 
            -
                     | 
| 221 | 
            +
                    true
         | 
| 222 | 
            +
                  rescue Exception, NameError
         | 
| 223 | 
            +
                    @source = nil
         | 
| 224 | 
            +
                    false
         | 
| 202 225 | 
             
                  end
         | 
| 203 | 
            -
                  rv.taint if RUBY_VERSION < '2.7'
         | 
| 204 | 
            -
                  rv
         | 
| 205 226 | 
             
                end
         | 
| 206 227 |  | 
| 207 | 
            -
                def  | 
| 208 | 
            -
                   | 
| 209 | 
            -
             | 
| 210 | 
            -
                   | 
| 211 | 
            -
                    @source | 
| 228 | 
            +
                def read_until(term)
         | 
| 229 | 
            +
                  pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
         | 
| 230 | 
            +
                  term = encode(term)
         | 
| 231 | 
            +
                  until str = @scanner.scan_until(pattern)
         | 
| 232 | 
            +
                    break if @source.nil?
         | 
| 233 | 
            +
                    break if @source.eof?
         | 
| 234 | 
            +
                    @scanner << readline(term)
         | 
| 235 | 
            +
                  end
         | 
| 236 | 
            +
                  if str
         | 
| 237 | 
            +
                    read if @scanner.eos? and !@source.eof?
         | 
| 238 | 
            +
                    str
         | 
| 239 | 
            +
                  else
         | 
| 240 | 
            +
                    rest = @scanner.rest
         | 
| 241 | 
            +
                    @scanner.pos = @scanner.string.bytesize
         | 
| 242 | 
            +
                    rest
         | 
| 212 243 | 
             
                  end
         | 
| 213 244 | 
             
                end
         | 
| 214 245 |  | 
| 215 | 
            -
                def  | 
| 216 | 
            -
                   | 
| 246 | 
            +
                def ensure_buffer
         | 
| 247 | 
            +
                  read if @scanner.eos? && @source
         | 
| 217 248 | 
             
                end
         | 
| 218 249 |  | 
| 219 250 | 
             
                def match( pattern, cons=false )
         | 
| 220 | 
            -
                   | 
| 221 | 
            -
                   | 
| 222 | 
            -
                  while  | 
| 223 | 
            -
                     | 
| 224 | 
            -
                       | 
| 225 | 
            -
             | 
| 226 | 
            -
                       | 
| 227 | 
            -
                    rescue
         | 
| 228 | 
            -
                      @source = nil
         | 
| 251 | 
            +
                  # To avoid performance issue, we need to increase bytes to read per scan
         | 
| 252 | 
            +
                  min_bytes = 1
         | 
| 253 | 
            +
                  while true
         | 
| 254 | 
            +
                    if cons
         | 
| 255 | 
            +
                      md = @scanner.scan(pattern)
         | 
| 256 | 
            +
                    else
         | 
| 257 | 
            +
                      md = @scanner.check(pattern)
         | 
| 229 258 | 
             
                    end
         | 
| 259 | 
            +
                    break if md
         | 
| 260 | 
            +
                    return nil if pattern.is_a?(String)
         | 
| 261 | 
            +
                    return nil if @source.nil?
         | 
| 262 | 
            +
                    return nil unless read(nil, min_bytes)
         | 
| 263 | 
            +
                    min_bytes *= 2
         | 
| 230 264 | 
             
                  end
         | 
| 231 | 
            -
             | 
| 232 | 
            -
                   | 
| 265 | 
            +
             | 
| 266 | 
            +
                  md.nil? ? nil : @scanner
         | 
| 233 267 | 
             
                end
         | 
| 234 268 |  | 
| 235 269 | 
             
                def empty?
         | 
| 236 270 | 
             
                  super and ( @source.nil? || @source.eof? )
         | 
| 237 271 | 
             
                end
         | 
| 238 272 |  | 
| 239 | 
            -
                def position
         | 
| 240 | 
            -
                  @er_source.pos rescue 0
         | 
| 241 | 
            -
                end
         | 
| 242 | 
            -
             | 
| 243 273 | 
             
                # @return the current line in the source
         | 
| 244 274 | 
             
                def current_line
         | 
| 245 275 | 
             
                  begin
         | 
| @@ -263,8 +293,8 @@ module REXML | |
| 263 293 | 
             
                end
         | 
| 264 294 |  | 
| 265 295 | 
             
                private
         | 
| 266 | 
            -
                def readline
         | 
| 267 | 
            -
                  str = @source.readline(@line_break)
         | 
| 296 | 
            +
                def readline(term = nil)
         | 
| 297 | 
            +
                  str = @source.readline(term || @line_break)
         | 
| 268 298 | 
             
                  if @pending_buffer
         | 
| 269 299 | 
             
                    if str.nil?
         | 
| 270 300 | 
             
                      str = @pending_buffer
         | 
| @@ -290,7 +320,7 @@ module REXML | |
| 290 320 | 
             
                    @source.set_encoding(@encoding, @encoding)
         | 
| 291 321 | 
             
                  end
         | 
| 292 322 | 
             
                  @line_break = encode(">")
         | 
| 293 | 
            -
                  @pending_buffer, @ | 
| 323 | 
            +
                  @pending_buffer, @scanner.string = @scanner.rest, ""
         | 
| 294 324 | 
             
                  @pending_buffer.force_encoding(@encoding)
         | 
| 295 325 | 
             
                  super
         | 
| 296 326 | 
             
                end
         | 
    
        data/lib/rexml/text.rb
    CHANGED
    
    | @@ -151,25 +151,45 @@ module REXML | |
| 151 151 | 
             
                    end
         | 
| 152 152 | 
             
                  end
         | 
| 153 153 |  | 
| 154 | 
            -
                   | 
| 155 | 
            -
                  string. | 
| 156 | 
            -
                    if  | 
| 157 | 
            -
                      raise "Illegal character #{ | 
| 158 | 
            -
                     | 
| 159 | 
            -
             | 
| 160 | 
            -
             | 
| 161 | 
            -
             | 
| 154 | 
            +
                  pos = 0
         | 
| 155 | 
            +
                  while (index = string.index(/<|&/, pos))
         | 
| 156 | 
            +
                    if string[index] == "<"
         | 
| 157 | 
            +
                      raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
         | 
| 158 | 
            +
                    end
         | 
| 159 | 
            +
             | 
| 160 | 
            +
                    unless (end_index = string.index(/[^\s];/, index + 1))
         | 
| 161 | 
            +
                      raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
         | 
| 162 | 
            +
                    end
         | 
| 163 | 
            +
             | 
| 164 | 
            +
                    value = string[(index + 1)..end_index]
         | 
| 165 | 
            +
                    if /\s/.match?(value)
         | 
| 166 | 
            +
                      raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
         | 
| 167 | 
            +
                    end
         | 
| 168 | 
            +
             | 
| 169 | 
            +
                    if value[0] == "#"
         | 
| 170 | 
            +
                      character_reference = value[1..-1]
         | 
| 171 | 
            +
             | 
| 172 | 
            +
                      unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
         | 
| 173 | 
            +
                        if character_reference[0] == "x" || character_reference[-1] == "x"
         | 
| 174 | 
            +
                          raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
         | 
| 162 175 | 
             
                        else
         | 
| 163 | 
            -
                          raise "Illegal character #{ | 
| 176 | 
            +
                          raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
         | 
| 164 177 | 
             
                        end
         | 
| 165 | 
            -
                      # FIXME: below can't work but this needs API change.
         | 
| 166 | 
            -
                      # elsif @parent and $3 and !SUBSTITUTES.include?($1)
         | 
| 167 | 
            -
                      #   if !doctype or !doctype.entities.has_key?($3)
         | 
| 168 | 
            -
                      #     raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
         | 
| 169 | 
            -
                      #   end
         | 
| 170 178 | 
             
                      end
         | 
| 179 | 
            +
             | 
| 180 | 
            +
                      case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
         | 
| 181 | 
            +
                      when *VALID_CHAR
         | 
| 182 | 
            +
                      else
         | 
| 183 | 
            +
                        raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
         | 
| 184 | 
            +
                      end
         | 
| 185 | 
            +
                    elsif !(/\A#{Entity::NAME}\z/um.match?(value))
         | 
| 186 | 
            +
                      raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
         | 
| 171 187 | 
             
                    end
         | 
| 188 | 
            +
             | 
| 189 | 
            +
                    pos = end_index + 1
         | 
| 172 190 | 
             
                  end
         | 
| 191 | 
            +
             | 
| 192 | 
            +
                  string
         | 
| 173 193 | 
             
                end
         | 
| 174 194 |  | 
| 175 195 | 
             
                def node_type
         | 
    
        data/lib/rexml/xpath_parser.rb
    CHANGED
    
    | @@ -590,6 +590,7 @@ module REXML | |
| 590 590 |  | 
| 591 591 | 
             
                def evaluate_predicate(expression, nodesets)
         | 
| 592 592 | 
             
                  enter(:predicate, expression, nodesets) if @debug
         | 
| 593 | 
            +
                  new_nodeset_count = 0
         | 
| 593 594 | 
             
                  new_nodesets = nodesets.collect do |nodeset|
         | 
| 594 595 | 
             
                    new_nodeset = []
         | 
| 595 596 | 
             
                    subcontext = { :size => nodeset.size }
         | 
| @@ -606,17 +607,20 @@ module REXML | |
| 606 607 | 
             
                      result = result[0] if result.kind_of? Array and result.length == 1
         | 
| 607 608 | 
             
                      if result.kind_of? Numeric
         | 
| 608 609 | 
             
                        if result == node.position
         | 
| 609 | 
            -
                           | 
| 610 | 
            +
                          new_nodeset_count += 1
         | 
| 611 | 
            +
                          new_nodeset << XPathNode.new(node, position: new_nodeset_count)
         | 
| 610 612 | 
             
                        end
         | 
| 611 613 | 
             
                      elsif result.instance_of? Array
         | 
| 612 614 | 
             
                        if result.size > 0 and result.inject(false) {|k,s| s or k}
         | 
| 613 615 | 
             
                          if result.size > 0
         | 
| 614 | 
            -
                             | 
| 616 | 
            +
                            new_nodeset_count += 1
         | 
| 617 | 
            +
                            new_nodeset << XPathNode.new(node, position: new_nodeset_count)
         | 
| 615 618 | 
             
                          end
         | 
| 616 619 | 
             
                        end
         | 
| 617 620 | 
             
                      else
         | 
| 618 621 | 
             
                        if result
         | 
| 619 | 
            -
                           | 
| 622 | 
            +
                          new_nodeset_count += 1
         | 
| 623 | 
            +
                          new_nodeset << XPathNode.new(node, position: new_nodeset_count)
         | 
| 620 624 | 
             
                        end
         | 
| 621 625 | 
             
                      end
         | 
| 622 626 | 
             
                    end
         |