RubyGems - rexml - Versions diffs - 3.2.5 → 3.3.0 - Mend

rexml 3.2.5 → 3.3.0

Potentially problematic release.

This version of rexml might be problematic. Click here for more details.

Files changed (21)

checksums.yaml +4 -4
data/NEWS.md +204 -2
data/README.md +10 -1
data/doc/rexml/tasks/rdoc/element.rdoc +2 -2
data/doc/rexml/tutorial.rdoc +1358 -0
data/lib/rexml/attribute.rb +14 -9
data/lib/rexml/document.rb +1 -1
data/lib/rexml/element.rb +3 -3
data/lib/rexml/entity.rb +25 -15
data/lib/rexml/formatters/pretty.rb +2 -2
data/lib/rexml/functions.rb +1 -2
data/lib/rexml/namespace.rb +8 -4
data/lib/rexml/node.rb +8 -4
data/lib/rexml/parseexception.rb +1 -0
data/lib/rexml/parsers/baseparser.rb +247 -229
data/lib/rexml/parsers/xpathparser.rb +136 -86
data/lib/rexml/rexml.rb +3 -1
data/lib/rexml/source.rb +114 -100
data/lib/rexml/text.rb +6 -4
data/lib/rexml/xpath_parser.rb +7 -3
metadata +12 -38

data/lib/rexml/parsers/xpathparser.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # frozen_string_literal: false
 require_relative '../namespace'
 require_relative '../xmltokens'
@@ -38,108 +39,143 @@ module REXML
         parsed
       end
-      def abbreviate( path )
-        path = path.kind_of?(String) ? parse( path ) : path
-        string = ""
-        document = false
-        while path.size > 0
-          op = path.shift
+      def abbreviate(path_or_parsed)
+        if path_or_parsed.kind_of?(String)
+          parsed = parse(path_or_parsed)
+        else
+          parsed = path_or_parsed
+        end
+        components = []
+        component = nil
+        while parsed.size > 0
+          op = parsed.shift
           case op
           when :node
+            component << "node()"
           when :attribute
-            string << "/" if string.size > 0
-            string << "@"
+            component = "@"
+            components << component
           when :child
-            string << "/" if string.size > 0
+            component = ""
+            components << component
           when :descendant_or_self
-            string << "/"
+            next_op = parsed[0]
+            if next_op == :node
+              parsed.shift
+              component = ""
+              components << component
+            else
+              component = "descendant-or-self::"
+              components << component
+            end
           when :self
-            string << "."
+            next_op = parsed[0]
+            if next_op == :node
+              parsed.shift
+              components << "."
+            else
+              component = "self::"
+              components << component
+            end
           when :parent
-            string << ".."
+            next_op = parsed[0]
+            if next_op == :node
+              parsed.shift
+              components << ".."
+            else
+              component = "parent::"
+              components << component
+            end
           when :any
-            string << "*"
+            component << "*"
           when :text
-            string << "text()"
+            component << "text()"
           when :following, :following_sibling,
                 :ancestor, :ancestor_or_self, :descendant,
                 :namespace, :preceding, :preceding_sibling
-            string << "/" unless string.size == 0
-            string << op.to_s.tr("_", "-")
-            string << "::"
+            component = op.to_s.tr("_", "-") << "::"
+            components << component
           when :qname
-            prefix = path.shift
-            name = path.shift
-            string << prefix+":" if prefix.size > 0
-            string << name
+            prefix = parsed.shift
+            name = parsed.shift
+            component << prefix+":" if prefix.size > 0
+            component << name
           when :predicate
-            string << '['
-            string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
-            string << ']'
+            component << '['
+            component << predicate_to_path(parsed.shift) {|x| abbreviate(x)}
+            component << ']'
           when :document
-            document = true
+            components << ""
           when :function
-            string << path.shift
-            string << "( "
-            string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
-            string << " )"
+            component << parsed.shift
+            component << "( "
+            component << predicate_to_path(parsed.shift[0]) {|x| abbreviate(x)}
+            component << " )"
           when :literal
-            string << %Q{ "#{path.shift}" }
+            component << quote_literal(parsed.shift)
           else
-            string << "/" unless string.size == 0
-            string << "UNKNOWN("
-            string << op.inspect
-            string << ")"
+            component << "UNKNOWN("
+            component << op.inspect
+            component << ")"
           end
         end
-        string = "/"+string if document
-        return string
+        case components
+        when [""]
+          "/"
+        when ["", ""]
+          "//"
+        else
+          components.join("/")
+        end
       end
-      def expand( path )
-        path = path.kind_of?(String) ? parse( path ) : path
-        string = ""
+      def expand(path_or_parsed)
+        if path_or_parsed.kind_of?(String)
+          parsed = parse(path_or_parsed)
+        else
+          parsed = path_or_parsed
+        end
+        path = ""
         document = false
-        while path.size > 0
-          op = path.shift
+        while parsed.size > 0
+          op = parsed.shift
           case op
           when :node
-            string << "node()"
+            path << "node()"
           when :attribute, :child, :following, :following_sibling,
                 :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
                 :namespace, :preceding, :preceding_sibling, :self, :parent
-            string << "/" unless string.size == 0
-            string << op.to_s.tr("_", "-")
-            string << "::"
+            path << "/" unless path.size == 0
+            path << op.to_s.tr("_", "-")
+            path << "::"
           when :any
-            string << "*"
+            path << "*"
           when :qname
-            prefix = path.shift
-            name = path.shift
-            string << prefix+":" if prefix.size > 0
-            string << name
+            prefix = parsed.shift
+            name = parsed.shift
+            path << prefix+":" if prefix.size > 0
+            path << name
           when :predicate
-            string << '['
-            string << predicate_to_string( path.shift ) { |x| expand(x) }
-            string << ']'
+            path << '['
+            path << predicate_to_path( parsed.shift ) { |x| expand(x) }
+            path << ']'
           when :document
             document = true
           else
-            string << "/" unless string.size == 0
-            string << "UNKNOWN("
-            string << op.inspect
-            string << ")"
+            path << "UNKNOWN("
+            path << op.inspect
+            path << ")"
           end
         end
-        string = "/"+string if document
-        return string
+        path = "/"+path if document
+        path
       end
-      def predicate_to_string( path, &block )
-        string = ""
-        case path[0]
+      def predicate_to_path(parsed, &block)
+        path = ""
+        case parsed[0]
         when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
-          op = path.shift
+          op = parsed.shift
           case op
           when :eq
             op = "="
@@ -156,36 +192,50 @@ module REXML
           when :union
             op = "|"
           end
-          left = predicate_to_string( path.shift, &block )
-          right = predicate_to_string( path.shift, &block )
-          string << " "
-          string << left
-          string << " "
-          string << op.to_s
-          string << " "
-          string << right
-          string << " "
+          left = predicate_to_path( parsed.shift, &block )
+          right = predicate_to_path( parsed.shift, &block )
+          path << left
+          path << " "
+          path << op.to_s
+          path << " "
+          path << right
         when :function
-          path.shift
-          name = path.shift
-          string << name
-          string << "( "
-          string << predicate_to_string( path.shift, &block )
-          string << " )"
+          parsed.shift
+          name = parsed.shift
+          path << name
+          path << "("
+          parsed.shift.each_with_index do |argument, i|
+            path << ", " if i > 0
+            path << predicate_to_path(argument, &block)
+          end
+          path << ")"
         when :literal
-          path.shift
-          string << " "
-          string << path.shift.inspect
-          string << " "
+          parsed.shift
+          path << quote_literal(parsed.shift)
         else
-          string << " "
-          string << yield( path )
-          string << " "
+          path << yield( parsed )
         end
-        return string.squeeze(" ")
+        return path.squeeze(" ")
       end
+      # For backward compatibility
+      alias_method :preciate_to_string, :predicate_to_path
       private
+      def quote_literal( literal )
+        case literal
+        when String
+          # XPath 1.0 does not support escape characters.
+          # Assumes literal does not contain both single and double quotes.
+          if literal.include?("'")
+            "\"#{literal}\""
+          else
+            "'#{literal}'"
+          end
+        else
+          literal.inspect
+        end
+      end
       #LocationPath
       #  | RelativeLocationPath
       #  | '/' RelativeLocationPath?

data/lib/rexml/rexml.rb CHANGED Viewed

@@ -26,10 +26,12 @@
 # - REXML::Document.
 # - REXML::Element.
 #
+# There's also an {REXML tutorial}[doc/rexml/tutorial_rdoc.html].
+#
 module REXML
   COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
   DATE = "2008/019"
-  VERSION = "3.2.5"
+  VERSION = "3.3.0"
   REVISION = ""
   Copyright = COPYRIGHT

data/lib/rexml/source.rb CHANGED Viewed

@@ -1,8 +1,28 @@
 # coding: US-ASCII
 # frozen_string_literal: false
+require "strscan"
 require_relative 'encoding'
 module REXML
+  if StringScanner::Version < "1.0.0"
+    module StringScannerCheckScanString
+      refine StringScanner do
+        def check(pattern)
+          pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
+          super(pattern)
+        end
+        def scan(pattern)
+          pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
+          super(pattern)
+        end
+      end
+    end
+    using StringScannerCheckScanString
+  end
   # Generates Source-s.  USE THIS CLASS.
   class SourceFactory
     # Generates a Source object
@@ -30,18 +50,27 @@ module REXML
   # objects and provides consumption of text
   class Source
     include Encoding
-    # The current buffer (what we're going to read next)
-    attr_reader :buffer
     # The line number of the last consumed text
     attr_reader :line
     attr_reader :encoding
+    module Private
+      PRE_DEFINED_TERM_PATTERNS = {}
+      pre_defined_terms = ["'", '"', "<"]
+      pre_defined_terms.each do |term|
+        PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
+      end
+    end
+    private_constant :Private
+    include Private
     # Constructor
     # @param arg must be a String, and should be a valid XML document
     # @param encoding if non-null, sets the encoding of the source to this
     # value, overriding all encoding detection
     def initialize(arg, encoding=nil)
-      @orig = @buffer = arg
+      @orig = arg
+      @scanner = StringScanner.new(@orig)
       if encoding
         self.encoding = encoding
       else
@@ -50,6 +79,14 @@ module REXML
       @line = 0
     end
+    # The current buffer (what we're going to read next)
+    def buffer
+      @scanner.rest
+    end
+    def buffer_encoding=(encoding)
+      @scanner.string.force_encoding(encoding)
+    end
     # Inherited from Encoding
     # Overridden to support optimized en/decoding
@@ -58,98 +95,78 @@ module REXML
       encoding_updated
     end
-    # Scans the source for a given pattern.  Note, that this is not your
-    # usual scan() method.  For one thing, the pattern argument has some
-    # requirements; for another, the source can be consumed.  You can easily
-    # confuse this method.  Originally, the patterns were easier
-    # to construct and this method more robust, because this method
-    # generated search regexps on the fly; however, this was
-    # computationally expensive and slowed down the entire REXML package
-    # considerably, since this is by far the most commonly called method.
-    # @param pattern must be a Regexp, and must be in the form of
-    # /^\s*(#{your pattern, with no groups})(.*)/.  The first group
-    # will be returned; the second group is used if the consume flag is
-    # set.
-    # @param consume if true, the pattern returned will be consumed, leaving
-    # everything after it in the Source.
-    # @return the pattern, if found, or nil if the Source is empty or the
-    # pattern is not found.
-    def scan(pattern, cons=false)
-      return nil if @buffer.nil?
-      rv = @buffer.scan(pattern)
-      @buffer = $' if cons and rv.size>0
-      rv
+    def read(term = nil)
     end
-    def read
+    def read_until(term)
+      pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
+      data = @scanner.scan_until(pattern)
+      unless data
+        data = @scanner.rest
+        @scanner.pos = @scanner.string.bytesize
+      end
+      data
     end
-    def consume( pattern )
-      @buffer = $' if pattern.match( @buffer )
+    def ensure_buffer
     end
-    def match_to( char, pattern )
-      return pattern.match(@buffer)
+    def match(pattern, cons=false)
+      if cons
+        @scanner.scan(pattern).nil? ? nil : @scanner
+      else
+        @scanner.check(pattern).nil? ? nil : @scanner
+      end
     end
-    def match_to_consume( char, pattern )
-      md = pattern.match(@buffer)
-      @buffer = $'
-      return md
+    def position
+      @scanner.pos
     end
-    def match(pattern, cons=false)
-      md = pattern.match(@buffer)
-      @buffer = $' if cons and md
-      return md
+    def position=(pos)
+      @scanner.pos = pos
     end
     # @return true if the Source is exhausted
     def empty?
-      @buffer == ""
-    end
-    def position
-      @orig.index( @buffer )
+      @scanner.eos?
     end
     # @return the current line in the source
     def current_line
       lines = @orig.split
-      res = lines.grep @buffer[0..30]
+      res = lines.grep @scanner.rest[0..30]
       res = res[-1] if res.kind_of? Array
       lines.index( res ) if res
     end
     private
     def detect_encoding
-      buffer_encoding = @buffer.encoding
+      scanner_encoding = @scanner.rest.encoding
       detected_encoding = "UTF-8"
       begin
-        @buffer.force_encoding("ASCII-8BIT")
-        if @buffer[0, 2] == "\xfe\xff"
-          @buffer[0, 2] = ""
+        @scanner.string.force_encoding("ASCII-8BIT")
+        if @scanner.scan(/\xfe\xff/n)
           detected_encoding = "UTF-16BE"
-        elsif @buffer[0, 2] == "\xff\xfe"
-          @buffer[0, 2] = ""
+        elsif @scanner.scan(/\xff\xfe/n)
           detected_encoding = "UTF-16LE"
-        elsif @buffer[0, 3] == "\xef\xbb\xbf"
-          @buffer[0, 3] = ""
+        elsif @scanner.scan(/\xef\xbb\xbf/n)
           detected_encoding = "UTF-8"
         end
       ensure
-        @buffer.force_encoding(buffer_encoding)
+        @scanner.string.force_encoding(scanner_encoding)
       end
       self.encoding = detected_encoding
     end
     def encoding_updated
       if @encoding != 'UTF-8'
-        @buffer = decode(@buffer)
+        @scanner.string = decode(@scanner.rest)
         @to_utf = true
       else
         @to_utf = false
-        @buffer.force_encoding ::Encoding::UTF_8
+        @scanner.string.force_encoding(::Encoding::UTF_8)
       end
     end
   end
@@ -172,7 +189,7 @@ module REXML
       end
       if !@to_utf and
-          @buffer.respond_to?(:force_encoding) and
+          @orig.respond_to?(:force_encoding) and
           @source.respond_to?(:external_encoding) and
           @source.external_encoding != ::Encoding::UTF_8
         @force_utf8 = true
@@ -181,65 +198,62 @@ module REXML
       end
     end
-    def scan(pattern, cons=false)
-      rv = super
-      # You'll notice that this next section is very similar to the same
-      # section in match(), but just a liiittle different.  This is
-      # because it is a touch faster to do it this way with scan()
-      # than the way match() does it; enough faster to warrant duplicating
-      # some code
-      if rv.size == 0
-        until @buffer =~ pattern or @source.nil?
-          begin
-            @buffer << readline
-          rescue Iconv::IllegalSequence
-            raise
-          rescue
-            @source = nil
-          end
-        end
-        rv = super
-      end
-      rv.taint if RUBY_VERSION < '2.7'
-      rv
-    end
-    def read
+    def read(term = nil)
+      term = encode(term) if term
       begin
-        @buffer << readline
+        @scanner << readline(term)
+        true
       rescue Exception, NameError
         @source = nil
+        false
+      end
+    end
+    def read_until(term)
+      pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
+      term = encode(term)
+      until str = @scanner.scan_until(pattern)
+        break if @source.nil?
+        break if @source.eof?
+        @scanner << readline(term)
+      end
+      if str
+        read if @scanner.eos? and !@source.eof?
+        str
+      else
+        rest = @scanner.rest
+        @scanner.pos = @scanner.string.bytesize
+        rest
       end
     end
-    def consume( pattern )
-      match( pattern, true )
+    def ensure_buffer
+      read if @scanner.eos? && @source
     end
+    # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
+    # - ">"
+    # - "XXX>" (X is any string excluding '>')
     def match( pattern, cons=false )
-      rv = pattern.match(@buffer)
-      @buffer = $' if cons and rv
-      while !rv and @source
-        begin
-          @buffer << readline
-          rv = pattern.match(@buffer)
-          @buffer = $' if cons and rv
-        rescue
-          @source = nil
+      while true
+        if cons
+          md = @scanner.scan(pattern)
+        else
+          md = @scanner.check(pattern)
         end
+        break if md
+        return nil if pattern.is_a?(String)
+        return nil if @source.nil?
+        return nil unless read
       end
-      rv.taint if RUBY_VERSION < '2.7'
-      rv
+      md.nil? ? nil : @scanner
     end
     def empty?
       super and ( @source.nil? || @source.eof? )
     end
-    def position
-      @er_source.pos rescue 0
-    end
     # @return the current line in the source
     def current_line
       begin
@@ -263,8 +277,8 @@ module REXML
     end
     private
-    def readline
-      str = @source.readline(@line_break)
+    def readline(term = nil)
+      str = @source.readline(term || @line_break)
       if @pending_buffer
         if str.nil?
           str = @pending_buffer
@@ -290,7 +304,7 @@ module REXML
         @source.set_encoding(@encoding, @encoding)
       end
       @line_break = encode(">")
-      @pending_buffer, @buffer = @buffer, ""
+      @pending_buffer, @scanner.string = @scanner.rest, ""
       @pending_buffer.force_encoding(@encoding)
       super
     end

data/lib/rexml/text.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-# frozen_string_literal: false
+# frozen_string_literal: true
 require_relative 'security'
 require_relative 'entity'
 require_relative 'doctype'
@@ -131,7 +131,7 @@ module REXML
     def Text.check string, pattern, doctype
       # illegal anywhere
-      if string !~ VALID_XML_CHARS
+      if !string.match?(VALID_XML_CHARS)
         if String.method_defined? :encode
           string.chars.each do |c|
             case c.ord
@@ -371,7 +371,7 @@ module REXML
       copy = input.to_s
       # Doing it like this rather than in a loop improves the speed
       #copy = copy.gsub( EREFERENCE, '&amp;' )
-      copy = copy.gsub( "&", "&amp;" )
+      copy = copy.gsub( "&", "&amp;" ) if copy.include?("&")
       if doctype
         # Replace all ampersands that aren't part of an entity
         doctype.entities.each_value do |entity|
@@ -382,7 +382,9 @@ module REXML
       else
         # Replace all ampersands that aren't part of an entity
         DocType::DEFAULT_ENTITIES.each_value do |entity|
-          copy = copy.gsub(entity.value, "&#{entity.name};" )
+          if copy.include?(entity.value)
+            copy = copy.gsub(entity.value, "&#{entity.name};" )
+          end
         end
       end
       copy