RubyGems - right_scraper - Versions diffs - 3.2.6 → 5.0.1 - Mend

right_scraper 3.2.6 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92) hide show

checksums.yaml +7 -0
data/lib/right_scraper.rb +16 -34
data/lib/right_scraper/builders.rb +32 -0
data/lib/right_scraper/builders/base.rb +19 -20
data/lib/right_scraper/builders/filesystem.rb +8 -6
data/lib/right_scraper/builders/union.rb +4 -1
data/lib/right_scraper/loggers.rb +31 -0
data/lib/right_scraper/loggers/base.rb +113 -0
data/lib/right_scraper/loggers/default.rb +98 -0
data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
data/lib/right_scraper/processes.rb +33 -0
data/lib/right_scraper/processes/shell.rb +227 -0
data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
data/lib/right_scraper/processes/svn_client.rb +117 -0
data/lib/right_scraper/processes/warden.rb +358 -0
data/lib/right_scraper/registered_base.rb +154 -0
data/lib/right_scraper/repositories.rb +33 -0
data/lib/right_scraper/repositories/base.rb +271 -232
data/lib/right_scraper/repositories/download.rb +8 -6
data/lib/right_scraper/repositories/git.rb +8 -9
data/lib/right_scraper/repositories/svn.rb +8 -8
data/lib/right_scraper/resources.rb +32 -0
data/lib/right_scraper/resources/base.rb +5 -1
data/lib/right_scraper/resources/cookbook.rb +34 -27
data/lib/right_scraper/resources/workflow.rb +27 -28
data/lib/right_scraper/retrievers.rb +34 -0
data/lib/right_scraper/retrievers/base.rb +80 -84
data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
data/lib/right_scraper/retrievers/download.rb +125 -117
data/lib/right_scraper/retrievers/git.rb +377 -223
data/lib/right_scraper/retrievers/svn.rb +102 -62
data/lib/right_scraper/scanners.rb +37 -0
data/lib/right_scraper/scanners/base.rb +77 -80
data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
data/lib/right_scraper/scanners/union.rb +61 -58
data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
data/lib/right_scraper/scrapers.rb +32 -0
data/lib/right_scraper/scrapers/base.rb +217 -205
data/lib/right_scraper/scrapers/cookbook.rb +42 -40
data/lib/right_scraper/scrapers/workflow.rb +57 -58
data/lib/right_scraper/version.rb +3 -0
data/right_scraper.gemspec +12 -16
metadata +57 -163
data/Gemfile +0 -15
data/Rakefile +0 -89
data/lib/right_scraper/logger.rb +0 -107
data/lib/right_scraper/loggers/noisy.rb +0 -85
data/lib/right_scraper/repositories/mock.rb +0 -70
data/lib/right_scraper/retrievers/checkout.rb +0 -79
data/lib/right_scraper/scraper_logger.rb +0 -66
data/lib/right_scraper/svn_client.rb +0 -164
data/right_scraper.rconf +0 -13
data/spec/builder_spec.rb +0 -50
data/spec/cookbook_helper.rb +0 -73
data/spec/cookbook_manifest_spec.rb +0 -93
data/spec/cookbook_s3_upload_spec.rb +0 -159
data/spec/download/download_retriever_spec.rb +0 -118
data/spec/download/download_retriever_spec_helper.rb +0 -72
data/spec/download/download_spec.rb +0 -128
data/spec/download/multi_dir_spec.rb +0 -106
data/spec/download/multi_dir_spec_helper.rb +0 -40
data/spec/git/cookbook_spec.rb +0 -165
data/spec/git/demokey +0 -27
data/spec/git/demokey.pub +0 -1
data/spec/git/password_key +0 -30
data/spec/git/password_key.pub +0 -1
data/spec/git/repository_spec.rb +0 -110
data/spec/git/retriever_spec.rb +0 -553
data/spec/git/retriever_spec_helper.rb +0 -112
data/spec/git/scraper_spec.rb +0 -151
data/spec/git/ssh_spec.rb +0 -174
data/spec/git/url_spec.rb +0 -103
data/spec/logger_spec.rb +0 -185
data/spec/repository_spec.rb +0 -111
data/spec/retriever_spec_helper.rb +0 -146
data/spec/scanner_spec.rb +0 -61
data/spec/scraper_helper.rb +0 -88
data/spec/scraper_spec.rb +0 -147
data/spec/spec_helper.rb +0 -185
data/spec/svn/cookbook_spec.rb +0 -96
data/spec/svn/multi_svn_spec.rb +0 -64
data/spec/svn/multi_svn_spec_helper.rb +0 -40
data/spec/svn/repository_spec.rb +0 -72
data/spec/svn/retriever_spec.rb +0 -266
data/spec/svn/scraper_spec.rb +0 -90
data/spec/svn/svn_retriever_spec_helper.rb +0 -90
data/spec/svn/url_spec.rb +0 -47
data/spec/url_spec.rb +0 -164

data/lib/right_scraper/retrievers/checkout_base.rb ADDED Viewed

@@ -0,0 +1,178 @@
+#--
+# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# 'Software'), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#++
+# ancestor
+require 'right_scraper/retrievers'
+require 'fileutils'
+module RightScraper::Retrievers
+  # Base class for retrievers that want to do version control operations
+  # (CVS, SVN, etc.). Subclasses can get away with implementing only
+  # Retrievers::Base#available? and #do_checkout but to support incremental
+  # operation need to implement #exists? and #do_update, in addition to
+  # Retrievers::Base#ignorable_paths.
+  class CheckoutBase < ::RightScraper::Retrievers::Base
+    # Attempts to update and then resorts to clean checkout for repository.
+    def retrieve
+      raise RetrieverError.new("retriever is unavailable") unless available?
+      updated = false
+      explanation = ''
+      if exists?
+        @logger.operation(:updating) do
+          # a retriever may be able to determine that the repo directory is
+          # already pointing to the same commit as the revision. in that case
+          # we can return quickly.
+          if remote_differs?
+            # there is no point in updating and failing the size check when the
+            # directory on disk already exceeds size limit; fall back to a clean
+            # checkout in hopes that the latest revision corrects the issue.
+            if size_limit_exceeded?
+              explanation = 'switching to checkout due to existing directory exceeding size limimt'
+            else
+              # attempt update.
+              begin
+                do_update
+                updated = true
+              rescue ::RightScraper::Processes::Shell::LimitError
+                # update exceeded a limitation; requires user intervention
+                raise
+              rescue Exception => e
+                # retry with clean checkout after discarding repo dir.
+                explanation = 'switching to checkout after unsuccessful update'
+              end
+            end
+          else
+            # no retrieval needed but warn exactly why we didn't do full
+            # checkout to avoid being challenged about it.
+            repo_ref = @repository.tag
+            do_update_tag
+            full_head_ref = @repository.tag
+            abbreviated_head_ref = full_head_ref[0..6]
+            if repo_ref == full_head_ref || repo_ref == abbreviated_head_ref
+              detail = abbreviated_head_ref
+            else
+              detail = "#{repo_ref} = #{abbreviated_head_ref}"
+            end
+            message =
+              "Skipped updating local directory due to the HEAD commit SHA " +
+              "on local matching the remote repository reference (#{detail})."
+            @logger.note_warning(message)
+            return false
+          end
+        end
+      end
+      # Clean checkout only if not updated.
+      unless updated
+        @logger.operation(:checkout, explanation) do
+          # remove any full or partial directory before attempting a clean
+          # checkout in case repo_dir is in a bad state.
+          if exists?
+            ::FileUtils.remove_entry_secure(@repo_dir)
+          end
+          ::FileUtils.mkdir_p(@repo_dir)
+          begin
+            do_checkout
+          rescue Exception
+            # clean checkout failed; repo directory is in an undetermined
+            # state and must be deleted to prevent a future update attempt.
+            if exists?
+              ::FileUtils.remove_entry_secure(@repo_dir) rescue nil
+            end
+            raise
+          end
+        end
+      end
+      true
+    end
+    # Return true if a checkout exists.
+    #
+    # === Returns
+    # Boolean:: true if the checkout already exists (and thus
+    #           incremental updating can occur).
+    def exists?
+      false
+    end
+    # Determines if the remote SHA/tag/branch referenced by the repository
+    # differs from what appears on disk, if possible. Not all retrievers will
+    # have this capability. If not, the retriever should default to returning
+    # true to indicate that the remote is changed.
+    #
+    # @return [TrueClass|FalseClass] true if changed
+    def remote_differs?
+      true
+    end
+    # Determines if total size of files in repo_dir has exceeded size limit.
+    #
+    # === Return
+    # @return [TrueClass|FalseClass] true if size limit exceeded
+    def size_limit_exceeded?
+      if @max_bytes
+        # note that Dir.glob ignores hidden directories (e.g. ".git") so the
+        # size total correctly excludes those hidden contents that are not to
+        # be uploaded after scrape. this may cause the on-disk directory size
+        # to far exceed the upload size.
+        globbie = ::File.join(@repo_dir, '**/*')
+        size = 0
+        ::Dir.glob(globbie) do |f|
+          size += ::File.stat(f).size rescue 0 if ::File.file?(f)
+          break if size > @max_bytes
+        end
+        size > @max_bytes
+      else
+        false
+      end
+    end
+    # Perform a de novo full checkout of the repository.  Subclasses
+    # must override this to do anything useful.
+    #
+    # @return [TrueClass] always true
+    def do_checkout
+      raise NotImplementedError
+    end
+    # Perform an incremental update of the checkout.  Subclasses that
+    # want to handle incremental updating need to override this.
+    #
+    # @return [TrueClass] always true
+    def do_update
+      raise NotImplementedError
+    end
+    # Updates the tag of the repository associated with this retriever to refer
+    # to the HEAD commit (SHA) on disk after retrieval.
+    #
+    # @return [TrueClass] always true
+    def do_update_tag
+      raise NotImplementedError
+    end
+  end
+end

data/lib/right_scraper/retrievers/download.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 #--
-# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
+# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
 #
 # Permission is hereby granted, free of charge, to any person obtaining
 # a copy of this software and associated documentation files (the
@@ -21,68 +21,110 @@
 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #++
+# ancestor
+require 'right_scraper/retrievers'
+require 'fileutils'
 require 'tempfile'
 require 'digest/sha1'
 require 'right_popen'
 require 'right_popen/safe_output_buffer'
-module RightScraper
-  module Retrievers
-    # A retriever for resources stored in archives on a web server
-    # somewhere.  Uses command line curl and command line tar.
-    class Download < Base
+module RightScraper::Retrievers
-      class DownloadError < Exception; end
+  # A retriever for resources stored in archives on a web server
+  # somewhere.  Uses command line curl and command line tar.
+  class Download < ::RightScraper::Retrievers::Base
-      @@available = false
+    class DownloadError < Exception; end
-      # Determines if downloader is available.
-      def available?
-        unless @@available
-          begin
-            # FIX: we might want to parse the result and require a minimum curl
-            # version.
-            cmd = "curl --version"
-            `#{cmd}`
-            if $?.success?
-              @@available = true
-            else
-              raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
-            end
-          rescue
-            @logger.note_error($!, :available, "download retriever is unavailable")
+    @@available = false
+    # Determines if downloader is available.
+    def available?
+      unless @@available
+        begin
+          # FIX: we might want to parse the result and require a minimum curl
+          # version.
+          cmd = "curl --version"
+          `#{cmd}`
+          if $?.success?
+            @@available = true
+          else
+            raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
           end
+        rescue
+          @logger.note_error($!, :available, "download retriever is unavailable")
         end
-        @@available
       end
+      @@available
+    end
+    # Directory used to download tarballs
+    def workdir
+      @workdir ||= ::File.join(::File.dirname(@repo_dir), 'download')
+    end
-      # Directory used to download tarballs
-      def workdir
-        @workdir ||= ::File.join(::File.dirname(@repo_dir), 'download')
+    # Download tarball and unpack it
+    def retrieve
+      raise RetrieverError.new("download retriever is unavailable") unless available?
+      ::FileUtils.remove_entry_secure @repo_dir if File.exists?(@repo_dir)
+      ::FileUtils.remove_entry_secure workdir if File.exists?(workdir)
+      ::FileUtils.mkdir_p @repo_dir
+      ::FileUtils.mkdir_p workdir
+      file = ::File.join(workdir, "package")
+      # TEAL FIX: we have to always-download the tarball before we can
+      # determine if contents have changed, but afterward we can compare the
+      # previous download against the latest downloaded and short-circuit the
+      # remaining flow for the no-difference case.
+      @logger.operation(:downloading) do
+        credential_command = if @repository.first_credential && @repository.second_credential
+          ['-u', "#{@repository.first_credential}:#{@repository.second_credential}"]
+        else
+          []
+        end
+        @output = ::RightScale::RightPopen::SafeOutputBuffer.new
+        @cmd = [
+          'curl',
+          '--silent', '--show-error', '--location', '--fail',
+          '--location-trusted', '-o', file, credential_command,
+          @repository.url
+        ].flatten
+        begin
+          ::RightScale::RightPopen.popen3_sync(
+            @cmd,
+            :target             => self,
+            :pid_handler        => :pid_download,
+            :timeout_handler    => :timeout_download,
+            :size_limit_handler => :size_limit_download,
+            :exit_handler       => :exit_download,
+            :stderr_handler     => :output_download,
+            :stdout_handler     => :output_download,
+            :inherit_io         => true,  # avoid killing any rails connection
+            :watch_directory    => workdir,
+            :size_limit_bytes   => @max_bytes,
+            :timeout_seconds    => @max_seconds)
+        rescue Exception => e
+          @logger.note_phase(:abort, :running_command, 'curl', e)
+          raise
+        end
       end
-      # Download tarball and unpack it
-      def retrieve
-        raise RetrieverError.new("download retriever is unavailable") unless available?
-        FileUtils.remove_entry_secure @repo_dir if File.exists?(@repo_dir)
-        FileUtils.remove_entry_secure workdir if File.exists?(workdir)
-        FileUtils.mkdir_p @repo_dir
-        FileUtils.mkdir_p workdir
-        file = File.join(workdir, "package")
-        @logger.operation(:downloading) do
-          credential_command = if @repository.first_credential && @repository.second_credential
-            ['-u', "#{@repository.first_credential}:#{@repository.second_credential}"]
-          else
-            []
-          end
+      note_tag(file)
+      @logger.operation(:unpacking) do
+        path = @repository.to_url.path
+        if path =~ /\.gz$/
+          extraction = "xzf"
+        elsif path =~ /\.bz2$/
+          extraction = "xjf"
+        else
+          extraction = "xf"
+        end
+        Dir.chdir(@repo_dir) do
           @output = ::RightScale::RightPopen::SafeOutputBuffer.new
-          @cmd = [
-            'curl',
-            '--silent', '--show-error', '--location', '--fail',
-            '--location-trusted', '-o', file, credential_command,
-            @repository.url
-          ].flatten
+          @cmd = ['tar', extraction, file]
           begin
             ::RightScale::RightPopen.popen3_sync(
               @cmd,
@@ -94,90 +136,56 @@ module RightScraper
               :stderr_handler     => :output_download,
               :stdout_handler     => :output_download,
               :inherit_io         => true,  # avoid killing any rails connection
-              :watch_directory    => workdir,
+              :watch_directory    => @repo_dir,
               :size_limit_bytes   => @max_bytes,
               :timeout_seconds    => @max_seconds)
           rescue Exception => e
-            @logger.note_phase(:abort, :running_command, 'curl', e)
+            @logger.note_phase(:abort, :running_command, @cmd.first, e)
             raise
           end
         end
-        note_tag(file)
-        @logger.operation(:unpacking) do
-          path = @repository.to_url.path
-          if path =~ /\.gz$/
-            extraction = "xzf"
-          elsif path =~ /\.bz2$/
-            extraction = "xjf"
-          else
-            extraction = "xf"
-          end
-          Dir.chdir(@repo_dir) do
-            @output = ::RightScale::RightPopen::SafeOutputBuffer.new
-            @cmd = ['tar', extraction, file]
-            begin
-              ::RightScale::RightPopen.popen3_sync(
-                @cmd,
-                :target             => self,
-                :pid_handler        => :pid_download,
-                :timeout_handler    => :timeout_download,
-                :size_limit_handler => :size_limit_download,
-                :exit_handler       => :exit_download,
-                :stderr_handler     => :output_download,
-                :stdout_handler     => :output_download,
-                :inherit_io         => true,  # avoid killing any rails connection
-                :watch_directory    => @repo_dir,
-                :size_limit_bytes   => @max_bytes,
-                :timeout_seconds    => @max_seconds)
-            rescue Exception => e
-              @logger.note_phase(:abort, :running_command, @cmd.first, e)
-              raise
-            end
-          end
-        end
       end
+      true
+    end
-      def pid_download(pid)
-        @logger.note_phase(:begin, :running_command, @cmd.first)
-        true
-      end
+    def pid_download(pid)
+      @logger.note_phase(:begin, :running_command, @cmd.first)
+      true
+    end
-      def output_download(data)
-        @output.safe_buffer_data(data)
-      end
+    def output_download(data)
+      @output.safe_buffer_data(data)
+    end
-      def timeout_download
-        raise DownloadError, "Downloader timed out"
-      end
+    def timeout_download
+      raise DownloadError, "Downloader timed out"
+    end
-      def size_limit_download
-        raise DownloadError, "Downloader exceeded size limit"
-      end
+    def size_limit_download
+      raise DownloadError, "Downloader exceeded size limit"
+    end
-      def exit_download(status)
-        unless status.success?
-          @output.safe_buffer_data("Exit code = #{status.exitstatus}")
-          raise DownloadError, "Downloader failed: #{@output.display_text}"
-        end
-        @logger.note_phase(:commit, :running_command, @cmd.first)
-        true
+    def exit_download(status)
+      unless status.success?
+        @output.safe_buffer_data("Exit code = #{status.exitstatus}")
+        raise DownloadError, "Downloader failed: #{@output.display_text}"
       end
+      @logger.note_phase(:commit, :running_command, @cmd.first)
+      true
+    end
-      # Amend @repository with the tag information from the downloaded
-      # file.
-      #
-      # === Parameters
-      # file(String):: file that was downloaded
-      def note_tag(file)
-        digest = Digest::SHA1.new
-        File.open(file) {|f| digest << f.read(4096) }
-        repo = @repository.clone
-        repo.tag = digest.hexdigest
-        @repository = repo
-      end
+    # Amend @repository with the tag information from the downloaded
+    # file.
+    #
+    # === Parameters
+    # file(String):: file that was downloaded
+    def note_tag(file)
+      digest = Digest::SHA1.new
+      File.open(file) {|f| digest << f.read(4096) }
+      repo = @repository.clone
+      repo.tag = digest.hexdigest
+      @repository = repo
     end
   end
 end