RubyGems - right_scraper - Versions diffs - 3.2.6 → 5.0.1 - Mend

right_scraper 3.2.6 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92)

checksums.yaml +7 -0
data/lib/right_scraper.rb +16 -34
data/lib/right_scraper/builders.rb +32 -0
data/lib/right_scraper/builders/base.rb +19 -20
data/lib/right_scraper/builders/filesystem.rb +8 -6
data/lib/right_scraper/builders/union.rb +4 -1
data/lib/right_scraper/loggers.rb +31 -0
data/lib/right_scraper/loggers/base.rb +113 -0
data/lib/right_scraper/loggers/default.rb +98 -0
data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
data/lib/right_scraper/processes.rb +33 -0
data/lib/right_scraper/processes/shell.rb +227 -0
data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
data/lib/right_scraper/processes/svn_client.rb +117 -0
data/lib/right_scraper/processes/warden.rb +358 -0
data/lib/right_scraper/registered_base.rb +154 -0
data/lib/right_scraper/repositories.rb +33 -0
data/lib/right_scraper/repositories/base.rb +271 -232
data/lib/right_scraper/repositories/download.rb +8 -6
data/lib/right_scraper/repositories/git.rb +8 -9
data/lib/right_scraper/repositories/svn.rb +8 -8
data/lib/right_scraper/resources.rb +32 -0
data/lib/right_scraper/resources/base.rb +5 -1
data/lib/right_scraper/resources/cookbook.rb +34 -27
data/lib/right_scraper/resources/workflow.rb +27 -28
data/lib/right_scraper/retrievers.rb +34 -0
data/lib/right_scraper/retrievers/base.rb +80 -84
data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
data/lib/right_scraper/retrievers/download.rb +125 -117
data/lib/right_scraper/retrievers/git.rb +377 -223
data/lib/right_scraper/retrievers/svn.rb +102 -62
data/lib/right_scraper/scanners.rb +37 -0
data/lib/right_scraper/scanners/base.rb +77 -80
data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
data/lib/right_scraper/scanners/union.rb +61 -58
data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
data/lib/right_scraper/scrapers.rb +32 -0
data/lib/right_scraper/scrapers/base.rb +217 -205
data/lib/right_scraper/scrapers/cookbook.rb +42 -40
data/lib/right_scraper/scrapers/workflow.rb +57 -58
data/lib/right_scraper/version.rb +3 -0
data/right_scraper.gemspec +12 -16
metadata +57 -163
data/Gemfile +0 -15
data/Rakefile +0 -89
data/lib/right_scraper/logger.rb +0 -107
data/lib/right_scraper/loggers/noisy.rb +0 -85
data/lib/right_scraper/repositories/mock.rb +0 -70
data/lib/right_scraper/retrievers/checkout.rb +0 -79
data/lib/right_scraper/scraper_logger.rb +0 -66
data/lib/right_scraper/svn_client.rb +0 -164
data/right_scraper.rconf +0 -13
data/spec/builder_spec.rb +0 -50
data/spec/cookbook_helper.rb +0 -73
data/spec/cookbook_manifest_spec.rb +0 -93
data/spec/cookbook_s3_upload_spec.rb +0 -159
data/spec/download/download_retriever_spec.rb +0 -118
data/spec/download/download_retriever_spec_helper.rb +0 -72
data/spec/download/download_spec.rb +0 -128
data/spec/download/multi_dir_spec.rb +0 -106
data/spec/download/multi_dir_spec_helper.rb +0 -40
data/spec/git/cookbook_spec.rb +0 -165
data/spec/git/demokey +0 -27
data/spec/git/demokey.pub +0 -1
data/spec/git/password_key +0 -30
data/spec/git/password_key.pub +0 -1
data/spec/git/repository_spec.rb +0 -110
data/spec/git/retriever_spec.rb +0 -553
data/spec/git/retriever_spec_helper.rb +0 -112
data/spec/git/scraper_spec.rb +0 -151
data/spec/git/ssh_spec.rb +0 -174
data/spec/git/url_spec.rb +0 -103
data/spec/logger_spec.rb +0 -185
data/spec/repository_spec.rb +0 -111
data/spec/retriever_spec_helper.rb +0 -146
data/spec/scanner_spec.rb +0 -61
data/spec/scraper_helper.rb +0 -88
data/spec/scraper_spec.rb +0 -147
data/spec/spec_helper.rb +0 -185
data/spec/svn/cookbook_spec.rb +0 -96
data/spec/svn/multi_svn_spec.rb +0 -64
data/spec/svn/multi_svn_spec_helper.rb +0 -40
data/spec/svn/repository_spec.rb +0 -72
data/spec/svn/retriever_spec.rb +0 -266
data/spec/svn/scraper_spec.rb +0 -90
data/spec/svn/svn_retriever_spec_helper.rb +0 -90
data/spec/svn/url_spec.rb +0 -47
data/spec/url_spec.rb +0 -164

data/lib/right_scraper/scrapers.rb ADDED

@@ -0,0 +1,32 @@
+#
+# Copyright (c) 2013 RightScale Inc
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# ancestor
+require 'right_scraper'
+module RightScraper
+  module Scrapers
+    autoload :Base, 'right_scraper/scrapers/base'
+    autoload :Cookbook, 'right_scraper/scrapers/cookbook'
+    autoload :Workflow, 'right_scraper/scrapers/workflow'
+  end
+end

data/lib/right_scraper/scrapers/base.rb CHANGED

@@ -1,5 +1,5 @@
 #--
-# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
+# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
 #
 # Permission is hereby granted, free of charge, to any person obtaining
 # a copy of this software and associated documentation files (the
@@ -21,242 +21,254 @@
 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #++
-module RightScraper
-  module Scrapers
+# ancestor
+require 'right_scraper/scrapers'
-    # Base class for all scrapers. Subclasses should override
-    # #find_next which instantiates the resource from the file system.
-    class Base
-      # Scraped resources
-      attr_reader :resources
-      # Initialize scraper
-      #
-      # === Options
-      # <tt>:kind</tt>:: Scraper type, one of :cookbook or :workflow
-      # <tt>:repo_dir</tt>:: Required, path to directory containing files
-      #   to be scraped
-      # <tt>:ignorable_paths</tt>:: List of directory names that should
-      #   be ignored by scraper
-      # <tt>:scanners</tt>:: List of Scanner classes to use, optional
-      # <tt>:builders</tt>:: List of Builder classes to use, optional
-      #
-      # === Return
-      # scraper(Scrapers::Base):: Corresponding scraper instance
-      def self.scraper(options)
-        scraper_kind = options.delete(:kind)
-        scraper_class = @@types[scraper_kind]
-        raise "Can't understand how to build scraper #{scraper_kind}" if scraper_class.nil?
-        scraper = scraper_class.new(options)
-      end
+module RightScraper::Scrapers
+  class ScraperError < Exception; end
+  # Base class for all scrapers. Subclasses should override
+  # #find_next which instantiates the resource from the file system.
+  class Base < ::RightScraper::RegisteredBase
+    # Scraped resources
+    attr_reader :resources
+    # @return [Module] module for registered repository types
+    def self.registration_module
+      ::RightScraper::Scrapers
+    end
-      # Do the scrape!
-      # Extract all resources from directory
-      # Call this method or call 'next_resource' to retrieve
-      # resources one by one (you must then call 'close' yourself)
-      # Fill @resources
-      #
-      # === Return
-      # resources<Array>:: List of all scraped resources
-      def scrape
-        @resources = []
-        begin
+    # Initialize scraper
+    #
+    # === Options
+    # <tt>:kind</tt>:: Scraper type, one of :cookbook or :workflow
+    # <tt>:repo_dir</tt>:: Required, path to directory containing files
+    #   to be scraped
+    # <tt>:ignorable_paths</tt>:: List of directory names that should
+    #   be ignored by scraper
+    # <tt>:scanners</tt>:: List of Scanner classes to use, optional
+    # <tt>:builders</tt>:: List of Builder classes to use, optional
+    #
+    # === Return
+    # scraper(Scrapers::Base):: Corresponding scraper instance
+    def self.scraper(options)
+      scraper_kind = options.delete(:kind)
+      scraper_class = query_registered_type(scraper_kind)
+      scraper_class.new(options)
+    end
+    # Do the scrape!
+    # Extract all resources from directory
+    # Call this method or call 'next_resource' to retrieve
+    # resources one by one (you must then call 'close' yourself)
+    # Fill @resources
+    #
+    # === Return
+    # resources<Array>:: List of all scraped resources
+    def scrape
+      @resources = []
+      begin
+        resource = next_resource
+        until resource.nil?
+          @resources << resource
           resource = next_resource
-          until resource.nil?
-            @resources << resource
-            resource = next_resource
-          end
-        ensure
-          close
         end
-        @resources
+      ensure
+        close
       end
+      @resources
+    end
-      # Return the next resource in the filesystem, or nil if none.  As
-      # a part of building the resources, invokes the builders.
-      # A resource can be a cookbook, a workflow, a RightScript etc.
-      #
-      # === Returns
-      # Object:: next resource in filesystem, or nil if none.
-      def next_resource
-        @logger.operation(:next) do
-          next nil if @next.nil?
+    # Return the next resource in the filesystem, or nil if none.  As
+    # a part of building the resources, invokes the builders.
+    # A resource can be a cookbook, a workflow, a RightScript etc.
+    #
+    # === Returns
+    # Object:: next resource in filesystem, or nil if none.
+    def next_resource
+      @logger.operation(:next) do
+        next nil if @next.nil?
-          value = @next
-          @next = search_dirs
-          while @next.nil? && !@queue.empty?
-            pop_queue
-          end
-          value
+        value = @next
+        @next = search_dirs
+        while @next.nil? && !@queue.empty?
+          pop_queue
         end
+        value
       end
+    end
-      # Close any opened file descriptor
-      #
-      # === Return
-      # true:: Always return true
-      def close
-        @builder.finish
-        if @stack && !@stack.empty?
-          @stack.each {|s| s.close}
-          @stack = []
-        end
-        true
+    # Close any opened file descriptor
+    #
+    # === Return
+    # true:: Always return true
+    def close
+      @builder.finish
+      if @stack && !@stack.empty?
+        @stack.each {|s| s.close}
+        @stack = []
       end
+      true
+    end
-      protected
-      # Directory containing files to be scraped
-      attr_reader :repo_dir
+    protected
-      # (Hash) Lookup table from textual description of scraper type
-      # ('cookbook' or 'workflow' currently) to the class that
-      # represents that scraper.
-      @@types = {} unless class_variable_defined?(:@@types)
+    # Directory containing files to be scraped
+    attr_reader :repo_dir
-      # Initialize scraper
-      #
-      # === Options
-      # <tt>:repository</tt>:: Required, original repository containing scraped
-      #   files
-       # <tt>:repo_dir</tt>:: Required, path to directory containing files
-      #   to be scraped
-      # <tt>:ignorable_paths</tt>:: List of directory names that should
-      #   be ignored by scraper
-      # <tt>:scanners</tt>:: List of Scanner classes to use, defaulting
-      #   to RightScraper::Scanners::ResourceManifest and
-      #   RightScraper::Scanners::CookbookMetadata
-      # <tt>:builders</tt>:: List of Builder classes to use, defaulting to
-      #   RightScaper::Builders::Filesystem
-      #
-      def initialize(options)
-        raise "Repository required when initializing a scraper" unless options[:repository]
-        raise "Repository directory required when initializing a scraper" unless options[:repo_dir]
-        @repository = options[:repository]
-        @logger = options[:logger] || ScraperLogger.new
-        @repo_dir = options[:repo_dir]
-        @ignorable_paths = options[:ignorable_paths]
-        @stack = []
-        @queue = (@repository.resources_path || [""]).reverse
-        @resources = []
-        scanners = options[:scanners] || default_scanners
-        @scanner = RightScraper::Scanners::Union.new(scanners, options)
-        builders = options[:builders] || default_builders
-        @builder = RightScraper::Builders::Union.new(builders, :ignorable_paths => @ignorable_paths,
-                                                               :scanner         => @scanner,
-                                                               :logger          => @logger,
-                                                               :max_bytes       => @max_bytes,
-                                                               :max_seconds     => @max_seconds)
-        pop_queue # Initialize @next
+    # Initialize scraper
+    #
+    # === Options
+    # <tt>:repository</tt>:: Required, original repository containing scraped
+    #   files
+     # <tt>:repo_dir</tt>:: Required, path to directory containing files
+    #   to be scraped
+    # <tt>:ignorable_paths</tt>:: List of directory names that should
+    #   be ignored by scraper
+    # <tt>:scanners</tt>:: List of Scanner classes to use, defaulting
+    #   to RightScraper::Scanners::ResourceManifest and
+    #   RightScraper::Scanners::CookbookMetadata
+    # <tt>:builders</tt>:: List of Builder classes to use, defaulting to
+    #   RightScaper::Builders::Filesystem
+    #
+    def initialize(options)
+      raise ScraperError.new("Repository required when initializing a scraper") unless options[:repository]
+      raise ScraperError.new("Repository directory required when initializing a scraper") unless options[:repo_dir]
+      @repository = options[:repository]
+      unless @logger = options[:logger]
+        raise ::ArgumentError, ':logger is required'
       end
+      @repo_dir = options[:repo_dir]
+      @ignorable_paths = options[:ignorable_paths]
+      @stack = []
+      @queue = (@repository.resources_path || [""]).reverse
-      # List of default scanners for this scaper
-      #
-      # === Return
-      # Array<Scanner>:: Default scanners
-      def default_scanners
-      end
+      # Make sure the requested cookbook resource path exists
+      missing_paths = @queue.select {|path| !File.directory?(File.join(repo_dir, path)) }.compact.sort
-      # List of default builders for this scaper
-      #
-      # === Return
-      # Array<Builder>:: Default builders
-      def default_brokers
-      end
+      raise ScraperError.new(
+        "Cookbook resource path#{'s' unless missing_paths.size < 2}: " +
+        "[#{missing_paths.join(', ')}] #{missing_paths.size < 2 ? "is" : "are"} " +
+        "non-existent for this repository and branch") unless missing_paths.empty?
-      # Find the interesting item in given directory
-      # Override in actual scraper implementation
-      #
-      # === Parameters
-      # dir(Dir):: directory to begin search in
-      def find_next(dir)
-        raise NotImplementedError
-      end
+      @resources = []
+      scanners = options[:scanners] || default_scanners
+      @scanner = RightScraper::Scanners::Union.new(scanners, options)
+      builders = options[:builders] || default_builders
+      @builder = RightScraper::Builders::Union.new(builders, :ignorable_paths => @ignorable_paths,
+                                                             :scanner         => @scanner,
+                                                             :logger          => @logger,
+                                                             :max_bytes       => @max_bytes,
+                                                             :max_seconds     => @max_seconds)
+      pop_queue # Initialize @next
+    end
-      # Return the position of the scraper.  Here, the position is the
-      # path relative from the top of the temporary directory.  Akin to
-      # IO#pos or IO#tell.
-      def pos
-        strip_repo_dir(@stack.last.path)
-      end
-      alias_method :tell, :pos
+    # List of default scanners for this scaper
+    #
+    # === Return
+    # Array<Scanner>:: Default scanners
+    def default_scanners
+    end
-      # Turn path from an absolute filesystem location to a relative
-      # file location from #repo_dir.
-      #
-      # === Parameters
-      # path(String):: absolute path to relativize
-      #
-      # === Returns
-      # res(String):: relative pathname for path
-      def strip_repo_dir(path)
-        res = path[repo_dir.length+1..-1]
-        if res == nil || res == ""
-          "."
-        else
-          res
-        end
-      end
-      private :strip_repo_dir
+    # List of default builders for this scaper
+    #
+    # === Return
+    # Array<Builder>:: Default builders
+    def default_brokers
+    end
+    # Find the interesting item in given directory
+    # Override in actual scraper implementation
+    #
+    # === Parameters
+    # dir(Dir):: directory to begin search in
+    def find_next(dir)
+      raise NotImplementedError
+    end
+    # Return the position of the scraper.  Here, the position is the
+    # path relative from the top of the temporary directory.  Akin to
+    # IO#pos or IO#tell.
+    def pos
+      strip_repo_dir(@stack.last.path)
+    end
+    alias_method :tell, :pos
-      # Test if the entry given is ignorable.  By default just uses
-      # #ignorable_paths
-      #
-      # === Parameters
-      # entry(String):: file name to check
-      #
-      # === Returns
-      # Boolean:: true if the entry should be ignored
-      def ignorable?(entry)
-        @ignorable_paths.include?(entry)
+    # Turn path from an absolute filesystem location to a relative
+    # file location from #repo_dir.
+    #
+    # === Parameters
+    # path(String):: absolute path to relativize
+    #
+    # === Returns
+    # res(String):: relative pathname for path
+    def strip_repo_dir(path)
+      res = path[repo_dir.length+1..-1]
+      if res == nil || res == ""
+        "."
+      else
+        res
       end
+    end
+    private :strip_repo_dir
-      # Initialize @next with the next resource
-      #
-      # === Returns
-      # @next(Resources::Base):: Next resource
-      def pop_queue
-        until @queue.empty?
-          nextdir = @queue.pop
-          if File.directory?(File.join(repo_dir, nextdir))
-            @next = find_next(Dir.new(File.join(repo_dir, nextdir)))
-            return @next
-          else
-            @logger.warn("When processing in #{@repository}, no such path #{nextdir}")
-          end
+    # Test if the entry given is ignorable.  By default just uses
+    # #ignorable_paths
+    #
+    # === Parameters
+    # entry(String):: file name to check
+    #
+    # === Returns
+    # Boolean:: true if the entry should be ignored
+    def ignorable?(entry)
+      @ignorable_paths.include?(entry)
+    end
+    # Initialize @next with the next resource
+    #
+    # === Returns
+    # @next(Resources::Base):: Next resource
+    def pop_queue
+      until @queue.empty?
+        nextdir = File.join(repo_dir, @queue.pop)
+        if File.directory?(nextdir)
+          @next = find_next(Dir.new(nextdir))
+          return @next
+        else
+          @logger.warn("When processing in #{@repository}, no such path #{nextdir}")
         end
-        @next = nil
       end
+      @next = nil
+    end
-      # Search the directory stack looking for the next resource.
-      def search_dirs
-        @logger.operation(:searching) do
-          until @stack.empty?
-            dir = @stack.last
-            entry = dir.read
-            if entry == nil
-              dir.close
-              @stack.pop
-              next
-            end
+    # Search the directory stack looking for the next resource.
+    def search_dirs
+      @logger.operation(:searching) do
+        until @stack.empty?
+          dir = @stack.last
+          entry = dir.read
+          if entry == nil
+            dir.close
+            @stack.pop
+            next
+          end
-            next if entry == '.' || entry == '..'
-            next if ignorable?(entry)
+          next if entry == '.' || entry == '..'
+          next if ignorable?(entry)
-            fullpath = File.join(dir.path, entry)
+          fullpath = File.join(dir.path, entry)
-            if File.directory?(fullpath)
-              result = find_next(Dir.new(fullpath))
-              break
-            end
+          if File.directory?(fullpath)
+            result = find_next(Dir.new(fullpath))
+            break
           end
-          result
         end
+        result
       end
-      private :search_dirs
     end
+    private :search_dirs
   end
 end