RubyGems - right_scraper - Versions diffs - 3.0.0 → 3.0.1 - Mend

right_scraper 3.0.0 → 3.0.1

Files changed (13) hide show

data/lib/right_scraper/retrievers/base.rb +8 -0
data/lib/right_scraper/retrievers/checkout.rb +6 -5
data/lib/right_scraper/retrievers/download.rb +23 -0
data/lib/right_scraper/retrievers/git.rb +28 -1
data/lib/right_scraper/retrievers/svn.rb +22 -0
data/lib/right_scraper/scraper.rb +1 -1
data/lib/right_scraper/scrapers/workflow.rb +8 -1
data/right_scraper.gemspec +1 -1
data/right_scraper.rconf +2 -2
data/spec/cookbook_s3_upload_spec.rb +7 -5
data/spec/git/retriever_spec.rb +27 -3
data/spec/svn/retriever_spec.rb +12 -7
metadata +29 -73

data/lib/right_scraper/retrievers/base.rb CHANGED Viewed

@@ -43,6 +43,9 @@ module RightScraper
       # String:: Path to directory where files are retrieved
       attr_reader :repo_dir
+      # exceptions
+      class RetrieverError < Exception; end
       # Create a new retriever for the given repository.  This class
       # recognizes several options, and subclasses may recognize
       # additional options.  Options may never be required.
@@ -73,6 +76,11 @@ module RightScraper
         end
       end
+      # Determines if retriever is available (has required CLI tools, etc.)
+      def available?
+        raise NotImplementedError
+      end
       # Paths to ignore when traversing the filesystem.  Mostly used for
       # things like Git and Subversion version control directories.
       #

data/lib/right_scraper/retrievers/checkout.rb CHANGED Viewed

@@ -24,16 +24,17 @@
 module RightScraper
   module Retrievers
-    # Base class for retrievers that want to do version control
-    # operations (CVS, SVN, etc.).  Subclasses can get away with
-    # implementing only #do_checkout but to support incremental
-    # operation need to implement #exists? and #do_update, in addition
-    # to Retrievers::Base#ignorable_paths.
+    # Base class for retrievers that want to do version control operations
+    # (CVS, SVN, etc.). Subclasses can get away with implementing only
+    # Retrievers::Base#available? and #do_checkout but to support incremental
+    # operation need to implement #exists? and #do_update, in addition to
+    # Retrievers::Base#ignorable_paths.
     class CheckoutBasedRetriever < Base
       # Check out repository into the directory.  Occurs between
       # variable initialization and beginning scraping.
       def retrieve
+        raise RetrieverError.new("retriever is unavailable") unless available?
         if exists?
           begin
             @logger.operation(:updating) do

data/lib/right_scraper/retrievers/download.rb CHANGED Viewed

@@ -30,6 +30,28 @@ module RightScraper
     # somewhere.  Uses command line curl and command line tar.
     class Download < Base
+      @@available = false
+      # Determines if downloader is available.
+      def available?
+        unless @@available
+          begin
+            # FIX: we might want to parse the result and require a minimum curl
+            # version.
+            cmd = "curl --version"
+            `#{cmd}`
+            if $?.success?
+              @@available = true
+            else
+              raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
+            end
+          rescue
+            @logger.note_error($!, :available, "download retriever is unavailable")
+          end
+        end
+        @@available
+      end
       # Directory used to download tarballs
       def workdir
         File.join(@basedir, @repository.repository_hash)
@@ -42,6 +64,7 @@ module RightScraper
       # Download tarball and unpack it
       def retrieve
+        raise RetrieverError.new("download retriever is unavailable") unless available?
         FileUtils.remove_entry_secure workdir if File.exists?(workdir)
         FileUtils.mkdir_p repo_dir
         file = File.join(workdir, "package")

data/lib/right_scraper/retrievers/git.rb CHANGED Viewed

@@ -20,16 +20,43 @@
 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #++
-require 'git'
 module RightScraper
   module Retrievers
     # Retriever for resources stored in a git repository.
     class Git < CheckoutBasedRetriever
+      @@available = false
+      # Determines if git is available.
+      def available?
+        unless @@available
+          begin
+            require 'git'
+            # note that require 'git' does the same version check on load but
+            # we don't want to assume any particular implementation.
+            #
+            # FIX: we might want to parse the result and require a minimum git
+            # client version.
+            cmd = "git --version"
+            `#{cmd}`
+            if $?.success?
+              @@available = true
+            else
+              raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
+            end
+          rescue
+            @logger.note_error($!, :available, "git retriever is unavailable")
+          end
+        end
+        @@available
+      end
       # In addition to normal retriever initialization, if the
       # underlying repository has a credential we need to initialize a
       # fresh SSHAgent and add the credential to it.
       def retrieve
+        raise RetrieverError.new("git retriever is unavailable") unless available?
         RightScraper::Processes::SSHAgent.with do |agent|
           agent.add_key(@repository.first_credential) unless
             @repository.first_credential.nil?

data/lib/right_scraper/retrievers/svn.rb CHANGED Viewed

@@ -31,6 +31,28 @@ module RightScraper
       include RightScraper::SvnClient
+      @@available = false
+      # Determines if svn is available.
+      def available?
+        unless @@available
+          begin
+            # FIX: we might want to parse the result and require a minimum svn
+            # client version.
+            cmd = "svn --version"
+            `#{cmd}`
+            if $?.success?
+              @@available = true
+            else
+              raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
+            end
+          rescue
+            @logger.note_error($!, :available, "svn retriever is unavailable")
+          end
+        end
+        @@available
+      end
       # Return true if a checkout exists.  Currently tests for .svn in
       # the checkout.
       #

data/lib/right_scraper/scraper.rb CHANGED Viewed

@@ -82,7 +82,7 @@ module RightScraper
         retriever = nil
         @logger.operation(:retrieving, "from #{repo}") do
           retriever = repo.retriever(@options)
-          retriever.retrieve
+          retriever.retrieve if retriever.available?
         end
         # 2. Now scrape if there is a scraper in the options

data/lib/right_scraper/scrapers/workflow.rb CHANGED Viewed

@@ -41,6 +41,14 @@ module RightScraper
       # dir(Dir):: directory to begin search in
       def find_next(dir)
         @logger.operation(:finding_next_workflow, "in #{dir.path}") do
+          # Note: there could be multiple workflow definitions in one directory
+          # so we need to record the current position whether we found a workflow
+          # or not. The next iteration will search again in the current directory
+          # even if we found one. If we don't find one then we call
+          # 'search_dirs' which will recurse in the sub-directories.
+          @stack << dir
           def_ext = RightScraper::Resources::Workflow::DEFINITION_EXT
           meta_ext = RightScraper::Resources::Workflow::METADATA_EXT
           potentials = Dir[File.join(dir.path, "*#{def_ext}")]
@@ -56,7 +64,6 @@ module RightScraper
               workflow
             end
           else
-            @stack << dir
             search_dirs
           end
         end

data/right_scraper.gemspec CHANGED Viewed

@@ -24,7 +24,7 @@ require 'rubygems'
 Gem::Specification.new do |spec|
   spec.name      = 'right_scraper'
-  spec.version   = '3.0.0'
+  spec.version   = '3.0.1'
   spec.authors   = ['Graham Hughes', 'Raphael Simon']
   spec.email     = 'raphael@rightscale.com'
   spec.homepage  = 'https://github.com/rightscale/right_scraper'

data/right_scraper.rconf CHANGED Viewed

@@ -4,10 +4,10 @@
 #
 ruby do
   version  'ruby-1.9.2-p290'
-  rubygems '1.6.2'
+  rubygems '1.8.10'
   gemset   'right_scraper'
 end
 bundler do
-  version     '1.0.10'
+  version     '1.0.18'
   bundle_path File.join(ENV["HOME"], '.rightscale', 'right_scraper')
 end

data/spec/cookbook_s3_upload_spec.rb CHANGED Viewed

@@ -83,10 +83,11 @@ describe RightScraper::Scanners::CookbookS3Upload do
       bucket_name = 'this-bucket-does-not-exist'
       @s3.bucket(bucket_name).should be_nil
       lambda {
-      @scraper = @scraperclass.new(:repo_dir => @download_repo_path,
+      @scraper = @scraperclass.new(:repository => @repo,
+                                   :repo_dir => @download_repo_path,
                                    :scanners => [RightScraper::Scanners::CookbookMetadata,
                                                  RightScraper::Scanners::CookbookManifest,
-                                                 RightScraper::Scanners::S3Upload],
+                                                 RightScraper::Scanners::CookbookS3Upload],
                                    :s3_key => ENV['AMAZON_ACCESS_KEY_ID'],
                                    :s3_secret => ENV['AMAZON_SECRET_ACCESS_KEY'],
                                    :s3_bucket => bucket_name,
@@ -106,10 +107,11 @@ describe RightScraper::Scanners::CookbookS3Upload do
                                                :repo_type    => :download,
                                                :url          => "file:///#{@download_file}")
       bucket_name = 'com.rightscale.test.20100823'
-      @scraper = @scraperclass.new(:repo_dir => @download_repo_path,
+      @scraper = @scraperclass.new(:repository => @repo,
+                                   :repo_dir => @download_repo_path,
                                    :scanners => [RightScraper::Scanners::CookbookMetadata,
                                                  RightScraper::Scanners::CookbookManifest,
-                                                 RightScraper::Scanners::S3Upload],
+                                                 RightScraper::Scanners::CookbookS3Upload],
                                    :s3_key => ENV['AMAZON_ACCESS_KEY_ID'],
                                    :s3_secret => ENV['AMAZON_SECRET_ACCESS_KEY'],
                                    :s3_bucket => bucket_name,
@@ -128,7 +130,7 @@ describe RightScraper::Scanners::CookbookS3Upload do
     context 'that has scraped' do
       before(:each) do
-        @cookbook = @scraper.next
+        @cookbook = @scraper.next_resource
         @cookbook.should_not be_nil
       end

data/spec/git/retriever_spec.rb CHANGED Viewed

@@ -113,15 +113,17 @@ describe RightScraper::Retrievers::Git do
         include RightScraper::SpecHelpers::FromScratchScraping
         include RightScraper::SpecHelpers::WorkflowScraping
-        it 'should still see only one workflow' do
+        it 'should see two workflows' do
+          @scraper.next_resource.should_not == nil
           @scraper.next_resource.should_not == nil
           @scraper.next_resource.should == nil
         end
         it 'should have the subworkflow in the manifest' do
           workflow = @scraper.next_resource
-          workflow.manifest["workflow.def"].should == "15ce480ea6c94b51056e028b0e0bd7da8024d924"
-          workflow.manifest["workflow.meta"].should == "5f36b2ea290645ee34d943220a14b54ee5ea5be5"
+          workflow = @scraper.next_resource
+          workflow.manifest["workflow.def"].should == "e687ad52d8fba8010a255e3c2a9e891264a24910"
+          workflow.manifest["workflow.meta"].should == "58060413e90f84add5b2dace3ba7e30d2689336f"
         end
       end
@@ -150,8 +152,30 @@ describe RightScraper::Retrievers::Git do
           end
           scraped.should have(@workflow_places.size).repositories
         end
+      end
+      context 'with two-level deep workflows' do
+        before(:each) do
+          @workflow_places = [File.join(@helper.repo_path, "workflows", "first"),
+            File.join(@helper.repo_path, "workflows", "some_dir", "some_subdir", "second"),
+            File.join(@helper.repo_path, "workflows", "some_dir", "some_subdir", "third")]
+          @workflow_places.each {|place| secondary_workflow(place)}
+          @helper.commit_content("secondary workflows added")
+        end
+        include RightScraper::SpecHelpers::FromScratchScraping
+        include RightScraper::SpecHelpers::WorkflowScraping
+        it 'should scrape' do
+          @scraper.scrape
+          @scraper.resources.each do |res|
+            res.metadata_path.should_not be_nil
+            res.definition_path.should_not be_nil
+          end
+          @scraper.resources.size.should == @workflow_places.size + 1 # One in the root repo_path
+        end
       end
     end
     context 'of cookbooks' do

data/spec/svn/retriever_spec.rb CHANGED Viewed

@@ -40,12 +40,14 @@ describe RightScraper::Retrievers::Svn do
     before(:each) do
       pending "Not run unless REMOTE_USER and REMOTE_PASSWORD set" unless ENV['REMOTE_USER'] && ENV['REMOTE_PASSWORD']
       url = 'https://wush.net/svn/rightscale/cookbooks_test/'
+      @helper = RightScraper::SvnRetrieverSpecHelper.new
       @repo = RightScraper::Repositories::Base.from_hash(:display_name => 'wush',
                                                :repo_type    => :svn,
                                                :url          => url,
                                                :first_credential => ENV['REMOTE_USER'],
                                                :second_credential => ENV['REMOTE_PASSWORD'])
       @retriever = @retriever_class.new(@repo, :max_bytes => 1024**2,
+                                               :basedir     => @helper.scraper_path,
                                                :max_seconds => 20)
     end
@@ -61,17 +63,17 @@ describe RightScraper::Retrievers::Svn do
     # quick_start not actually being a cookbook
     it 'should scrape 5 repositories' do
+      @retriever.retrieve
+      @scraper = RightScraper::Scrapers::Base.scraper(:kind            => :cookbook,
+                                                      :ignorable_paths => @retriever.ignorable_paths,
+                                                      :repo_dir        => @retriever.repo_dir,
+                                                      :repository      => @retriever.repository)
       locations = Set.new
       (1..5).each {|n|
         cookbook = @scraper.next_resource
         locations << cookbook.pos
         cookbook.should_not == nil
       }
-      @retriever.retrieve
-      @scraper = RightScraper::Scrapers::Base.scraper(:kind            => :cookbook,
-                                                      :ignorable_paths => @retriever.ignorable_paths,
-                                                      :repo_dir        => @retriever.repo_dir,
-                                                      :repository      => @retriever.repository)
       @scraper.next_resource.should == nil
       locations.should == Set.new(["cookbooks/app_rails",
                                    "cookbooks/db_mysql",
@@ -131,8 +133,11 @@ describe RightScraper::Retrievers::Svn do
       include RightScraper::SpecHelpers::CookbookScraping
       it 'should scrape' do
-        @cookbook_places.each do |place|
-          check_resource @scraper.next_resource, :position => place[@helper.repo_path.length+1..-1]
+        scraped = []
+        while scrape = @scraper.next_resource
+          place = (@cookbook_places - scraped).detect {|place| File.join(@helper.repo_path, scrape.pos) == place}
+          scraped << place
+          check_resource scrape, :position => place[@helper.repo_path.length+1..-1]
         end
         scraped.should have(@cookbook_places.size).repositories
       end

metadata CHANGED Viewed

@@ -1,13 +1,8 @@
 --- !ruby/object:Gem::Specification
 name: right_scraper
 version: !ruby/object:Gem::Version
-  hash: 7
-  prerelease: false
-  segments:
-  - 3
-  - 0
-  - 0
-  version: 3.0.0
+  prerelease:
+  version: 3.0.1
 platform: ruby
 authors:
 - Graham Hughes
@@ -16,129 +11,96 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-09-26 00:00:00 -07:00
-default_executable:
+date: 2011-10-25 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  version_requirements: &id001 !ruby/object:Gem::Requirement
+  name: json
+  requirement: &id001 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 13
-        segments:
-        - 1
-        - 4
-        - 5
         version: 1.4.5
-  requirement: *id001
   type: :runtime
-  name: json
   prerelease: false
+  version_requirements: *id001
 - !ruby/object:Gem::Dependency
-  version_requirements: &id002 !ruby/object:Gem::Requirement
+  name: git
+  requirement: &id002 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 21
-        segments:
-        - 1
-        - 2
-        - 5
         version: 1.2.5
-  requirement: *id002
   type: :runtime
-  name: git
   prerelease: false
+  version_requirements: *id002
 - !ruby/object:Gem::Dependency
-  version_requirements: &id003 !ruby/object:Gem::Requirement
+  name: libarchive
+  requirement: &id003 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 25
-        segments:
-        - 0
-        - 1
-        - 1
         version: 0.1.1
-  requirement: *id003
   type: :runtime
-  name: libarchive
   prerelease: false
+  version_requirements: *id003
 - !ruby/object:Gem::Dependency
-  version_requirements: &id004 !ruby/object:Gem::Requirement
+  name: right_aws
+  requirement: &id004 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 2
-        - 0
         version: "2.0"
-  requirement: *id004
   type: :runtime
-  name: right_aws
   prerelease: false
+  version_requirements: *id004
 - !ruby/object:Gem::Dependency
-  version_requirements: &id005 !ruby/object:Gem::Requirement
+  name: process_watcher
+  requirement: &id005 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        hash: 13
-        segments:
-        - 0
-        - 3
         version: "0.3"
-  requirement: *id005
   type: :runtime
-  name: process_watcher
   prerelease: false
+  version_requirements: *id005
 - !ruby/object:Gem::Dependency
-  version_requirements: &id006 !ruby/object:Gem::Requirement
+  name: rspec
+  requirement: &id006 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
         version: "0"
-  requirement: *id006
   type: :development
-  name: rspec
   prerelease: false
+  version_requirements: *id006
 - !ruby/object:Gem::Dependency
-  version_requirements: &id007 !ruby/object:Gem::Requirement
+  name: flexmock
+  requirement: &id007 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
         version: "0"
-  requirement: *id007
   type: :development
-  name: flexmock
   prerelease: false
+  version_requirements: *id007
 - !ruby/object:Gem::Dependency
-  version_requirements: &id008 !ruby/object:Gem::Requirement
+  name: rtags
+  requirement: &id008 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
         version: "0"
-  requirement: *id008
   type: :development
-  name: rtags
   prerelease: false
+  version_requirements: *id008
 description: "  RightScraper provides a simple interface to download and keep local copies of remote\n  repositories up-to-date using the following protocols:\n    * git: RightScraper will clone then pull repos from git\n    * SVN: RightScraper will checkout then update SVN repositories\n    * tarballs: RightScraper will download, optionally uncompress and expand a given tar file\n  On top of retrieving remote repositories, right_scraper also include \"scrapers\" that\n  will analyze the repository content and instantiate \"resources\" as a result. Currently\n  supported resources are Chef cookbooks and RightScale workflow definitions.\n"
 email: raphael@rightscale.com
 executables: []
@@ -225,7 +187,6 @@ files:
 - spec/svn/svn_retriever_spec_helper.rb
 - spec/svn/url_spec.rb
 - spec/url_spec.rb
-has_rdoc: true
 homepage: https://github.com/rightscale/right_scraper
 licenses: []
@@ -242,18 +203,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 57
-      segments:
-      - 1
-      - 8
-      - 7
       version: 1.8.7
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 3
+      hash: 1773174315538447853
       segments:
       - 0
       version: "0"
@@ -262,7 +218,7 @@ requirements:
 - curl command line client
 - Subversion command line client
 rubyforge_project: right_scraper
-rubygems_version: 1.3.7
+rubygems_version: 1.8.10
 signing_key:
 specification_version: 3
 summary: Download and update remote repositories