RubyGems - right_scraper - Versions diffs - 3.0.0 → 3.0.1 - Mend

right_scraper 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

data/lib/right_scraper/retrievers/base.rb +8 -0
data/lib/right_scraper/retrievers/checkout.rb +6 -5
data/lib/right_scraper/retrievers/download.rb +23 -0
data/lib/right_scraper/retrievers/git.rb +28 -1
data/lib/right_scraper/retrievers/svn.rb +22 -0
data/lib/right_scraper/scraper.rb +1 -1
data/lib/right_scraper/scrapers/workflow.rb +8 -1
data/right_scraper.gemspec +1 -1
data/right_scraper.rconf +2 -2
data/spec/cookbook_s3_upload_spec.rb +7 -5
data/spec/git/retriever_spec.rb +27 -3
data/spec/svn/retriever_spec.rb +12 -7
metadata +29 -73

data/lib/right_scraper/retrievers/base.rb CHANGED Viewed

@@ -43,6 +43,9 @@ module RightScraper
       # String:: Path to directory where files are retrieved
       attr_reader :repo_dir
+      # exceptions
+      class RetrieverError < Exception; end
       # Create a new retriever for the given repository.  This class
       # recognizes several options, and subclasses may recognize
       # additional options.  Options may never be required.
@@ -73,6 +76,11 @@ module RightScraper
         end
       end
+      # Determines if retriever is available (has required CLI tools, etc.)
+      def available?
+        raise NotImplementedError
+      end
       # Paths to ignore when traversing the filesystem.  Mostly used for
       # things like Git and Subversion version control directories.
       #

data/lib/right_scraper/retrievers/checkout.rb CHANGED Viewed

@@ -24,16 +24,17 @@
 module RightScraper
   module Retrievers
-    # Base class for retrievers that want to do version control
-    # operations (CVS, SVN, etc.).  Subclasses can get away with
-    # implementing only #do_checkout but to support incremental
-    # operation need to implement #exists? and #do_update, in addition
-    # to Retrievers::Base#ignorable_paths.
+    # Base class for retrievers that want to do version control operations
+    # (CVS, SVN, etc.). Subclasses can get away with implementing only
+    # Retrievers::Base#available? and #do_checkout but to support incremental
+    # operation need to implement #exists? and #do_update, in addition to
+    # Retrievers::Base#ignorable_paths.
     class CheckoutBasedRetriever < Base
       # Check out repository into the directory.  Occurs between
       # variable initialization and beginning scraping.
       def retrieve
+        raise RetrieverError.new("retriever is unavailable") unless available?
         if exists?
           begin
             @logger.operation(:updating) do

data/lib/right_scraper/retrievers/download.rb CHANGED Viewed

@@ -30,6 +30,28 @@ module RightScraper
     # somewhere.  Uses command line curl and command line tar.
     class Download < Base
+      @@available = false
+      # Determines if downloader is available.
+      def available?
+        unless @@available
+          begin
+            # FIX: we might want to parse the result and require a minimum curl
+            # version.
+            cmd = "curl --version"
+            `#{cmd}`
+            if $?.success?
+              @@available = true
+            else
+              raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
+            end
+          rescue
+            @logger.note_error($!, :available, "download retriever is unavailable")
+          end
+        end
+        @@available
+      end
       # Directory used to download tarballs
       def workdir
         File.join(@basedir, @repository.repository_hash)
@@ -42,6 +64,7 @@ module RightScraper
       # Download tarball and unpack it
       def retrieve
+        raise RetrieverError.new("download retriever is unavailable") unless available?
         FileUtils.remove_entry_secure workdir if File.exists?(workdir)
         FileUtils.mkdir_p repo_dir
         file = File.join(workdir, "package")

data/lib/right_scraper/retrievers/git.rb CHANGED Viewed

@@ -20,16 +20,43 @@
 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #++
-require 'git'
 module RightScraper
   module Retrievers
     # Retriever for resources stored in a git repository.
     class Git < CheckoutBasedRetriever
+      @@available = false
+      # Determines if the git retriever is available.
+      def available?
+        unless @@available
+          begin
+            require 'git'
+            # note that require 'git' does the same version check on load but
+            # we don't want to assume any particular implementation.
+            #
+            # FIX: we might want to parse the result and require a minimum git
+            # client version.
+            cmd = "git --version"
+            `#{cmd}`
+            if $?.success?
+              @@available = true
+            else
+              raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
+            end
+          rescue
+            @logger.note_error($!, :available, "git retriever is unavailable")
+          end
+        end
+        @@available
+      end
       # In addition to normal retriever initialization, if the
       # underlying repository has a credential we need to initialize a
       # fresh SSHAgent and add the credential to it.
       def retrieve
+        raise RetrieverError.new("git retriever is unavailable") unless available?
         RightScraper::Processes::SSHAgent.with do |agent|
           agent.add_key(@repository.first_credential) unless
             @repository.first_credential.nil?

data/lib/right_scraper/retrievers/svn.rb CHANGED Viewed

@@ -31,6 +31,28 @@ module RightScraper
       include RightScraper::SvnClient
+      @@available = false
+      # Determines if svn is available.
+      def available?
+        unless @@available
+          begin
+            # FIX: we might want to parse the result and require a minimum svn
+            # client version.
+            cmd = "svn --version"
+            `#{cmd}`
+            if $?.success?
+              @@available = true
+            else
+              raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
+            end
+          rescue
+            @logger.note_error($!, :available, "svn retriever is unavailable")
+          end
+        end
+        @@available
+      end
       # Return true if a checkout exists.  Currently tests for .svn in
       # the checkout.
       #

data/lib/right_scraper/scraper.rb CHANGED Viewed

@@ -82,7 +82,7 @@ module RightScraper
         retriever = nil
         @logger.operation(:retrieving, "from #{repo}") do
           retriever = repo.retriever(@options)
-          retriever.retrieve
+          retriever.retrieve if retriever.available?
         end
         # 2. Now scrape if there is a scraper in the options

data/lib/right_scraper/scrapers/workflow.rb CHANGED Viewed

@@ -41,6 +41,14 @@ module RightScraper
       # dir(Dir):: directory to begin search in
       def find_next(dir)
         @logger.operation(:finding_next_workflow, "in #{dir.path}") do
+          # Note: there could be multiple workflow definitions in one directory
+          # so we need to record the current position whether we found a workflow
+          # or not. The next iteration will search again in the current directory
+          # even if we found one. If we don't find one then we call
+          # 'search_dirs' which will recurse in the sub-directories.
+          @stack << dir
           def_ext = RightScraper::Resources::Workflow::DEFINITION_EXT
           meta_ext = RightScraper::Resources::Workflow::METADATA_EXT
           potentials = Dir[File.join(dir.path, "*#{def_ext}")]
@@ -56,7 +64,6 @@ module RightScraper
               workflow
             end
           else
-            @stack << dir
             search_dirs
           end
         end

data/right_scraper.gemspec CHANGED Viewed

@@ -24,7 +24,7 @@ require 'rubygems'
 Gem::Specification.new do |spec|
   spec.name      = 'right_scraper'
-  spec.version   = '3.0.0'
+  spec.version   = '3.0.1'
   spec.authors   = ['Graham Hughes', 'Raphael Simon']
   spec.email     = 'raphael@rightscale.com'
   spec.homepage  = 'https://github.com/rightscale/right_scraper'

data/right_scraper.rconf CHANGED Viewed

@@ -4,10 +4,10 @@
 #
 ruby do
   version  'ruby-1.9.2-p290'
-  rubygems '1.6.2'
+  rubygems '1.8.10'
   gemset   'right_scraper'
 end
 bundler do
-  version     '1.0.10'
+  version     '1.0.18'
   bundle_path File.join(ENV["HOME"], '.rightscale', 'right_scraper')
 end

data/spec/cookbook_s3_upload_spec.rb CHANGED Viewed

@@ -83,10 +83,11 @@ describe RightScraper::Scanners::CookbookS3Upload do
       bucket_name = 'this-bucket-does-not-exist'
       @s3.bucket(bucket_name).should be_nil
       lambda {
-      @scraper = @scraperclass.new(:repo_dir => @download_repo_path,
+      @scraper = @scraperclass.new(:repository => @repo,
+                                   :repo_dir => @download_repo_path,
                                    :scanners => [RightScraper::Scanners::CookbookMetadata,
                                                  RightScraper::Scanners::CookbookManifest,
-                                                 RightScraper::Scanners::S3Upload],
+                                                 RightScraper::Scanners::CookbookS3Upload],
                                    :s3_key => ENV['AMAZON_ACCESS_KEY_ID'],
                                    :s3_secret => ENV['AMAZON_SECRET_ACCESS_KEY'],
                                    :s3_bucket => bucket_name,
@@ -106,10 +107,11 @@ describe RightScraper::Scanners::CookbookS3Upload do
                                                :repo_type    => :download,
                                                :url          => "file:///#{@download_file}")
       bucket_name = 'com.rightscale.test.20100823'
-      @scraper = @scraperclass.new(:repo_dir => @download_repo_path,
+      @scraper = @scraperclass.new(:repository => @repo,
+                                   :repo_dir => @download_repo_path,
                                    :scanners => [RightScraper::Scanners::CookbookMetadata,
                                                  RightScraper::Scanners::CookbookManifest,
-                                                 RightScraper::Scanners::S3Upload],
+                                                 RightScraper::Scanners::CookbookS3Upload],
                                    :s3_key => ENV['AMAZON_ACCESS_KEY_ID'],
                                    :s3_secret => ENV['AMAZON_SECRET_ACCESS_KEY'],
                                    :s3_bucket => bucket_name,
@@ -128,7 +130,7 @@ describe RightScraper::Scanners::CookbookS3Upload do
     context 'that has scraped' do
       before(:each) do
-        @cookbook = @scraper.next
+        @cookbook = @scraper.next_resource
         @cookbook.should_not be_nil
       end

data/spec/git/retriever_spec.rb CHANGED Viewed

@@ -113,15 +113,17 @@ describe RightScraper::Retrievers::Git do
         include RightScraper::SpecHelpers::FromScratchScraping
         include RightScraper::SpecHelpers::WorkflowScraping
-        it 'should still see only one workflow' do
+        it 'should see two workflows' do
+          @scraper.next_resource.should_not == nil
           @scraper.next_resource.should_not == nil
           @scraper.next_resource.should == nil
         end
         it 'should have the subworkflow in the manifest' do
           workflow = @scraper.next_resource
-          workflow.manifest["workflow.def"].should == "15ce480ea6c94b51056e028b0e0bd7da8024d924"
-          workflow.manifest["workflow.meta"].should == "5f36b2ea290645ee34d943220a14b54ee5ea5be5"
+          workflow = @scraper.next_resource
+          workflow.manifest["workflow.def"].should == "e687ad52d8fba8010a255e3c2a9e891264a24910"
+          workflow.manifest["workflow.meta"].should == "58060413e90f84add5b2dace3ba7e30d2689336f"
         end
       end
@@ -150,8 +152,30 @@ describe RightScraper::Retrievers::Git do
           end
           scraped.should have(@workflow_places.size).repositories
         end
+      end
+      context 'with two-level deep workflows' do
+        before(:each) do
+          @workflow_places = [File.join(@helper.repo_path, "workflows", "first"),
+            File.join(@helper.repo_path, "workflows", "some_dir", "some_subdir", "second"),
+            File.join(@helper.repo_path, "workflows", "some_dir", "some_subdir", "third")]
+          @workflow_places.each {|place| secondary_workflow(place)}
+          @helper.commit_content("secondary workflows added")
+        end
+        include RightScraper::SpecHelpers::FromScratchScraping
+        include RightScraper::SpecHelpers::WorkflowScraping
+        it 'should scrape' do
+          @scraper.scrape
+          @scraper.resources.each do |res|
+            res.metadata_path.should_not be_nil
+            res.definition_path.should_not be_nil
+          end
+          @scraper.resources.size.should == @workflow_places.size + 1 # One in the root repo_path
+        end
       end
     end
     context 'of cookbooks' do

data/spec/svn/retriever_spec.rb CHANGED Viewed

@@ -40,12 +40,14 @@ describe RightScraper::Retrievers::Svn do
     before(:each) do
       pending "Not run unless REMOTE_USER and REMOTE_PASSWORD set" unless ENV['REMOTE_USER'] && ENV['REMOTE_PASSWORD']
       url = 'https://wush.net/svn/rightscale/cookbooks_test/'
+      @helper = RightScraper::SvnRetrieverSpecHelper.new
       @repo = RightScraper::Repositories::Base.from_hash(:display_name => 'wush',
                                                :repo_type    => :svn,
                                                :url          => url,
                                                :first_credential => ENV['REMOTE_USER'],
                                                :second_credential => ENV['REMOTE_PASSWORD'])
       @retriever = @retriever_class.new(@repo, :max_bytes => 1024**2,
+                                               :basedir     => @helper.scraper_path,
                                                :max_seconds => 20)
     end
@@ -61,17 +63,17 @@ describe RightScraper::Retrievers::Svn do
     # quick_start not actually being a cookbook
     it 'should scrape 5 repositories' do
+      @retriever.retrieve
+      @scraper = RightScraper::Scrapers::Base.scraper(:kind            => :cookbook,
+                                                      :ignorable_paths => @retriever.ignorable_paths,
+                                                      :repo_dir        => @retriever.repo_dir,
+                                                      :repository      => @retriever.repository)
       locations = Set.new
       (1..5).each {|n|
         cookbook = @scraper.next_resource
         locations << cookbook.pos
         cookbook.should_not == nil
       }
-      @retriever.retrieve
-      @scraper = RightScraper::Scrapers::Base.scraper(:kind            => :cookbook,
-                                                      :ignorable_paths => @retriever.ignorable_paths,
-                                                      :repo_dir        => @retriever.repo_dir,
-                                                      :repository      => @retriever.repository)
       @scraper.next_resource.should == nil
       locations.should == Set.new(["cookbooks/app_rails",
                                    "cookbooks/db_mysql",
@@ -131,8 +133,11 @@ describe RightScraper::Retrievers::Svn do
       include RightScraper::SpecHelpers::CookbookScraping
       it 'should scrape' do
-        @cookbook_places.each do |place|
-          check_resource @scraper.next_resource, :position => place[@helper.repo_path.length+1..-1]
+        scraped = []
+        while scrape = @scraper.next_resource
+          place = (@cookbook_places - scraped).detect {|place| File.join(@helper.repo_path, scrape.pos) == place}
+          scraped << place
+          check_resource scrape, :position => place[@helper.repo_path.length+1..-1]
         end
         scraped.should have(@cookbook_places.size).repositories
       end

metadata CHANGED Viewed

@@ -1,13 +1,8 @@
 --- !ruby/object:Gem::Specification
 name: right_scraper
 version: !ruby/object:Gem::Version
-  hash: 7
-  prerelease: false
-  segments:
-  - 3
-  - 0
-  - 0
-  version: 3.0.0
+  prerelease:
+  version: 3.0.1
 platform: ruby
 authors:
 - Graham Hughes
@@ -16,129 +11,96 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-09-26 00:00:00 -07:00
-default_executable:
+date: 2011-10-25 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  version_requirements: &id001 !ruby/object:Gem::Requirement
+  name: json
+  requirement: &id001 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 13
-        segments:
-        - 1
-        - 4
-        - 5
         version: 1.4.5
-  requirement: *id001
   type: :runtime
-  name: json
   prerelease: false
+  version_requirements: *id001
 - !ruby/object:Gem::Dependency
-  version_requirements: &id002 !ruby/object:Gem::Requirement
+  name: git
+  requirement: &id002 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 21
-        segments:
-        - 1
-        - 2
-        - 5
         version: 1.2.5
-  requirement: *id002
   type: :runtime
-  name: git
   prerelease: false
+  version_requirements: *id002
 - !ruby/object:Gem::Dependency
-  version_requirements: &id003 !ruby/object:Gem::Requirement
+  name: libarchive
+  requirement: &id003 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 25
-        segments:
-        - 0
-        - 1
-        - 1
         version: 0.1.1
-  requirement: *id003
   type: :runtime
-  name: libarchive
   prerelease: false
+  version_requirements: *id003
 - !ruby/object:Gem::Dependency
-  version_requirements: &id004 !ruby/object:Gem::Requirement
+  name: right_aws
+  requirement: &id004 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 2
-        - 0
         version: "2.0"
-  requirement: *id004
   type: :runtime
-  name: right_aws
   prerelease: false
+  version_requirements: *id004
 - !ruby/object:Gem::Dependency
-  version_requirements: &id005 !ruby/object:Gem::Requirement
+  name: process_watcher
+  requirement: &id005 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        hash: 13
-        segments:
-        - 0
-        - 3
         version: "0.3"
-  requirement: *id005
   type: :runtime
-  name: process_watcher
   prerelease: false
+  version_requirements: *id005
 - !ruby/object:Gem::Dependency
-  version_requirements: &id006 !ruby/object:Gem::Requirement
+  name: rspec
+  requirement: &id006 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
         version: "0"
-  requirement: *id006
   type: :development
-  name: rspec
   prerelease: false
+  version_requirements: *id006
 - !ruby/object:Gem::Dependency
-  version_requirements: &id007 !ruby/object:Gem::Requirement
+  name: flexmock
+  requirement: &id007 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
         version: "0"
-  requirement: *id007
   type: :development
-  name: flexmock
   prerelease: false
+  version_requirements: *id007
 - !ruby/object:Gem::Dependency
-  version_requirements: &id008 !ruby/object:Gem::Requirement
+  name: rtags
+  requirement: &id008 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
         version: "0"
-  requirement: *id008
   type: :development
-  name: rtags
   prerelease: false
+  version_requirements: *id008
 description: "  RightScraper provides a simple interface to download and keep local copies of remote\n  repositories up-to-date using the following protocols:\n    * git: RightScraper will clone then pull repos from git\n    * SVN: RightScraper will checkout then update SVN repositories\n    * tarballs: RightScraper will download, optionally uncompress and expand a given tar file\n  On top of retrieving remote repositories, right_scraper also include \"scrapers\" that\n  will analyze the repository content and instantiate \"resources\" as a result. Currently\n  supported resources are Chef cookbooks and RightScale workflow definitions.\n"
 email: raphael@rightscale.com
 executables: []
@@ -225,7 +187,6 @@ files:
 - spec/svn/svn_retriever_spec_helper.rb
 - spec/svn/url_spec.rb
 - spec/url_spec.rb
-has_rdoc: true
 homepage: https://github.com/rightscale/right_scraper
 licenses: []
@@ -242,18 +203,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 57
-      segments:
-      - 1
-      - 8
-      - 7
       version: 1.8.7
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 3
+      hash: 1773174315538447853
       segments:
       - 0
       version: "0"
@@ -262,7 +218,7 @@ requirements:
 - curl command line client
 - Subversion command line client
 rubyforge_project: right_scraper
-rubygems_version: 1.3.7
+rubygems_version: 1.8.10
 signing_key:
 specification_version: 3
 summary: Download and update remote repositories