right_scraper 3.0.0 → 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -43,6 +43,9 @@ module RightScraper
43
43
  # String:: Path to directory where files are retrieved
44
44
  attr_reader :repo_dir
45
45
 
46
+ # exceptions
47
+ class RetrieverError < Exception; end
48
+
46
49
  # Create a new retriever for the given repository. This class
47
50
  # recognizes several options, and subclasses may recognize
48
51
  # additional options. Options may never be required.
@@ -73,6 +76,11 @@ module RightScraper
73
76
  end
74
77
  end
75
78
 
79
+ # Determines if retriever is available (has required CLI tools, etc.)
80
+ def available?
81
+ raise NotImplementedError
82
+ end
83
+
76
84
  # Paths to ignore when traversing the filesystem. Mostly used for
77
85
  # things like Git and Subversion version control directories.
78
86
  #
@@ -24,16 +24,17 @@
24
24
  module RightScraper
25
25
  module Retrievers
26
26
 
27
- # Base class for retrievers that want to do version control
28
- # operations (CVS, SVN, etc.). Subclasses can get away with
29
- # implementing only #do_checkout but to support incremental
30
- # operation need to implement #exists? and #do_update, in addition
31
- # to Retrievers::Base#ignorable_paths.
27
+ # Base class for retrievers that want to do version control operations
28
+ # (CVS, SVN, etc.). Subclasses can get away with implementing only
29
+ # Retrievers::Base#available? and #do_checkout but to support incremental
30
+ # operation need to implement #exists? and #do_update, in addition to
31
+ # Retrievers::Base#ignorable_paths.
32
32
  class CheckoutBasedRetriever < Base
33
33
 
34
34
  # Check out repository into the directory. Occurs between
35
35
  # variable initialization and beginning scraping.
36
36
  def retrieve
37
+ raise RetrieverError.new("retriever is unavailable") unless available?
37
38
  if exists?
38
39
  begin
39
40
  @logger.operation(:updating) do
@@ -30,6 +30,28 @@ module RightScraper
30
30
  # somewhere. Uses command line curl and command line tar.
31
31
  class Download < Base
32
32
 
33
+ @@available = false
34
+
35
+ # Determines if downloader is available.
36
+ def available?
37
+ unless @@available
38
+ begin
39
+ # FIX: we might want to parse the result and require a minimum curl
40
+ # version.
41
+ cmd = "curl --version"
42
+ `#{cmd}`
43
+ if $?.success?
44
+ @@available = true
45
+ else
46
+ raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
47
+ end
48
+ rescue
49
+ @logger.note_error($!, :available, "download retriever is unavailable")
50
+ end
51
+ end
52
+ @@available
53
+ end
54
+
33
55
  # Directory used to download tarballs
34
56
  def workdir
35
57
  File.join(@basedir, @repository.repository_hash)
@@ -42,6 +64,7 @@ module RightScraper
42
64
 
43
65
  # Download tarball and unpack it
44
66
  def retrieve
67
+ raise RetrieverError.new("download retriever is unavailable") unless available?
45
68
  FileUtils.remove_entry_secure workdir if File.exists?(workdir)
46
69
  FileUtils.mkdir_p repo_dir
47
70
  file = File.join(workdir, "package")
@@ -20,16 +20,43 @@
20
20
  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
- require 'git'
24
23
 
25
24
  module RightScraper
26
25
  module Retrievers
27
26
  # Retriever for resources stored in a git repository.
28
27
  class Git < CheckoutBasedRetriever
28
+
29
+ @@available = false
30
+
31
+ # Determines if git is available.
32
+ def available?
33
+ unless @@available
34
+ begin
35
+ require 'git'
36
+ # note that require 'git' does the same version check on load but
37
+ # we don't want to assume any particular implementation.
38
+ #
39
+ # FIX: we might want to parse the result and require a minimum git
40
+ # client version.
41
+ cmd = "git --version"
42
+ `#{cmd}`
43
+ if $?.success?
44
+ @@available = true
45
+ else
46
+ raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
47
+ end
48
+ rescue
49
+ @logger.note_error($!, :available, "git retriever is unavailable")
50
+ end
51
+ end
52
+ @@available
53
+ end
54
+
29
55
  # In addition to normal retriever initialization, if the
30
56
  # underlying repository has a credential we need to initialize a
31
57
  # fresh SSHAgent and add the credential to it.
32
58
  def retrieve
59
+ raise RetrieverError.new("git retriever is unavailable") unless available?
33
60
  RightScraper::Processes::SSHAgent.with do |agent|
34
61
  agent.add_key(@repository.first_credential) unless
35
62
  @repository.first_credential.nil?
@@ -31,6 +31,28 @@ module RightScraper
31
31
 
32
32
  include RightScraper::SvnClient
33
33
 
34
+ @@available = false
35
+
36
+ # Determines if svn is available.
37
+ def available?
38
+ unless @@available
39
+ begin
40
+ # FIX: we might want to parse the result and require a minimum svn
41
+ # client version.
42
+ cmd = "svn --version"
43
+ `#{cmd}`
44
+ if $?.success?
45
+ @@available = true
46
+ else
47
+ raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
48
+ end
49
+ rescue
50
+ @logger.note_error($!, :available, "svn retriever is unavailable")
51
+ end
52
+ end
53
+ @@available
54
+ end
55
+
34
56
  # Return true if a checkout exists. Currently tests for .svn in
35
57
  # the checkout.
36
58
  #
@@ -82,7 +82,7 @@ module RightScraper
82
82
  retriever = nil
83
83
  @logger.operation(:retrieving, "from #{repo}") do
84
84
  retriever = repo.retriever(@options)
85
- retriever.retrieve
85
+ retriever.retrieve if retriever.available?
86
86
  end
87
87
 
88
88
  # 2. Now scrape if there is a scraper in the options
@@ -41,6 +41,14 @@ module RightScraper
41
41
  # dir(Dir):: directory to begin search in
42
42
  def find_next(dir)
43
43
  @logger.operation(:finding_next_workflow, "in #{dir.path}") do
44
+
45
+ # Note: there could be multiple workflow definitions in one directory
46
+ # so we need to record the current position whether we found a workflow
47
+ # or not. The next iteration will search again in the current directory
48
+ # even if we found one. If we don't find one then we call
49
+ # 'search_dirs' which will recurse in the sub-directories.
50
+ @stack << dir
51
+
44
52
  def_ext = RightScraper::Resources::Workflow::DEFINITION_EXT
45
53
  meta_ext = RightScraper::Resources::Workflow::METADATA_EXT
46
54
  potentials = Dir[File.join(dir.path, "*#{def_ext}")]
@@ -56,7 +64,6 @@ module RightScraper
56
64
  workflow
57
65
  end
58
66
  else
59
- @stack << dir
60
67
  search_dirs
61
68
  end
62
69
  end
@@ -24,7 +24,7 @@ require 'rubygems'
24
24
 
25
25
  Gem::Specification.new do |spec|
26
26
  spec.name = 'right_scraper'
27
- spec.version = '3.0.0'
27
+ spec.version = '3.0.1'
28
28
  spec.authors = ['Graham Hughes', 'Raphael Simon']
29
29
  spec.email = 'raphael@rightscale.com'
30
30
  spec.homepage = 'https://github.com/rightscale/right_scraper'
data/right_scraper.rconf CHANGED
@@ -4,10 +4,10 @@
4
4
  #
5
5
  ruby do
6
6
  version 'ruby-1.9.2-p290'
7
- rubygems '1.6.2'
7
+ rubygems '1.8.10'
8
8
  gemset 'right_scraper'
9
9
  end
10
10
  bundler do
11
- version '1.0.10'
11
+ version '1.0.18'
12
12
  bundle_path File.join(ENV["HOME"], '.rightscale', 'right_scraper')
13
13
  end
@@ -83,10 +83,11 @@ describe RightScraper::Scanners::CookbookS3Upload do
83
83
  bucket_name = 'this-bucket-does-not-exist'
84
84
  @s3.bucket(bucket_name).should be_nil
85
85
  lambda {
86
- @scraper = @scraperclass.new(:repo_dir => @download_repo_path,
86
+ @scraper = @scraperclass.new(:repository => @repo,
87
+ :repo_dir => @download_repo_path,
87
88
  :scanners => [RightScraper::Scanners::CookbookMetadata,
88
89
  RightScraper::Scanners::CookbookManifest,
89
- RightScraper::Scanners::S3Upload],
90
+ RightScraper::Scanners::CookbookS3Upload],
90
91
  :s3_key => ENV['AMAZON_ACCESS_KEY_ID'],
91
92
  :s3_secret => ENV['AMAZON_SECRET_ACCESS_KEY'],
92
93
  :s3_bucket => bucket_name,
@@ -106,10 +107,11 @@ describe RightScraper::Scanners::CookbookS3Upload do
106
107
  :repo_type => :download,
107
108
  :url => "file:///#{@download_file}")
108
109
  bucket_name = 'com.rightscale.test.20100823'
109
- @scraper = @scraperclass.new(:repo_dir => @download_repo_path,
110
+ @scraper = @scraperclass.new(:repository => @repo,
111
+ :repo_dir => @download_repo_path,
110
112
  :scanners => [RightScraper::Scanners::CookbookMetadata,
111
113
  RightScraper::Scanners::CookbookManifest,
112
- RightScraper::Scanners::S3Upload],
114
+ RightScraper::Scanners::CookbookS3Upload],
113
115
  :s3_key => ENV['AMAZON_ACCESS_KEY_ID'],
114
116
  :s3_secret => ENV['AMAZON_SECRET_ACCESS_KEY'],
115
117
  :s3_bucket => bucket_name,
@@ -128,7 +130,7 @@ describe RightScraper::Scanners::CookbookS3Upload do
128
130
 
129
131
  context 'that has scraped' do
130
132
  before(:each) do
131
- @cookbook = @scraper.next
133
+ @cookbook = @scraper.next_resource
132
134
  @cookbook.should_not be_nil
133
135
  end
134
136
 
@@ -113,15 +113,17 @@ describe RightScraper::Retrievers::Git do
113
113
  include RightScraper::SpecHelpers::FromScratchScraping
114
114
  include RightScraper::SpecHelpers::WorkflowScraping
115
115
 
116
- it 'should still see only one workflow' do
116
+ it 'should see two workflows' do
117
+ @scraper.next_resource.should_not == nil
117
118
  @scraper.next_resource.should_not == nil
118
119
  @scraper.next_resource.should == nil
119
120
  end
120
121
 
121
122
  it 'should have the subworkflow in the manifest' do
122
123
  workflow = @scraper.next_resource
123
- workflow.manifest["workflow.def"].should == "15ce480ea6c94b51056e028b0e0bd7da8024d924"
124
- workflow.manifest["workflow.meta"].should == "5f36b2ea290645ee34d943220a14b54ee5ea5be5"
124
+ workflow = @scraper.next_resource
125
+ workflow.manifest["workflow.def"].should == "e687ad52d8fba8010a255e3c2a9e891264a24910"
126
+ workflow.manifest["workflow.meta"].should == "58060413e90f84add5b2dace3ba7e30d2689336f"
125
127
  end
126
128
  end
127
129
 
@@ -150,8 +152,30 @@ describe RightScraper::Retrievers::Git do
150
152
  end
151
153
  scraped.should have(@workflow_places.size).repositories
152
154
  end
155
+ end
156
+
157
+ context 'with two-level deep workflows' do
158
+ before(:each) do
159
+ @workflow_places = [File.join(@helper.repo_path, "workflows", "first"),
160
+ File.join(@helper.repo_path, "workflows", "some_dir", "some_subdir", "second"),
161
+ File.join(@helper.repo_path, "workflows", "some_dir", "some_subdir", "third")]
162
+ @workflow_places.each {|place| secondary_workflow(place)}
163
+ @helper.commit_content("secondary workflows added")
164
+ end
165
+
166
+ include RightScraper::SpecHelpers::FromScratchScraping
167
+ include RightScraper::SpecHelpers::WorkflowScraping
153
168
 
169
+ it 'should scrape' do
170
+ @scraper.scrape
171
+ @scraper.resources.each do |res|
172
+ res.metadata_path.should_not be_nil
173
+ res.definition_path.should_not be_nil
174
+ end
175
+ @scraper.resources.size.should == @workflow_places.size + 1 # One in the root repo_path
176
+ end
154
177
  end
178
+
155
179
  end
156
180
 
157
181
  context 'of cookbooks' do
@@ -40,12 +40,14 @@ describe RightScraper::Retrievers::Svn do
40
40
  before(:each) do
41
41
  pending "Not run unless REMOTE_USER and REMOTE_PASSWORD set" unless ENV['REMOTE_USER'] && ENV['REMOTE_PASSWORD']
42
42
  url = 'https://wush.net/svn/rightscale/cookbooks_test/'
43
+ @helper = RightScraper::SvnRetrieverSpecHelper.new
43
44
  @repo = RightScraper::Repositories::Base.from_hash(:display_name => 'wush',
44
45
  :repo_type => :svn,
45
46
  :url => url,
46
47
  :first_credential => ENV['REMOTE_USER'],
47
48
  :second_credential => ENV['REMOTE_PASSWORD'])
48
49
  @retriever = @retriever_class.new(@repo, :max_bytes => 1024**2,
50
+ :basedir => @helper.scraper_path,
49
51
  :max_seconds => 20)
50
52
  end
51
53
 
@@ -61,17 +63,17 @@ describe RightScraper::Retrievers::Svn do
61
63
 
62
64
  # quick_start not actually being a cookbook
63
65
  it 'should scrape 5 repositories' do
66
+ @retriever.retrieve
67
+ @scraper = RightScraper::Scrapers::Base.scraper(:kind => :cookbook,
68
+ :ignorable_paths => @retriever.ignorable_paths,
69
+ :repo_dir => @retriever.repo_dir,
70
+ :repository => @retriever.repository)
64
71
  locations = Set.new
65
72
  (1..5).each {|n|
66
73
  cookbook = @scraper.next_resource
67
74
  locations << cookbook.pos
68
75
  cookbook.should_not == nil
69
76
  }
70
- @retriever.retrieve
71
- @scraper = RightScraper::Scrapers::Base.scraper(:kind => :cookbook,
72
- :ignorable_paths => @retriever.ignorable_paths,
73
- :repo_dir => @retriever.repo_dir,
74
- :repository => @retriever.repository)
75
77
  @scraper.next_resource.should == nil
76
78
  locations.should == Set.new(["cookbooks/app_rails",
77
79
  "cookbooks/db_mysql",
@@ -131,8 +133,11 @@ describe RightScraper::Retrievers::Svn do
131
133
  include RightScraper::SpecHelpers::CookbookScraping
132
134
 
133
135
  it 'should scrape' do
134
- @cookbook_places.each do |place|
135
- check_resource @scraper.next_resource, :position => place[@helper.repo_path.length+1..-1]
136
+ scraped = []
137
+ while scrape = @scraper.next_resource
138
+ place = (@cookbook_places - scraped).detect {|place| File.join(@helper.repo_path, scrape.pos) == place}
139
+ scraped << place
140
+ check_resource scrape, :position => place[@helper.repo_path.length+1..-1]
136
141
  end
137
142
  scraped.should have(@cookbook_places.size).repositories
138
143
  end
metadata CHANGED
@@ -1,13 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: right_scraper
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
5
- prerelease: false
6
- segments:
7
- - 3
8
- - 0
9
- - 0
10
- version: 3.0.0
4
+ prerelease:
5
+ version: 3.0.1
11
6
  platform: ruby
12
7
  authors:
13
8
  - Graham Hughes
@@ -16,129 +11,96 @@ autorequire:
16
11
  bindir: bin
17
12
  cert_chain: []
18
13
 
19
- date: 2011-09-26 00:00:00 -07:00
20
- default_executable:
14
+ date: 2011-10-25 00:00:00 Z
21
15
  dependencies:
22
16
  - !ruby/object:Gem::Dependency
23
- version_requirements: &id001 !ruby/object:Gem::Requirement
17
+ name: json
18
+ requirement: &id001 !ruby/object:Gem::Requirement
24
19
  none: false
25
20
  requirements:
26
21
  - - ">="
27
22
  - !ruby/object:Gem::Version
28
- hash: 13
29
- segments:
30
- - 1
31
- - 4
32
- - 5
33
23
  version: 1.4.5
34
- requirement: *id001
35
24
  type: :runtime
36
- name: json
37
25
  prerelease: false
26
+ version_requirements: *id001
38
27
  - !ruby/object:Gem::Dependency
39
- version_requirements: &id002 !ruby/object:Gem::Requirement
28
+ name: git
29
+ requirement: &id002 !ruby/object:Gem::Requirement
40
30
  none: false
41
31
  requirements:
42
32
  - - ">="
43
33
  - !ruby/object:Gem::Version
44
- hash: 21
45
- segments:
46
- - 1
47
- - 2
48
- - 5
49
34
  version: 1.2.5
50
- requirement: *id002
51
35
  type: :runtime
52
- name: git
53
36
  prerelease: false
37
+ version_requirements: *id002
54
38
  - !ruby/object:Gem::Dependency
55
- version_requirements: &id003 !ruby/object:Gem::Requirement
39
+ name: libarchive
40
+ requirement: &id003 !ruby/object:Gem::Requirement
56
41
  none: false
57
42
  requirements:
58
43
  - - ">="
59
44
  - !ruby/object:Gem::Version
60
- hash: 25
61
- segments:
62
- - 0
63
- - 1
64
- - 1
65
45
  version: 0.1.1
66
- requirement: *id003
67
46
  type: :runtime
68
- name: libarchive
69
47
  prerelease: false
48
+ version_requirements: *id003
70
49
  - !ruby/object:Gem::Dependency
71
- version_requirements: &id004 !ruby/object:Gem::Requirement
50
+ name: right_aws
51
+ requirement: &id004 !ruby/object:Gem::Requirement
72
52
  none: false
73
53
  requirements:
74
54
  - - ">="
75
55
  - !ruby/object:Gem::Version
76
- hash: 3
77
- segments:
78
- - 2
79
- - 0
80
56
  version: "2.0"
81
- requirement: *id004
82
57
  type: :runtime
83
- name: right_aws
84
58
  prerelease: false
59
+ version_requirements: *id004
85
60
  - !ruby/object:Gem::Dependency
86
- version_requirements: &id005 !ruby/object:Gem::Requirement
61
+ name: process_watcher
62
+ requirement: &id005 !ruby/object:Gem::Requirement
87
63
  none: false
88
64
  requirements:
89
65
  - - ~>
90
66
  - !ruby/object:Gem::Version
91
- hash: 13
92
- segments:
93
- - 0
94
- - 3
95
67
  version: "0.3"
96
- requirement: *id005
97
68
  type: :runtime
98
- name: process_watcher
99
69
  prerelease: false
70
+ version_requirements: *id005
100
71
  - !ruby/object:Gem::Dependency
101
- version_requirements: &id006 !ruby/object:Gem::Requirement
72
+ name: rspec
73
+ requirement: &id006 !ruby/object:Gem::Requirement
102
74
  none: false
103
75
  requirements:
104
76
  - - ">="
105
77
  - !ruby/object:Gem::Version
106
- hash: 3
107
- segments:
108
- - 0
109
78
  version: "0"
110
- requirement: *id006
111
79
  type: :development
112
- name: rspec
113
80
  prerelease: false
81
+ version_requirements: *id006
114
82
  - !ruby/object:Gem::Dependency
115
- version_requirements: &id007 !ruby/object:Gem::Requirement
83
+ name: flexmock
84
+ requirement: &id007 !ruby/object:Gem::Requirement
116
85
  none: false
117
86
  requirements:
118
87
  - - ">="
119
88
  - !ruby/object:Gem::Version
120
- hash: 3
121
- segments:
122
- - 0
123
89
  version: "0"
124
- requirement: *id007
125
90
  type: :development
126
- name: flexmock
127
91
  prerelease: false
92
+ version_requirements: *id007
128
93
  - !ruby/object:Gem::Dependency
129
- version_requirements: &id008 !ruby/object:Gem::Requirement
94
+ name: rtags
95
+ requirement: &id008 !ruby/object:Gem::Requirement
130
96
  none: false
131
97
  requirements:
132
98
  - - ">="
133
99
  - !ruby/object:Gem::Version
134
- hash: 3
135
- segments:
136
- - 0
137
100
  version: "0"
138
- requirement: *id008
139
101
  type: :development
140
- name: rtags
141
102
  prerelease: false
103
+ version_requirements: *id008
142
104
  description: " RightScraper provides a simple interface to download and keep local copies of remote\n repositories up-to-date using the following protocols:\n * git: RightScraper will clone then pull repos from git\n * SVN: RightScraper will checkout then update SVN repositories\n * tarballs: RightScraper will download, optionally uncompress and expand a given tar file\n On top of retrieving remote repositories, right_scraper also include \"scrapers\" that\n will analyze the repository content and instantiate \"resources\" as a result. Currently\n supported resources are Chef cookbooks and RightScale workflow definitions.\n"
143
105
  email: raphael@rightscale.com
144
106
  executables: []
@@ -225,7 +187,6 @@ files:
225
187
  - spec/svn/svn_retriever_spec_helper.rb
226
188
  - spec/svn/url_spec.rb
227
189
  - spec/url_spec.rb
228
- has_rdoc: true
229
190
  homepage: https://github.com/rightscale/right_scraper
230
191
  licenses: []
231
192
 
@@ -242,18 +203,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
242
203
  requirements:
243
204
  - - ">="
244
205
  - !ruby/object:Gem::Version
245
- hash: 57
246
- segments:
247
- - 1
248
- - 8
249
- - 7
250
206
  version: 1.8.7
251
207
  required_rubygems_version: !ruby/object:Gem::Requirement
252
208
  none: false
253
209
  requirements:
254
210
  - - ">="
255
211
  - !ruby/object:Gem::Version
256
- hash: 3
212
+ hash: 1773174315538447853
257
213
  segments:
258
214
  - 0
259
215
  version: "0"
@@ -262,7 +218,7 @@ requirements:
262
218
  - curl command line client
263
219
  - Subversion command line client
264
220
  rubyforge_project: right_scraper
265
- rubygems_version: 1.3.7
221
+ rubygems_version: 1.8.10
266
222
  signing_key:
267
223
  specification_version: 3
268
224
  summary: Download and update remote repositories