right_scraper 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,6 +43,9 @@ module RightScraper
43
43
  # String:: Path to directory where files are retrieved
44
44
  attr_reader :repo_dir
45
45
 
46
+ # exceptions
47
+ class RetrieverError < Exception; end
48
+
46
49
  # Create a new retriever for the given repository. This class
47
50
  # recognizes several options, and subclasses may recognize
48
51
  # additional options. Options may never be required.
@@ -73,6 +76,11 @@ module RightScraper
73
76
  end
74
77
  end
75
78
 
79
+ # Determines if retriever is available (has required CLI tools, etc.)
80
+ def available?
81
+ raise NotImplementedError
82
+ end
83
+
76
84
  # Paths to ignore when traversing the filesystem. Mostly used for
77
85
  # things like Git and Subversion version control directories.
78
86
  #
@@ -24,16 +24,17 @@
24
24
  module RightScraper
25
25
  module Retrievers
26
26
 
27
- # Base class for retrievers that want to do version control
28
- # operations (CVS, SVN, etc.). Subclasses can get away with
29
- # implementing only #do_checkout but to support incremental
30
- # operation need to implement #exists? and #do_update, in addition
31
- # to Retrievers::Base#ignorable_paths.
27
+ # Base class for retrievers that want to do version control operations
28
+ # (CVS, SVN, etc.). Subclasses can get away with implementing only
29
+ # Retrievers::Base#available? and #do_checkout but to support incremental
30
+ # operation need to implement #exists? and #do_update, in addition to
31
+ # Retrievers::Base#ignorable_paths.
32
32
  class CheckoutBasedRetriever < Base
33
33
 
34
34
  # Check out repository into the directory. Occurs between
35
35
  # variable initialization and beginning scraping.
36
36
  def retrieve
37
+ raise RetrieverError.new("retriever is unavailable") unless available?
37
38
  if exists?
38
39
  begin
39
40
  @logger.operation(:updating) do
@@ -30,6 +30,28 @@ module RightScraper
30
30
  # somewhere. Uses command line curl and command line tar.
31
31
  class Download < Base
32
32
 
33
+ @@available = false
34
+
35
+ # Determines if downloader is available.
36
+ def available?
37
+ unless @@available
38
+ begin
39
+ # FIX: we might want to parse the result and require a minimum curl
40
+ # version.
41
+ cmd = "curl --version"
42
+ `#{cmd}`
43
+ if $?.success?
44
+ @@available = true
45
+ else
46
+ raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
47
+ end
48
+ rescue
49
+ @logger.note_error($!, :available, "download retriever is unavailable")
50
+ end
51
+ end
52
+ @@available
53
+ end
54
+
33
55
  # Directory used to download tarballs
34
56
  def workdir
35
57
  File.join(@basedir, @repository.repository_hash)
@@ -42,6 +64,7 @@ module RightScraper
42
64
 
43
65
  # Download tarball and unpack it
44
66
  def retrieve
67
+ raise RetrieverError.new("download retriever is unavailable") unless available?
45
68
  FileUtils.remove_entry_secure workdir if File.exists?(workdir)
46
69
  FileUtils.mkdir_p repo_dir
47
70
  file = File.join(workdir, "package")
@@ -20,16 +20,43 @@
20
20
  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
- require 'git'
24
23
 
25
24
  module RightScraper
26
25
  module Retrievers
27
26
  # Retriever for resources stored in a git repository.
28
27
  class Git < CheckoutBasedRetriever
28
+
29
+ @@available = false
30
+
31
+ # Determines if git is available.
32
+ def available?
33
+ unless @@available
34
+ begin
35
+ require 'git'
36
+ # note that require 'git' does the same version check on load but
37
+ # we don't want to assume any particular implementation.
38
+ #
39
+ # FIX: we might want to parse the result and require a minimum git
40
+ # client version.
41
+ cmd = "git --version"
42
+ `#{cmd}`
43
+ if $?.success?
44
+ @@available = true
45
+ else
46
+ raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
47
+ end
48
+ rescue
49
+ @logger.note_error($!, :available, "git retriever is unavailable")
50
+ end
51
+ end
52
+ @@available
53
+ end
54
+
29
55
  # In addition to normal retriever initialization, if the
30
56
  # underlying repository has a credential we need to initialize a
31
57
  # fresh SSHAgent and add the credential to it.
32
58
  def retrieve
59
+ raise RetrieverError.new("git retriever is unavailable") unless available?
33
60
  RightScraper::Processes::SSHAgent.with do |agent|
34
61
  agent.add_key(@repository.first_credential) unless
35
62
  @repository.first_credential.nil?
@@ -31,6 +31,28 @@ module RightScraper
31
31
 
32
32
  include RightScraper::SvnClient
33
33
 
34
+ @@available = false
35
+
36
+ # Determines if svn is available.
37
+ def available?
38
+ unless @@available
39
+ begin
40
+ # FIX: we might want to parse the result and require a minimum svn
41
+ # client version.
42
+ cmd = "svn --version"
43
+ `#{cmd}`
44
+ if $?.success?
45
+ @@available = true
46
+ else
47
+ raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
48
+ end
49
+ rescue
50
+ @logger.note_error($!, :available, "svn retriever is unavailable")
51
+ end
52
+ end
53
+ @@available
54
+ end
55
+
34
56
  # Return true if a checkout exists. Currently tests for .svn in
35
57
  # the checkout.
36
58
  #
@@ -82,7 +82,7 @@ module RightScraper
82
82
  retriever = nil
83
83
  @logger.operation(:retrieving, "from #{repo}") do
84
84
  retriever = repo.retriever(@options)
85
- retriever.retrieve
85
+ retriever.retrieve if retriever.available?
86
86
  end
87
87
 
88
88
  # 2. Now scrape if there is a scraper in the options
@@ -41,6 +41,14 @@ module RightScraper
41
41
  # dir(Dir):: directory to begin search in
42
42
  def find_next(dir)
43
43
  @logger.operation(:finding_next_workflow, "in #{dir.path}") do
44
+
45
+ # Note: there could be multiple workflow definitions in one directory
46
+ # so we need to record the current position whether we found a workflow
47
+ # or not. The next iteration will search again in the current directory
48
+ # even if we found one. If we don't find one then we call
49
+ # 'search_dirs' which will recurse into the sub-directories.
50
+ @stack << dir
51
+
44
52
  def_ext = RightScraper::Resources::Workflow::DEFINITION_EXT
45
53
  meta_ext = RightScraper::Resources::Workflow::METADATA_EXT
46
54
  potentials = Dir[File.join(dir.path, "*#{def_ext}")]
@@ -56,7 +64,6 @@ module RightScraper
56
64
  workflow
57
65
  end
58
66
  else
59
- @stack << dir
60
67
  search_dirs
61
68
  end
62
69
  end
@@ -24,7 +24,7 @@ require 'rubygems'
24
24
 
25
25
  Gem::Specification.new do |spec|
26
26
  spec.name = 'right_scraper'
27
- spec.version = '3.0.0'
27
+ spec.version = '3.0.1'
28
28
  spec.authors = ['Graham Hughes', 'Raphael Simon']
29
29
  spec.email = 'raphael@rightscale.com'
30
30
  spec.homepage = 'https://github.com/rightscale/right_scraper'
data/right_scraper.rconf CHANGED
@@ -4,10 +4,10 @@
4
4
  #
5
5
  ruby do
6
6
  version 'ruby-1.9.2-p290'
7
- rubygems '1.6.2'
7
+ rubygems '1.8.10'
8
8
  gemset 'right_scraper'
9
9
  end
10
10
  bundler do
11
- version '1.0.10'
11
+ version '1.0.18'
12
12
  bundle_path File.join(ENV["HOME"], '.rightscale', 'right_scraper')
13
13
  end
@@ -83,10 +83,11 @@ describe RightScraper::Scanners::CookbookS3Upload do
83
83
  bucket_name = 'this-bucket-does-not-exist'
84
84
  @s3.bucket(bucket_name).should be_nil
85
85
  lambda {
86
- @scraper = @scraperclass.new(:repo_dir => @download_repo_path,
86
+ @scraper = @scraperclass.new(:repository => @repo,
87
+ :repo_dir => @download_repo_path,
87
88
  :scanners => [RightScraper::Scanners::CookbookMetadata,
88
89
  RightScraper::Scanners::CookbookManifest,
89
- RightScraper::Scanners::S3Upload],
90
+ RightScraper::Scanners::CookbookS3Upload],
90
91
  :s3_key => ENV['AMAZON_ACCESS_KEY_ID'],
91
92
  :s3_secret => ENV['AMAZON_SECRET_ACCESS_KEY'],
92
93
  :s3_bucket => bucket_name,
@@ -106,10 +107,11 @@ describe RightScraper::Scanners::CookbookS3Upload do
106
107
  :repo_type => :download,
107
108
  :url => "file:///#{@download_file}")
108
109
  bucket_name = 'com.rightscale.test.20100823'
109
- @scraper = @scraperclass.new(:repo_dir => @download_repo_path,
110
+ @scraper = @scraperclass.new(:repository => @repo,
111
+ :repo_dir => @download_repo_path,
110
112
  :scanners => [RightScraper::Scanners::CookbookMetadata,
111
113
  RightScraper::Scanners::CookbookManifest,
112
- RightScraper::Scanners::S3Upload],
114
+ RightScraper::Scanners::CookbookS3Upload],
113
115
  :s3_key => ENV['AMAZON_ACCESS_KEY_ID'],
114
116
  :s3_secret => ENV['AMAZON_SECRET_ACCESS_KEY'],
115
117
  :s3_bucket => bucket_name,
@@ -128,7 +130,7 @@ describe RightScraper::Scanners::CookbookS3Upload do
128
130
 
129
131
  context 'that has scraped' do
130
132
  before(:each) do
131
- @cookbook = @scraper.next
133
+ @cookbook = @scraper.next_resource
132
134
  @cookbook.should_not be_nil
133
135
  end
134
136
 
@@ -113,15 +113,17 @@ describe RightScraper::Retrievers::Git do
113
113
  include RightScraper::SpecHelpers::FromScratchScraping
114
114
  include RightScraper::SpecHelpers::WorkflowScraping
115
115
 
116
- it 'should still see only one workflow' do
116
+ it 'should see two workflows' do
117
+ @scraper.next_resource.should_not == nil
117
118
  @scraper.next_resource.should_not == nil
118
119
  @scraper.next_resource.should == nil
119
120
  end
120
121
 
121
122
  it 'should have the subworkflow in the manifest' do
122
123
  workflow = @scraper.next_resource
123
- workflow.manifest["workflow.def"].should == "15ce480ea6c94b51056e028b0e0bd7da8024d924"
124
- workflow.manifest["workflow.meta"].should == "5f36b2ea290645ee34d943220a14b54ee5ea5be5"
124
+ workflow = @scraper.next_resource
125
+ workflow.manifest["workflow.def"].should == "e687ad52d8fba8010a255e3c2a9e891264a24910"
126
+ workflow.manifest["workflow.meta"].should == "58060413e90f84add5b2dace3ba7e30d2689336f"
125
127
  end
126
128
  end
127
129
 
@@ -150,8 +152,30 @@ describe RightScraper::Retrievers::Git do
150
152
  end
151
153
  scraped.should have(@workflow_places.size).repositories
152
154
  end
155
+ end
156
+
157
+ context 'with two-level deep workflows' do
158
+ before(:each) do
159
+ @workflow_places = [File.join(@helper.repo_path, "workflows", "first"),
160
+ File.join(@helper.repo_path, "workflows", "some_dir", "some_subdir", "second"),
161
+ File.join(@helper.repo_path, "workflows", "some_dir", "some_subdir", "third")]
162
+ @workflow_places.each {|place| secondary_workflow(place)}
163
+ @helper.commit_content("secondary workflows added")
164
+ end
165
+
166
+ include RightScraper::SpecHelpers::FromScratchScraping
167
+ include RightScraper::SpecHelpers::WorkflowScraping
153
168
 
169
+ it 'should scrape' do
170
+ @scraper.scrape
171
+ @scraper.resources.each do |res|
172
+ res.metadata_path.should_not be_nil
173
+ res.definition_path.should_not be_nil
174
+ end
175
+ @scraper.resources.size.should == @workflow_places.size + 1 # One in the root repo_path
176
+ end
154
177
  end
178
+
155
179
  end
156
180
 
157
181
  context 'of cookbooks' do
@@ -40,12 +40,14 @@ describe RightScraper::Retrievers::Svn do
40
40
  before(:each) do
41
41
  pending "Not run unless REMOTE_USER and REMOTE_PASSWORD set" unless ENV['REMOTE_USER'] && ENV['REMOTE_PASSWORD']
42
42
  url = 'https://wush.net/svn/rightscale/cookbooks_test/'
43
+ @helper = RightScraper::SvnRetrieverSpecHelper.new
43
44
  @repo = RightScraper::Repositories::Base.from_hash(:display_name => 'wush',
44
45
  :repo_type => :svn,
45
46
  :url => url,
46
47
  :first_credential => ENV['REMOTE_USER'],
47
48
  :second_credential => ENV['REMOTE_PASSWORD'])
48
49
  @retriever = @retriever_class.new(@repo, :max_bytes => 1024**2,
50
+ :basedir => @helper.scraper_path,
49
51
  :max_seconds => 20)
50
52
  end
51
53
 
@@ -61,17 +63,17 @@ describe RightScraper::Retrievers::Svn do
61
63
 
62
64
  # quick_start not actually being a cookbook
63
65
  it 'should scrape 5 repositories' do
66
+ @retriever.retrieve
67
+ @scraper = RightScraper::Scrapers::Base.scraper(:kind => :cookbook,
68
+ :ignorable_paths => @retriever.ignorable_paths,
69
+ :repo_dir => @retriever.repo_dir,
70
+ :repository => @retriever.repository)
64
71
  locations = Set.new
65
72
  (1..5).each {|n|
66
73
  cookbook = @scraper.next_resource
67
74
  locations << cookbook.pos
68
75
  cookbook.should_not == nil
69
76
  }
70
- @retriever.retrieve
71
- @scraper = RightScraper::Scrapers::Base.scraper(:kind => :cookbook,
72
- :ignorable_paths => @retriever.ignorable_paths,
73
- :repo_dir => @retriever.repo_dir,
74
- :repository => @retriever.repository)
75
77
  @scraper.next_resource.should == nil
76
78
  locations.should == Set.new(["cookbooks/app_rails",
77
79
  "cookbooks/db_mysql",
@@ -131,8 +133,11 @@ describe RightScraper::Retrievers::Svn do
131
133
  include RightScraper::SpecHelpers::CookbookScraping
132
134
 
133
135
  it 'should scrape' do
134
- @cookbook_places.each do |place|
135
- check_resource @scraper.next_resource, :position => place[@helper.repo_path.length+1..-1]
136
+ scraped = []
137
+ while scrape = @scraper.next_resource
138
+ place = (@cookbook_places - scraped).detect {|place| File.join(@helper.repo_path, scrape.pos) == place}
139
+ scraped << place
140
+ check_resource scrape, :position => place[@helper.repo_path.length+1..-1]
136
141
  end
137
142
  scraped.should have(@cookbook_places.size).repositories
138
143
  end
metadata CHANGED
@@ -1,13 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: right_scraper
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
5
- prerelease: false
6
- segments:
7
- - 3
8
- - 0
9
- - 0
10
- version: 3.0.0
4
+ prerelease:
5
+ version: 3.0.1
11
6
  platform: ruby
12
7
  authors:
13
8
  - Graham Hughes
@@ -16,129 +11,96 @@ autorequire:
16
11
  bindir: bin
17
12
  cert_chain: []
18
13
 
19
- date: 2011-09-26 00:00:00 -07:00
20
- default_executable:
14
+ date: 2011-10-25 00:00:00 Z
21
15
  dependencies:
22
16
  - !ruby/object:Gem::Dependency
23
- version_requirements: &id001 !ruby/object:Gem::Requirement
17
+ name: json
18
+ requirement: &id001 !ruby/object:Gem::Requirement
24
19
  none: false
25
20
  requirements:
26
21
  - - ">="
27
22
  - !ruby/object:Gem::Version
28
- hash: 13
29
- segments:
30
- - 1
31
- - 4
32
- - 5
33
23
  version: 1.4.5
34
- requirement: *id001
35
24
  type: :runtime
36
- name: json
37
25
  prerelease: false
26
+ version_requirements: *id001
38
27
  - !ruby/object:Gem::Dependency
39
- version_requirements: &id002 !ruby/object:Gem::Requirement
28
+ name: git
29
+ requirement: &id002 !ruby/object:Gem::Requirement
40
30
  none: false
41
31
  requirements:
42
32
  - - ">="
43
33
  - !ruby/object:Gem::Version
44
- hash: 21
45
- segments:
46
- - 1
47
- - 2
48
- - 5
49
34
  version: 1.2.5
50
- requirement: *id002
51
35
  type: :runtime
52
- name: git
53
36
  prerelease: false
37
+ version_requirements: *id002
54
38
  - !ruby/object:Gem::Dependency
55
- version_requirements: &id003 !ruby/object:Gem::Requirement
39
+ name: libarchive
40
+ requirement: &id003 !ruby/object:Gem::Requirement
56
41
  none: false
57
42
  requirements:
58
43
  - - ">="
59
44
  - !ruby/object:Gem::Version
60
- hash: 25
61
- segments:
62
- - 0
63
- - 1
64
- - 1
65
45
  version: 0.1.1
66
- requirement: *id003
67
46
  type: :runtime
68
- name: libarchive
69
47
  prerelease: false
48
+ version_requirements: *id003
70
49
  - !ruby/object:Gem::Dependency
71
- version_requirements: &id004 !ruby/object:Gem::Requirement
50
+ name: right_aws
51
+ requirement: &id004 !ruby/object:Gem::Requirement
72
52
  none: false
73
53
  requirements:
74
54
  - - ">="
75
55
  - !ruby/object:Gem::Version
76
- hash: 3
77
- segments:
78
- - 2
79
- - 0
80
56
  version: "2.0"
81
- requirement: *id004
82
57
  type: :runtime
83
- name: right_aws
84
58
  prerelease: false
59
+ version_requirements: *id004
85
60
  - !ruby/object:Gem::Dependency
86
- version_requirements: &id005 !ruby/object:Gem::Requirement
61
+ name: process_watcher
62
+ requirement: &id005 !ruby/object:Gem::Requirement
87
63
  none: false
88
64
  requirements:
89
65
  - - ~>
90
66
  - !ruby/object:Gem::Version
91
- hash: 13
92
- segments:
93
- - 0
94
- - 3
95
67
  version: "0.3"
96
- requirement: *id005
97
68
  type: :runtime
98
- name: process_watcher
99
69
  prerelease: false
70
+ version_requirements: *id005
100
71
  - !ruby/object:Gem::Dependency
101
- version_requirements: &id006 !ruby/object:Gem::Requirement
72
+ name: rspec
73
+ requirement: &id006 !ruby/object:Gem::Requirement
102
74
  none: false
103
75
  requirements:
104
76
  - - ">="
105
77
  - !ruby/object:Gem::Version
106
- hash: 3
107
- segments:
108
- - 0
109
78
  version: "0"
110
- requirement: *id006
111
79
  type: :development
112
- name: rspec
113
80
  prerelease: false
81
+ version_requirements: *id006
114
82
  - !ruby/object:Gem::Dependency
115
- version_requirements: &id007 !ruby/object:Gem::Requirement
83
+ name: flexmock
84
+ requirement: &id007 !ruby/object:Gem::Requirement
116
85
  none: false
117
86
  requirements:
118
87
  - - ">="
119
88
  - !ruby/object:Gem::Version
120
- hash: 3
121
- segments:
122
- - 0
123
89
  version: "0"
124
- requirement: *id007
125
90
  type: :development
126
- name: flexmock
127
91
  prerelease: false
92
+ version_requirements: *id007
128
93
  - !ruby/object:Gem::Dependency
129
- version_requirements: &id008 !ruby/object:Gem::Requirement
94
+ name: rtags
95
+ requirement: &id008 !ruby/object:Gem::Requirement
130
96
  none: false
131
97
  requirements:
132
98
  - - ">="
133
99
  - !ruby/object:Gem::Version
134
- hash: 3
135
- segments:
136
- - 0
137
100
  version: "0"
138
- requirement: *id008
139
101
  type: :development
140
- name: rtags
141
102
  prerelease: false
103
+ version_requirements: *id008
142
104
  description: " RightScraper provides a simple interface to download and keep local copies of remote\n repositories up-to-date using the following protocols:\n * git: RightScraper will clone then pull repos from git\n * SVN: RightScraper will checkout then update SVN repositories\n * tarballs: RightScraper will download, optionally uncompress and expand a given tar file\n On top of retrieving remote repositories, right_scraper also include \"scrapers\" that\n will analyze the repository content and instantiate \"resources\" as a result. Currently\n supported resources are Chef cookbooks and RightScale workflow definitions.\n"
143
105
  email: raphael@rightscale.com
144
106
  executables: []
@@ -225,7 +187,6 @@ files:
225
187
  - spec/svn/svn_retriever_spec_helper.rb
226
188
  - spec/svn/url_spec.rb
227
189
  - spec/url_spec.rb
228
- has_rdoc: true
229
190
  homepage: https://github.com/rightscale/right_scraper
230
191
  licenses: []
231
192
 
@@ -242,18 +203,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
242
203
  requirements:
243
204
  - - ">="
244
205
  - !ruby/object:Gem::Version
245
- hash: 57
246
- segments:
247
- - 1
248
- - 8
249
- - 7
250
206
  version: 1.8.7
251
207
  required_rubygems_version: !ruby/object:Gem::Requirement
252
208
  none: false
253
209
  requirements:
254
210
  - - ">="
255
211
  - !ruby/object:Gem::Version
256
- hash: 3
212
+ hash: 1773174315538447853
257
213
  segments:
258
214
  - 0
259
215
  version: "0"
@@ -262,7 +218,7 @@ requirements:
262
218
  - curl command line client
263
219
  - Subversion command line client
264
220
  rubyforge_project: right_scraper
265
- rubygems_version: 1.3.7
221
+ rubygems_version: 1.8.10
266
222
  signing_key:
267
223
  specification_version: 3
268
224
  summary: Download and update remote repositories