right_scraper 1.0.26 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +16 -0
- data/README.rdoc +9 -28
- data/Rakefile +51 -39
- data/lib/right_scraper/builders/base.rb +64 -0
- data/lib/right_scraper/builders/filesystem.rb +96 -0
- data/lib/right_scraper/builders/union.rb +57 -0
- data/lib/right_scraper/logger.rb +102 -0
- data/lib/right_scraper/loggers/noisy.rb +85 -0
- data/lib/right_scraper/processes/ssh.rb +188 -0
- data/lib/right_scraper/repositories/base.rb +299 -0
- data/lib/right_scraper/repositories/download.rb +90 -0
- data/lib/right_scraper/repositories/git.rb +92 -0
- data/lib/right_scraper/repositories/mock.rb +70 -0
- data/lib/right_scraper/repositories/svn.rb +96 -0
- data/lib/right_scraper/resources/base.rb +70 -0
- data/{spec/scraper_base_spec.rb → lib/right_scraper/resources/cookbook.rb} +9 -23
- data/lib/right_scraper/resources/workflow.rb +55 -0
- data/lib/right_scraper/retrievers/base.rb +114 -0
- data/lib/right_scraper/retrievers/checkout.rb +79 -0
- data/lib/right_scraper/retrievers/download.rb +97 -0
- data/lib/right_scraper/retrievers/git.rb +140 -0
- data/lib/right_scraper/retrievers/svn.rb +87 -0
- data/lib/right_scraper/scanners/base.rb +111 -0
- data/lib/right_scraper/scanners/cookbook_manifest.rb +59 -0
- data/lib/right_scraper/scanners/cookbook_metadata.rb +69 -0
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +84 -0
- data/lib/right_scraper/scanners/union.rb +89 -0
- data/lib/right_scraper/scanners/workflow_manifest.rb +86 -0
- data/lib/right_scraper/scanners/workflow_metadata.rb +70 -0
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +85 -0
- data/lib/right_scraper/scraper.rb +81 -57
- data/lib/right_scraper/scraper_logger.rb +61 -0
- data/lib/right_scraper/scrapers/base.rb +262 -0
- data/lib/right_scraper/scrapers/cookbook.rb +73 -0
- data/lib/right_scraper/scrapers/workflow.rb +88 -0
- data/lib/right_scraper/svn_client.rb +101 -0
- data/lib/right_scraper/version.rb +28 -0
- data/lib/right_scraper.rb +35 -11
- data/right_scraper.gemspec +26 -13
- data/right_scraper.rconf +13 -0
- data/spec/builder_spec.rb +50 -0
- data/spec/cookbook_helper.rb +73 -0
- data/spec/cookbook_manifest_spec.rb +55 -0
- data/spec/cookbook_s3_upload_spec.rb +152 -0
- data/spec/download/download_retriever_spec.rb +118 -0
- data/spec/download/download_retriever_spec_helper.rb +72 -0
- data/spec/download/download_spec.rb +130 -0
- data/spec/download/multi_dir_spec.rb +106 -0
- data/spec/download/multi_dir_spec_helper.rb +40 -0
- data/spec/git/cookbook_spec.rb +166 -0
- data/spec/git/demokey +27 -0
- data/spec/git/demokey.pub +1 -0
- data/spec/git/password_key +30 -0
- data/spec/git/password_key.pub +1 -0
- data/spec/git/repository_spec.rb +110 -0
- data/spec/git/retriever_spec.rb +505 -0
- data/spec/git/retriever_spec_helper.rb +112 -0
- data/spec/git/scraper_spec.rb +136 -0
- data/spec/git/ssh_spec.rb +170 -0
- data/spec/git/url_spec.rb +103 -0
- data/spec/logger_spec.rb +185 -0
- data/spec/repository_spec.rb +89 -23
- data/spec/{scraper_spec_helper_base.rb → retriever_spec_helper.rb} +41 -27
- data/spec/scanner_spec.rb +61 -0
- data/spec/scraper_helper.rb +96 -0
- data/spec/scraper_spec.rb +123 -45
- data/spec/spec_helper.rb +87 -14
- data/spec/svn/cookbook_spec.rb +97 -0
- data/spec/svn/multi_svn_spec.rb +64 -0
- data/spec/svn/multi_svn_spec_helper.rb +40 -0
- data/spec/svn/repository_spec.rb +72 -0
- data/spec/svn/retriever_spec.rb +261 -0
- data/spec/svn/scraper_spec.rb +90 -0
- data/spec/svn/{svn_scraper_spec_helper.rb → svn_retriever_spec_helper.rb} +46 -27
- data/spec/svn/url_spec.rb +47 -0
- data/spec/url_spec.rb +164 -0
- metadata +203 -31
- data/lib/right_scraper/linux/process_monitor.rb +0 -84
- data/lib/right_scraper/repository.rb +0 -78
- data/lib/right_scraper/scraper_base.rb +0 -175
- data/lib/right_scraper/scrapers/download_scraper.rb +0 -67
- data/lib/right_scraper/scrapers/git_scraper.rb +0 -283
- data/lib/right_scraper/scrapers/svn_scraper.rb +0 -119
- data/lib/right_scraper/watcher.rb +0 -158
- data/lib/right_scraper/win32/process_monitor.rb +0 -98
- data/spec/download/download_scraper_spec.rb +0 -94
- data/spec/git/git_scraper_spec.rb +0 -165
- data/spec/git/git_scraper_spec_helper.rb +0 -72
- data/spec/rcov.opts +0 -1
- data/spec/spec.opts +0 -2
- data/spec/svn/svn_scraper_spec.rb +0 -148
- data/spec/watcher_spec.rb +0 -74
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'base'))
|
|
24
|
+
|
|
25
|
+
module RightScraper
|
|
26
|
+
module Repositories
|
|
27
|
+
# A "repository" that is just there for testing. This class is not
|
|
28
|
+
# loaded by default.
|
|
29
|
+
class Mock < Base
|
|
30
|
+
# Create a new mock repository.
|
|
31
|
+
def initialize
|
|
32
|
+
@repo_type = :mock
|
|
33
|
+
end
|
|
34
|
+
# (Symbol) Type of the repository, here :mock.
|
|
35
|
+
attr_accessor :repo_type
|
|
36
|
+
|
|
37
|
+
# (String) Optional, tag or branch of repository that should be downloaded
|
|
38
|
+
attr_accessor :tag
|
|
39
|
+
|
|
40
|
+
# (String) Optional, username
|
|
41
|
+
attr_accessor :first_credential
|
|
42
|
+
|
|
43
|
+
# (String) Optional, password
|
|
44
|
+
attr_accessor :second_credential
|
|
45
|
+
|
|
46
|
+
# Unique representation for this repo, should resolve to the same string
|
|
47
|
+
# for repos that should be cloned in same directory
|
|
48
|
+
#
|
|
49
|
+
# === Returns
|
|
50
|
+
# res(String):: Unique representation for this repo
|
|
51
|
+
def to_s
|
|
52
|
+
res = "mock #{url}:#{tag}"
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# (Base class) Appropriate class for scraping this sort of
|
|
56
|
+
# repository.
|
|
57
|
+
def scraper
|
|
58
|
+
@@scraper || raise("Scraper for mocks isn't defined yet")
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Set the correct sort of scraper to use for mock repositories.
|
|
62
|
+
def self.scraper=(scraper)
|
|
63
|
+
@@scraper = scraper
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Add this repository to the list of available types.
|
|
67
|
+
@@types[:mock] = RightScraper::Repositories::Mock
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
module RightScraper
|
|
25
|
+
module Repositories
|
|
26
|
+
# A repository that is stored in a Subversion server.
|
|
27
|
+
class Svn < Base
|
|
28
|
+
|
|
29
|
+
# (String) Optional, tag or branch of repository that should be downloaded
|
|
30
|
+
attr_accessor :tag
|
|
31
|
+
alias_method :revision, :tag
|
|
32
|
+
|
|
33
|
+
# (String) Optional, SVN username
|
|
34
|
+
attr_accessor :first_credential
|
|
35
|
+
alias_method :username, :first_credential
|
|
36
|
+
|
|
37
|
+
# (String) Optional, SVN password
|
|
38
|
+
attr_accessor :second_credential
|
|
39
|
+
alias_method :password, :second_credential
|
|
40
|
+
|
|
41
|
+
# Create a new SvnRepository. If the tag is not specified,
|
|
42
|
+
# defaults to HEAD.
|
|
43
|
+
def initialize(*args)
|
|
44
|
+
super
|
|
45
|
+
@tag = "HEAD" if @tag.nil?
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# (Symbol) Type of the repository, here :svn.
|
|
49
|
+
def repo_type
|
|
50
|
+
:svn
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Return a unique identifier for this revision in this repository.
|
|
54
|
+
#
|
|
55
|
+
# === Returns
|
|
56
|
+
# String:: opaque unique ID for this revision in this repository
|
|
57
|
+
def checkout_hash
|
|
58
|
+
digest("#{PROTOCOL_VERSION}\000#{repo_type}\000#{url}\000#{tag}")
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Convert this repository to a URL in the style of resource URLs.
|
|
62
|
+
#
|
|
63
|
+
# === Returns
|
|
64
|
+
# URI:: URL representing this repository
|
|
65
|
+
def to_url
|
|
66
|
+
if first_credential
|
|
67
|
+
uri = add_users_to(url, first_credential, second_credential)
|
|
68
|
+
else
|
|
69
|
+
uri = URI.parse(url)
|
|
70
|
+
end
|
|
71
|
+
uri
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# Instantiate retriever for this kind of repository
|
|
75
|
+
#
|
|
76
|
+
# === Options
|
|
77
|
+
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
|
|
78
|
+
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
|
|
79
|
+
# <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
|
|
80
|
+
# <tt>:logger</tt>:: Logger to use
|
|
81
|
+
#
|
|
82
|
+
# === Return
|
|
83
|
+
# retriever(Retrievers::Svn):: Retriever for this repository
|
|
84
|
+
def retriever(options)
|
|
85
|
+
RightScraper::Retrievers::Svn.new(self, options)
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Add this repository to the list of available types.
|
|
89
|
+
@@types[:svn] = RightScraper::Repositories::Svn
|
|
90
|
+
|
|
91
|
+
# Add Subversion URL schemes to the list of okay schemes.
|
|
92
|
+
@@okay_schemes << "svn"
|
|
93
|
+
@@okay_schemes << "svn+ssh"
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
require 'digest/sha1'
|
|
24
|
+
require 'uri'
|
|
25
|
+
|
|
26
|
+
module RightScraper
|
|
27
|
+
|
|
28
|
+
module Resources
|
|
29
|
+
|
|
30
|
+
# Localized representation of a resource. Contains the resource
|
|
31
|
+
# contents, and the metadata as a hash. A resource at its core is any
|
|
32
|
+
# abstraction that is statically represented by a set of files and
|
|
33
|
+
# directories and metadata.
|
|
34
|
+
#
|
|
35
|
+
# The JSON metadata for the resource is in #metadata, and the
|
|
36
|
+
# manifest is in #manifest.
|
|
37
|
+
class Base
|
|
38
|
+
|
|
39
|
+
# (Repositories::Base) Repository the resource was fetched from.
|
|
40
|
+
attr_reader :repository
|
|
41
|
+
|
|
42
|
+
# (Hash) Metadata from the resource.
|
|
43
|
+
attr_accessor :metadata
|
|
44
|
+
|
|
45
|
+
# (Hash) Manifest for resource. A hash of path => SHA-1 digests.
|
|
46
|
+
attr_accessor :manifest
|
|
47
|
+
|
|
48
|
+
# (String) Position in the repository.
|
|
49
|
+
attr_accessor :pos
|
|
50
|
+
|
|
51
|
+
# Create a new resource from the given parameters.
|
|
52
|
+
#
|
|
53
|
+
# === Parameters
|
|
54
|
+
# repo(Repositories::Base):: Repository containing this resource
|
|
55
|
+
def initialize(repo, pos)
|
|
56
|
+
@repository = repo
|
|
57
|
+
@pos = pos
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Resource hash
|
|
61
|
+
#
|
|
62
|
+
# === Return
|
|
63
|
+
# hash(String):: Hexadecimal value that uniquely identifies this resource
|
|
64
|
+
def resource_hash
|
|
65
|
+
Digest::SHA1.hexdigest("#{PROTOCOL_VERSION}\000#{@repository.checkout_hash}\000#{@pos}")
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
#--
|
|
2
|
-
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
3
|
#
|
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
5
|
# a copy of this software and associated documentation files (the
|
|
6
6
|
# 'Software'), to deal in the Software without restriction, including
|
|
7
7
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
9
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
10
|
# the following conditions:
|
|
11
11
|
#
|
|
12
12
|
# The above copyright notice and this permission notice shall be
|
|
13
13
|
# included in all copies or substantial portions of the Software.
|
|
14
14
|
#
|
|
15
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
16
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
17
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
18
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
@@ -21,26 +21,12 @@
|
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
22
|
#++
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
require 'scraper_base'
|
|
24
|
+
module RightScraper
|
|
26
25
|
|
|
27
|
-
|
|
26
|
+
module Resources
|
|
27
|
+
|
|
28
|
+
class Cookbook < Base
|
|
29
|
+
end
|
|
28
30
|
|
|
29
|
-
before(:each) do
|
|
30
|
-
@base = RightScale::ScraperBase.new('/tmp', max_bytes=1024**2, max_seconds=20)
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
it 'should initialize the scrape directory' do
|
|
34
|
-
@base.root_dir.should == '/tmp'
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
it 'should default to non incremental updates' do
|
|
38
|
-
@base.send(:incremental_update?).should be_false
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
it 'should allow retrieving the download directory path' do
|
|
42
|
-
repo_dir = RightScale::ScraperBase.repo_dir('root_dir', { :repo_type => :git, :url => 'git://github.com/rightscale/right_scraper.git' })
|
|
43
|
-
repo_dir.should =~ /^root_dir\//
|
|
44
31
|
end
|
|
45
|
-
|
|
46
|
-
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
module RightScraper
|
|
25
|
+
|
|
26
|
+
module Resources
|
|
27
|
+
|
|
28
|
+
class Workflow < Base
|
|
29
|
+
|
|
30
|
+
METADATA_EXT = '.meta'
|
|
31
|
+
DEFINITION_EXT = '.def'
|
|
32
|
+
|
|
33
|
+
# Relative path to definition file
|
|
34
|
+
# @pos must be set before this can be called
|
|
35
|
+
#
|
|
36
|
+
# === Return
|
|
37
|
+
# path(String):: Path to definition file
|
|
38
|
+
def definition_path
|
|
39
|
+
path = @pos
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Relative path to metadata file
|
|
43
|
+
# @pos must be set before this can be called
|
|
44
|
+
#
|
|
45
|
+
# === Return
|
|
46
|
+
# path(String):: Path to metadata file
|
|
47
|
+
def metadata_path
|
|
48
|
+
path = @pos.chomp(File.extname(@pos)) + METADATA_EXT if @pos
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
module RightScraper
|
|
25
|
+
module Retrievers
|
|
26
|
+
# Base class for all retrievers.
|
|
27
|
+
#
|
|
28
|
+
# Retrievers fetch remote repositories into a given path
|
|
29
|
+
# They will attempt to fetch incrementally when possible (e.g. leveraging
|
|
30
|
+
# the underlying source control management system incremental capabilities)
|
|
31
|
+
class Base
|
|
32
|
+
|
|
33
|
+
# Integer:: optional maximum size permitted for repositories
|
|
34
|
+
attr_accessor :max_bytes
|
|
35
|
+
|
|
36
|
+
# Integer:: optional maximum number of seconds for any single
|
|
37
|
+
# retrieve operation.
|
|
38
|
+
attr_accessor :max_seconds
|
|
39
|
+
|
|
40
|
+
# RightScraper::Repositories::Base:: repository currently being retrieved
|
|
41
|
+
attr_reader :repository
|
|
42
|
+
|
|
43
|
+
# String:: Path to directory where files are retrieved
|
|
44
|
+
attr_reader :repo_dir
|
|
45
|
+
|
|
46
|
+
# Create a new retriever for the given repository. This class
|
|
47
|
+
# recognizes several options, and subclasses may recognize
|
|
48
|
+
# additional options. Apart from :basedir, options may never be required.
|
|
49
|
+
#
|
|
50
|
+
# === Options
|
|
51
|
+
# <tt>:basedir</tt>:: Required, base directory where all files should be retrieved
|
|
52
|
+
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
|
|
53
|
+
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
|
|
54
|
+
# <tt>:logger</tt>:: Logger to use
|
|
55
|
+
#
|
|
56
|
+
# === Parameters
|
|
57
|
+
# repository(RightScraper::Repositories::Base):: repository to scrape
|
|
58
|
+
# options(Hash):: retriever options
|
|
59
|
+
#
|
|
60
|
+
# === Raise
|
|
61
|
+
# 'Missing base directory':: if :basedir option is missing
|
|
62
|
+
def initialize(repository, options={})
|
|
63
|
+
raise 'Missing base directory' unless options[:basedir]
|
|
64
|
+
@repository = repository
|
|
65
|
+
@max_bytes = options[:max_bytes] || nil
|
|
66
|
+
@max_seconds = options[:max_seconds] || nil
|
|
67
|
+
@basedir = options[:basedir]
|
|
68
|
+
@repo_dir = RightScraper::Retrievers::Base.repo_dir(@basedir, repository)
|
|
69
|
+
@logger = options[:logger] || RightScraper::Logger.new
|
|
70
|
+
@logger.repository = repository
|
|
71
|
+
@logger.operation(:initialize, "setting up in #{@repo_dir}") do
|
|
72
|
+
FileUtils.mkdir_p(@repo_dir)
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Paths to ignore when traversing the filesystem. Mostly used for
|
|
77
|
+
# things like Git and Subversion version control directories.
|
|
78
|
+
#
|
|
79
|
+
# === Return
|
|
80
|
+
# list(Array):: list of filenames to ignore.
|
|
81
|
+
def ignorable_paths
|
|
82
|
+
[]
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Retrieve repository; subclasses must override this method.
|
|
86
|
+
def retrieve
|
|
87
|
+
raise NotImplementedError
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Path to directory where given repo should be or was downloaded
|
|
91
|
+
#
|
|
92
|
+
# === Parameters
|
|
93
|
+
# root_dir(String):: Path to directory containing all scraped repositories
|
|
94
|
+
# repo(Hash|RightScraper::Repositories::Base):: Remote repository corresponding to local directory
|
|
95
|
+
#
|
|
96
|
+
# === Return
|
|
97
|
+
# String:: Path to local directory that corresponds to given repository
|
|
98
|
+
def self.repo_dir(root_dir, repo)
|
|
99
|
+
repo = RightScraper::Repositories::Base.from_hash(repo) if repo.is_a?(Hash)
|
|
100
|
+
dir_name = repo.repository_hash
|
|
101
|
+
dir_path = File.join(root_dir, dir_name)
|
|
102
|
+
"#{dir_path}/repo"
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
protected
|
|
106
|
+
|
|
107
|
+
# (Hash) Lookup table from textual description of scraper type
|
|
108
|
+
# ('cookbook' or 'workflow' currently) to the class that
|
|
109
|
+
# represents that scraper.
|
|
110
|
+
@@types = {} unless class_variable_defined?(:@@types)
|
|
111
|
+
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
module RightScraper
|
|
25
|
+
module Retrievers
|
|
26
|
+
|
|
27
|
+
# Base class for retrievers that want to do version control
|
|
28
|
+
# operations (CVS, SVN, etc.). Subclasses can get away with
|
|
29
|
+
# implementing only #do_checkout but to support incremental
|
|
30
|
+
# operation need to implement #exists? and #do_update, in addition
|
|
31
|
+
# to Retrievers::Base#ignorable_paths.
|
|
32
|
+
class CheckoutBasedRetriever < Base
|
|
33
|
+
|
|
34
|
+
# Check out repository into the directory. Occurs between
|
|
35
|
+
# variable initialization and beginning scraping.
|
|
36
|
+
def retrieve
|
|
37
|
+
if exists?
|
|
38
|
+
begin
|
|
39
|
+
@logger.operation(:updating) do
|
|
40
|
+
do_update
|
|
41
|
+
end
|
|
42
|
+
rescue
|
|
43
|
+
@logger.note_error($!, :updating, "switching to using checkout")
|
|
44
|
+
FileUtils.remove_entry_secure @repo_dir
|
|
45
|
+
@logger.operation(:checkout) do
|
|
46
|
+
do_checkout
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
else
|
|
50
|
+
@logger.operation(:checkout) do
|
|
51
|
+
do_checkout
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Return true if a checkout exists.
|
|
57
|
+
#
|
|
58
|
+
# === Returns
|
|
59
|
+
# Boolean:: true if the checkout already exists (and thus
|
|
60
|
+
# incremental updating can occur).
|
|
61
|
+
def exists?
|
|
62
|
+
false
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Perform an incremental update of the checkout. Subclasses that
|
|
66
|
+
# want to handle incremental updating need to override this.
|
|
67
|
+
def do_update
|
|
68
|
+
do_checkout
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Perform a de novo full checkout of the repository. Subclasses
|
|
72
|
+
# must override this to do anything useful.
|
|
73
|
+
def do_checkout
|
|
74
|
+
FileUtils.mkdir_p(@repo_dir)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
require 'process_watcher'
|
|
24
|
+
require 'tempfile'
|
|
25
|
+
require 'digest/sha1'
|
|
26
|
+
|
|
27
|
+
module RightScraper
|
|
28
|
+
module Retrievers
|
|
29
|
+
# A retriever for resources stored in archives on a web server
|
|
30
|
+
# somewhere. Uses command line curl and command line tar.
|
|
31
|
+
class Download < Base
|
|
32
|
+
|
|
33
|
+
# Directory used to download tarballs
|
|
34
|
+
def workdir
|
|
35
|
+
File.join(@basedir, @repository.repository_hash)
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Path to directory where files are retrieved
|
|
39
|
+
def repo_dir
|
|
40
|
+
File.join(workdir, "archive")
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Download tarball and unpack it
|
|
44
|
+
def retrieve
|
|
45
|
+
FileUtils.remove_entry_secure workdir if File.exists?(workdir)
|
|
46
|
+
FileUtils.mkdir_p repo_dir
|
|
47
|
+
file = File.join(workdir, "package")
|
|
48
|
+
|
|
49
|
+
@logger.operation(:downloading) do
|
|
50
|
+
credential_command = if @repository.first_credential && @repository.second_credential
|
|
51
|
+
["-u", "#{@repository.first_credential}:#{@repository.second_credential}"]
|
|
52
|
+
else
|
|
53
|
+
[]
|
|
54
|
+
end
|
|
55
|
+
ProcessWatcher.watch("curl", ["--silent", "--show-error", "--location", "--fail",
|
|
56
|
+
"--location-trusted", "-o", file,
|
|
57
|
+
credential_command, @repository.url].flatten,
|
|
58
|
+
workdir, @max_bytes || -1, @max_seconds || -1) do |phase, command, exception|
|
|
59
|
+
@logger.note_phase(phase, :running_command, command, exception)
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
note_tag(file)
|
|
64
|
+
|
|
65
|
+
@logger.operation(:unpacking) do
|
|
66
|
+
path = @repository.to_url.path
|
|
67
|
+
if path =~ /\.gz$/
|
|
68
|
+
extraction = "xzf"
|
|
69
|
+
elsif path =~ /\.bz2$/
|
|
70
|
+
extraction = "xjf"
|
|
71
|
+
else
|
|
72
|
+
extraction = "xf"
|
|
73
|
+
end
|
|
74
|
+
Dir.chdir(repo_dir) do
|
|
75
|
+
ProcessWatcher.watch("tar", [extraction, file], repo_dir,
|
|
76
|
+
@max_bytes || -1, @max_seconds || -1) do |phase, command, exception|
|
|
77
|
+
@logger.note_phase(phase, :running_command, command, exception)
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Amend @repository with the tag information from the downloaded
|
|
84
|
+
# file.
|
|
85
|
+
#
|
|
86
|
+
# === Parameters
|
|
87
|
+
# file(String):: file that was downloaded
|
|
88
|
+
def note_tag(file)
|
|
89
|
+
digest = Digest::SHA1.new
|
|
90
|
+
File.open(file) {|f| digest << f.read(4096) }
|
|
91
|
+
repo = @repository.clone
|
|
92
|
+
repo.tag = digest.hexdigest
|
|
93
|
+
@repository = repo
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|