right_scraper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 RightScale, Inc.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ 'Software'), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,81 @@
1
+ = RightScraper
2
+
3
+ == DESCRIPTION
4
+
5
+ === Synopsis
6
+
7
+ RightScraper provides a simple interface to download and keep repositories up-to-date
8
+ using various protocols.
9
+
10
+ The supported protocols include:
11
+ - *git*: RightScraper will clone then pull repos from git
12
+ - *SVN*: RightScraper will checkout then update SVN repositories
13
+ - *tarballs*: Includes uncompressed (.tar), gzip (.tgz, .gzip) and bzip (.bzip, .bzip2) tar files.
14
+
15
+ The scraper first inspects the local directory to see if the repo has already been scraped
16
+ and if so runs some basic checks before it tries to update it. Incremental updates are not
17
+ supported with tar files.
18
+
19
+ === Rationale
20
+
21
+ The idea is to have many repos that need to be downloaded/kept up-to-date in a central
22
+ place. Point the scraper to this central place and it will take care of creating unique
23
+ local directories for each remote repository and keep that mapping to download changes
24
+ incrementally upon request.
25
+
26
+ == USAGE
27
+
28
+ === Simple Example
29
+
30
+ require 'rubygems'
31
+ require 'right_scraper'
32
+
33
+ scraper = RightScale::Scraper.new('/tmp')
34
+ scraper.scrape(:repo_type => 'git', :url => 'git://github.com/rightscale/right_scraper.git')
35
+
36
+ == INSTALLATION
37
+
38
+ RightScraper can be installed by entering the following at the command prompt:
39
+
40
+ gem install right_scraper
41
+
42
+ == DEPENDENCIES
43
+
44
+ RightScraper relies on the following tools:
45
+ - git
46
+ - svn
47
+ - curl
48
+
49
+ == TESTING
50
+
51
+ Install the following RubyGems required for testing:
52
+ - rspec
53
+
54
+ The build can be tested using the RSpec gem.
55
+
56
+ rake spec
57
+
58
+ == LICENSE
59
+
60
+ <b>RightScraper</b>
61
+
62
+ Copyright:: Copyright (c) 2010 RightScale, Inc.
63
+
64
+ Permission is hereby granted, free of charge, to any person obtaining
65
+ a copy of this software and associated documentation files (the
66
+ 'Software'), to deal in the Software without restriction, including
67
+ without limitation the rights to use, copy, modify, merge, publish,
68
+ distribute, sublicense, and/or sell copies of the Software, and to
69
+ permit persons to whom the Software is furnished to do so, subject to
70
+ the following conditions:
71
+
72
+ The above copyright notice and this permission notice shall be
73
+ included in all copies or substantial portions of the Software.
74
+
75
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
76
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
77
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
78
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
79
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
80
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
81
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,74 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ require 'rubygems'
25
+ require 'fileutils'
26
+ require 'rake'
27
+ require 'spec/rake/spectask'
28
+
29
# Running plain `rake` runs the unit tests
task :default => 'spec'

# == Unit Tests == #

# Glob matching every spec file below the current directory
spec_pattern = '**/*_spec.rb'

desc "Run unit tests"
Spec::Rake::SpecTask.new { |t| t.spec_files = Dir[spec_pattern] }

desc "Run unit tests with RCov"
Spec::Rake::SpecTask.new(:rcov) do |t|
  t.spec_files = Dir[spec_pattern]
  t.rcov = true
  # RCov options are read from the 'rcov.opts' file located next to this
  # Rakefile; each line is split on spaces into individual arguments
  t.rcov_opts = lambda do
    opts_file = File.join(File.dirname(__FILE__), 'rcov.opts')
    IO.readlines(opts_file).map { |line| line.chomp.split(" ") }.flatten
  end
end

desc "Print Specdoc for unit tests"
Spec::Rake::SpecTask.new(:doc) do |t|
  t.spec_opts = ["--format", "specdoc", "--dry-run"]
  t.spec_files = Dir[spec_pattern]
end

# == Gem Management == #

desc "Build right_scraper gem"
task :gem do
  base_dir = File.dirname(__FILE__)
  # Run the gemspec with ruby, then move any *.gem file found next to this
  # Rakefile into the 'pkg' directory
  ruby 'right_scraper.gemspec'
  pkg_dir = File.join(base_dir, 'pkg')
  FileUtils.mkdir_p(pkg_dir)
  FileUtils.mv(Dir.glob(File.join(base_dir, '*.gem')), pkg_dir)
end

desc 'Install the right_scraper library as a gem'
task :install => [:gem] do
  sh "gem install #{Dir["pkg/*.gem"].last}"
end

desc 'Uninstalls and reinstalls the right_scraper library as a gem'
task :reinstall do
  sh "gem uninstall right_scraper"
  sh "rake install"
end
74
+
@@ -0,0 +1,66 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ module RightScale
25
+
26
# Description of a remote repository that needs to be scraped.
class Repository

  # (String) Human readable repository name used for progress reports
  attr_accessor :display_name

  # (String) One of 'git', 'svn' or 'download'
  attr_accessor :repo_type

  # (String) URL to repository (e.g. 'git://github.com/rightscale/right_scraper.git')
  attr_accessor :url

  # (String) Optional, tag or branch of repository that should be downloaded
  # Not used for 'download' repositories
  attr_accessor :tag

  # (String) Optional, SVN or download username, or git private SSH key content
  attr_accessor :first_credential

  # (String) Optional, SVN or download password
  attr_accessor :second_credential

  # Initialize repository from given hash
  # Hash keys should correspond to attributes of this class
  #
  # === Parameters
  # opts(Hash):: Hash to be converted into a repository instance
  #
  # === Return
  # repo(RightScale::Repository):: Resulting repository instance
  #
  # === Raise
  # NoMethodError:: If a key does not correspond to an attribute writer
  def self.from_hash(opts)
    # Use 'new' rather than a hard-coded class name so that subclasses
    # get instances of their own class
    repo = new
    opts.each { |k, v| repo.__send__("#{k}=", v) }
    repo
  end

  # Stable textual identity of the repository
  # Two repositories with the same type, URL and tag produce the same string,
  # which lets code that keys on 'repo.to_s' (e.g. to name a local directory)
  # find the same location again. Display name and credentials are
  # deliberately left out so that secrets never end up in the string.
  #
  # === Return
  # res(String):: String built from repository type, URL and tag
  def to_s
    "#{repo_type} #{url}:#{tag}"
  end

end
65
+
66
+ end
@@ -0,0 +1,96 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ module RightScale
25
+
26
# Maps each supported repository type name to the scraper class handling it
SCRAPERS = {
  'git'      => RightScale::GitScraper,
  'svn'      => RightScale::SvnScraper,
  'download' => RightScale::DownloadScraper
}
30
+
31
# Library main entry point. Instantiate this class and call the scrape
# method to download or update a remote repository to the local disk.
class Scraper

  # (String) Path to directory where remote repository was downloaded
  # Note: This will be a subfolder of the scrape directory (directory given to initializer)
  attr_reader :repo_dir

  # Initialize scrape destination directory
  #
  # === Parameters
  # scrape_dir(String):: Scrape destination directory
  def initialize(scrape_dir)
    @scrape_dir = scrape_dir
    @scrapers   = {} # One scraper instance per repository type, created lazily
  end

  # Scrape given repository.
  # Create unique directory inside scrape directory when called for the first time.
  # Update content of unique directory incrementally when possible with further calls.
  #
  # === Parameters
  # repo(Hash|RightScale::Repository):: Repository to be scraped
  # Note: repo can either be a Hash or a RightScale::Repository instance.
  # See the RightScale::Repository class for valid Hash keys.
  #
  # === Block
  # If a block is given, it will be called back with progress information
  # the block should take two arguments:
  # - first argument is the string containing the info
  # - second argument is a boolean indicating whether to increment progress
  # The block is called exactly once with the increment flag set to true
  #
  # === Return
  # true:: If scrape was successful
  # false:: If scrape failed, call errors for information on failure
  #
  # === Raise
  # 'Invalid repository type':: If repository type is not known
  def scrape(repo, &callback)
    repo = RightScale::Repository.from_hash(repo) if repo.is_a?(Hash)
    raise "Invalid repository type" unless SCRAPERS.include?(repo.repo_type)
    @scraper = @scrapers[repo.repo_type] ||= SCRAPERS[repo.repo_type].new(@scrape_dir)
    @scraper.scrape(repo, &callback)
    @repo_dir = @scraper.repo_dir
    @scraper.succeeded?
  end

  # Error messages in case of failure
  #
  # === Return
  # errors(Array):: Error messages of last scrape or empty array if no error
  # (also empty when nothing was scraped yet)
  def errors
    (@scraper && @scraper.errors) || []
  end

  # Was scraping successful?
  # Call errors to get error messages if false
  #
  # === Return
  # succeeded(Boolean):: true if last scrape finished with no error (or nothing
  # was scraped yet), false otherwise.
  def succeeded?
    # Errors are held by the underlying scraper, this class keeps none itself
    errors.empty?
  end

end
96
+ end
@@ -0,0 +1,99 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ require 'digest/md5'
25
+
26
+ module RightScale
27
+
28
# Base class for all scrapers.
# Concrete scrapers must override scrape_imp and may override incremental_update?
class ScraperBase

  # (String) Path to directory containing all scraped repositories
  attr_accessor :root_dir

  # repo(RightScale::Repository):: Last scraped repository
  # errors(Array):: Error messages if any
  # repo_dir(String):: Path to local directory where repository was downloaded
  attr_reader :repo, :errors, :repo_dir

  # Set path to directory containing all scraped repos
  #
  # === Parameters
  # root_dir(String):: Path to scraped repos parent directory
  def initialize(root_dir)
    @root_dir = root_dir
  end

  # Common implementation of scrape method for all repository types.
  # Sets up all the scraper attributes, wipes the local repository directory
  # unless an incremental update is possible, then hands over to scrape_imp.
  # See RightScale::Scraper#scrape
  #
  # === Return
  # true:: Always return true, use succeeded? to check the outcome
  def scrape(repo, &callback)
    @repo            = repo
    @callback        = callback
    # Local directory name is the MD5 digest of the repository's string form
    @scrape_dir_name = Digest::MD5.hexdigest(repo.to_s)
    @scrape_dir_path = File.join(root_dir, @scrape_dir_name)
    @repo_dir        = File.join(@scrape_dir_path, 'repo')
    @incremental     = incremental_update?
    @errors          = []
    FileUtils.rm_rf(@repo_dir) if !@incremental
    scrape_imp
    true
  end

  # Was last call to scrape successful?
  # Call errors to get error messages if false
  #
  # === Return
  # succeeded(TrueClass|FalseClass):: true if scrape finished with no error
  # (or scrape was never called), false otherwise.
  def succeeded?
    @errors.nil? || @errors.empty?
  end

  protected

  # Check whether it is possible to perform an incremental update of the repo
  #
  # === Return
  # true:: Scrape directory contains files belonging to the scraped repo and protocol supports
  # incremental updates
  # false:: Otherwise
  def incremental_update?
    # Incremental updates not supported by default
    false
  end

  # Override this method with scraper specific implementation in descendants
  #
  # === Return
  # true:: Implementations should always return true
  #
  # === Raise
  # 'Method not implemented':: Always, when not overridden
  def scrape_imp
    raise "Method not implemented"
  end

end
99
+ end
@@ -0,0 +1,58 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ module RightScale
25
+
26
# Scraper for repositories published as a tar archive that can be retrieved
# with curl. The archive is downloaded into the repository directory,
# expanded in place with tar and then deleted.
class DownloadScraper < ScraperBase

  # Download and expand remote repository, see RightScale::ScraperBase#scrape
  # Output of a failing curl or tar command is appended to the errors.
  #
  # === Return
  # true:: Always return true
  def scrape_imp
    msg = "Downloading repository '#{@repo.display_name}'"
    @callback.call(msg, true) if @callback # true: this message is a progress step
    filename = @repo.url.split('/').last
    # Credentials are only given to curl when both are set; they are quoted
    # like the other arguments so that special characters are left alone by the shell
    user_opt = @repo.first_credential && @repo.second_credential ? "--user '#{@repo.first_credential}:#{@repo.second_credential}'" : ''
    cmd = "curl --fail --silent --show-error --insecure --location #{user_opt} --output '#{@repo_dir}/#{filename}' '#{@repo.url}' 2>&1"
    FileUtils.mkdir_p(@repo_dir)
    res = `#{cmd}`
    @errors << res unless $?.success?
    if succeeded?
      Dir.chdir(@repo_dir) do
        cmd = "tar x#{unzip_option(filename)}f '#{filename}' 2>&1"
        res = `#{cmd}`
        @errors << res unless $?.success?
        # Only the expanded content is kept
        File.delete(filename)
      end
    end
    true
  end

  private

  # tar decompression flag corresponding to the extension of given file name
  #
  # === Parameters
  # filename(String):: Name of downloaded archive
  #
  # === Return
  # flag(String):: 'j' for bzip2 archives, 'z' for gzip archives, empty string otherwise
  def unzip_option(filename)
    # Only the last extension matters (e.g. '.gz' for 'repo.tar.gz')
    case File.extname(filename).downcase
    when '.bzip', '.bzip2', '.bz2' then 'j'
    when '.tgz', '.gzip', '.gz' then 'z'
    else ''
    end
  end

end
58
+ end
@@ -0,0 +1,168 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ module RightScale
25
+
26
# Scraper for git repositories: clones the repository the first time and
# pulls changes on subsequent scrapes when the local clone is still usable.
class GitScraper < ScraperBase

  # Check whether it is possible to perform an incremental update of the repo
  #
  # === Return
  # true:: Repository directory exists and its 'origin' remote URL is the URL
  # of the repository being scraped
  # false:: Otherwise
  def incremental_update?
    return false unless File.directory?(@repo_dir)
    Dir.chdir(@repo_dir) do
      remote_url = `git config --get remote.origin.url`.chomp
      $?.success? && remote_url == @repo.url
    end
  end

  # Scrape git repository, see RightScale::ScraperBase#scrape
  # Pull when an incremental update is possible and fall back to a fresh
  # clone when the pull fails. After a clone, check out the repository tag
  # when it is set to something other than 'master'.
  # Output of failing git commands is appended to the errors.
  # Note: repository URL and tag are interpolated unquoted into the git
  # command lines, they must not contain shell special characters.
  #
  # === Return
  # true:: Always return true
  def scrape_imp
    msg = @incremental ? "Pulling " : "Cloning "
    msg += "git repository '#{@repo.display_name}'"
    @callback.call(msg, true) if @callback # true: this message is a progress step
    ssh_cmd   = ssh_command
    res       = ""
    is_tag    = nil
    is_branch = nil

    if @incremental
      Dir.chdir(@repo_dir) do
        is_tag, is_branch, res = git_tag_kind(ssh_cmd)
        if !is_tag && !is_branch
          # Callback is optional, guard against scrape being called without a block
          @callback.call("Nothing to update: repo tag refers to neither a branch nor a tag", false) if @callback
          return true
        end
        if is_tag && is_branch
          @errors << 'Repository tag ambiguous: could be git tag or git branch'
        else
          tag = @repo.tag.nil? || @repo.tag.empty? ? 'master' : @repo.tag
          res += `#{ssh_cmd} git pull --quiet --depth 1 origin #{tag} 2>&1`
          if $? != 0
            @callback.call("Failed to pull repo: #{res}, falling back to cloning", false) if @callback
            FileUtils.rm_rf(@repo_dir)
            @incremental = false
          end
        end
      end
    end
    if !@incremental
      res += `#{ssh_cmd} git clone --quiet --depth 1 #{@repo.url} #{@repo_dir} 2>&1`
      @errors << res if $? != 0
      if !@repo.tag.nil? && !@repo.tag.empty? && @repo.tag != 'master' && succeeded?
        Dir.chdir(@repo_dir) do
          # Tag kind is already known when we got here after a failed pull
          if is_tag.nil?
            is_tag, is_branch, out = git_tag_kind(ssh_cmd)
            res += out
          end
          if is_tag && is_branch
            @errors << 'Repository tag ambiguous: could be git tag or git branch'
          elsif is_branch
            res += `git branch #{@repo.tag} origin/#{@repo.tag} 2>&1`
            @errors << res if $? != 0
          elsif !is_tag # Not a branch nor a tag, SHA ref? fetch everything so we have all SHAs
            res += `#{ssh_cmd} git fetch origin master --depth #{2**31 - 1} 2>&1`
            @errors << res if $? != 0
          end
          if succeeded?
            res += `git checkout #{@repo.tag} 2>&1`
            @errors << res if $? != 0
          end
        end
      end
    end
    true
  end

  # Default SSH options used with git
  DEFAULT_SSH_OPTIONS = { :PasswordAuthentication => 'no',
                          :HostbasedAuthentication => 'no',
                          :StrictHostKeyChecking => 'no',
                          :IdentitiesOnly => 'yes' }

  # SSH configuration content built from default options and given custom options
  # Custom options take precedence over default options.
  #
  # === Parameters
  # opts(Hash):: Custom options, may be nil
  #
  # === Return
  # options(String):: One 'name=value' line per option
  def ssh_options(opts={})
    merged = DEFAULT_SSH_OPTIONS.merge(opts || {})
    merged.map { |k, v| "#{k}=#{v}\n" }.join
  end

  # Store private SSH key (repository first credential) into a '.ssh' folder
  # of the scrape directory and create a script that wraps SSH and uses this
  # key through a dedicated SSH configuration file.
  #
  # === Return
  # ssh(String):: Code to initialize GIT_SSH environment variable with path to SSH wrapper script
  def ssh_command
    ssh_dir = File.join(@scrape_dir_path, '.ssh')
    FileUtils.mkdir_p(ssh_dir)
    key_content = @repo.first_credential
    if key_content.nil?
      # Explicitly disable public key authentication so we don't end up using the system's key
      options = { :PubkeyAuthentication => 'no' }
    else
      ssh_key_path = File.join(ssh_dir, 'ssh.pub')
      # Create the key file readable by its owner only from the start; the
      # chmod takes care of a file left over from a previous scrape
      File.open(ssh_key_path, 'w', 0600) { |f| f.puts(key_content) }
      File.chmod(0600, ssh_key_path)
      options = { :IdentityFile => ssh_key_path }
    end
    ssh_config = File.join(ssh_dir, 'ssh_config')
    File.open(ssh_config, 'w') { |f| f.puts(ssh_options(options)) }
    ssh = File.join(ssh_dir, 'ssh')
    File.open(ssh, 'w') { |f| f.puts("ssh -F #{ssh_config} $*") }
    File.chmod(0755, ssh)
    "GIT_SSH=#{ssh}"
  end

  # Resolves whether repository tag is a git tag or a git branch
  # Return output of run commands too
  # A nil, empty or 'master' repository tag is reported as a branch without
  # running any command.
  # Note:: Assume that current working directory is a git directory
  #
  # === Parameters
  # ssh_cmd<String>:: SSH command to be used with git if any
  #
  # === Return
  # res<Array>::
  # - res[0] is true if git repo has a tag with a name corresponding to the repository tag
  # - res[1] is true if git repo has a branch with a name corresponding to the repository tag
  # - res[2] contains the git output
  def git_tag_kind(ssh_cmd)
    return [ false, true, "" ] if @repo.tag.nil? || @repo.tag.empty? || @repo.tag == 'master'
    output = `#{ssh_cmd} git fetch --tags --depth 1 2>&1`
    is_tag = `git tag`.split("\n").include?(@repo.tag)
    is_branch = `git branch -r`.split("\n").map { |t| t.strip }.include?("origin/#{@repo.tag}")
    [ is_tag, is_branch, output ]
  end

end
168
+ end