right_scraper 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.rdoc +81 -0
- data/Rakefile +74 -0
- data/lib/right_scraper/repository.rb +66 -0
- data/lib/right_scraper/scraper.rb +96 -0
- data/lib/right_scraper/scraper_base.rb +99 -0
- data/lib/right_scraper/scrapers/download_scraper.rb +58 -0
- data/lib/right_scraper/scrapers/git_scraper.rb +168 -0
- data/lib/right_scraper/scrapers/svn_scraper.rb +77 -0
- data/lib/right_scraper.rb +31 -0
- data/right_scraper.gemspec +55 -0
- data/spec/download_scraper_spec.rb +87 -0
- data/spec/git_scraper_spec.rb +108 -0
- data/spec/rcov.opts +1 -0
- data/spec/repository_spec.rb +45 -0
- data/spec/scraper_base_spec.rb +41 -0
- data/spec/scraper_spec.rb +67 -0
- data/spec/spec_helper.rb +111 -0
- data/spec/svn_scraper_spec.rb +104 -0
- metadata +76 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 RightScale, Inc.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
'Software'), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
= RightScraper
|
2
|
+
|
3
|
+
== DESCRIPTION
|
4
|
+
|
5
|
+
=== Synopsis
|
6
|
+
|
7
|
+
RightScraper provides a simple interface to download and keep repositories up-to-date
|
8
|
+
using various protocols.
|
9
|
+
|
10
|
+
The supported protocols include:
|
11
|
+
- *git*: RightScraper will clone then pull repos from git
|
12
|
+
- *SVN*: RightScraper will checkout then update SVN repositories
|
13
|
+
- *tarballs*: Includes uncompressed (.tar), gzip (.tgz, .gzip) and bzip (.bzip, .bzip2) tar files.
|
14
|
+
|
15
|
+
The scraper first inspects the local directory to see if the repo has already been scraped
|
16
|
+
and if so runs some basic checks before it tries to update it. Incremental updates are not
|
17
|
+
supported with tar files.
|
18
|
+
|
19
|
+
=== Rationale
|
20
|
+
|
21
|
+
The idea is to have many repos that need to be downloaded/kept up-to-date in a central
|
22
|
+
place. Point the scraper to this central place and it will take care of creating unique
|
23
|
+
local directories for each remote repository and keep that mapping to download changes
|
24
|
+
incrementally upon request.
|
25
|
+
|
26
|
+
== USAGE
|
27
|
+
|
28
|
+
=== Simple Example
|
29
|
+
|
30
|
+
require 'rubygems'
|
31
|
+
require 'right_scraper'
|
32
|
+
|
33
|
+
scraper = RightScale::Scraper.new('/tmp')
|
34
|
+
scraper.scrape(:repo_type => 'git', :url => 'git://github.com/rightscale/right_scraper.git')
|
35
|
+
|
36
|
+
== INSTALLATION
|
37
|
+
|
38
|
+
RightScraper can be installed by entering the following at the command prompt:
|
39
|
+
|
40
|
+
gem install right_scraper
|
41
|
+
|
42
|
+
== DEPENDENCIES
|
43
|
+
|
44
|
+
RightScraper relies on the following tools:
|
45
|
+
- git
|
46
|
+
- svn
|
47
|
+
- curl
|
48
|
+
|
49
|
+
== TESTING
|
50
|
+
|
51
|
+
Install the following RubyGems required for testing:
|
52
|
+
- rspec
|
53
|
+
|
54
|
+
The build can be tested using the RSpec gem.
|
55
|
+
|
56
|
+
rake spec
|
57
|
+
|
58
|
+
== LICENSE
|
59
|
+
|
60
|
+
<b>RightScraper</b>
|
61
|
+
|
62
|
+
Copyright:: Copyright (c) 2010 RightScale, Inc.
|
63
|
+
|
64
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
65
|
+
a copy of this software and associated documentation files (the
|
66
|
+
'Software'), to deal in the Software without restriction, including
|
67
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
68
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
69
|
+
permit persons to whom the Software is furnished to do so, subject to
|
70
|
+
the following conditions:
|
71
|
+
|
72
|
+
The above copyright notice and this permission notice shall be
|
73
|
+
included in all copies or substantial portions of the Software.
|
74
|
+
|
75
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
76
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
77
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
78
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
79
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
80
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
81
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'rubygems'
|
25
|
+
require 'fileutils'
|
26
|
+
require 'rake'
|
27
|
+
require 'spec/rake/spectask'
|
28
|
+
|
29
|
+
# Running plain 'rake' runs the unit tests
task :default => 'spec'

# == Unit Tests == #

desc "Run unit tests"
Spec::Rake::SpecTask.new do |t|
  t.spec_files = Dir['**/*_spec.rb']
end

desc "Run unit tests with RCov"
Spec::Rake::SpecTask.new(:rcov) do |t|
  t.spec_files = Dir['**/*_spec.rb']
  t.rcov = true
  # The options are loaded lazily when the task runs. Each line of the options
  # file may hold several whitespace separated options.
  # The options file lives in the 'spec' directory, not next to this Rakefile.
  t.rcov_opts = lambda do
    opts_file = File.join(File.dirname(__FILE__), 'spec', 'rcov.opts')
    IO.readlines(opts_file).map { |l| l.chomp.split " " }.flatten
  end
end

desc "Print Specdoc for unit tests"
Spec::Rake::SpecTask.new(:doc) do |t|
  t.spec_opts = ["--format", "specdoc", "--dry-run"]
  t.spec_files = Dir['**/*_spec.rb']
end

# == Gem Management == #

desc "Build right_scraper gem"
task :gem do
  # Running the gemspec produces the .gem file, which is then moved to the
  # 'pkg' directory
  ruby 'right_scraper.gemspec'
  pkg_dir = File.join(File.dirname(__FILE__), 'pkg')
  FileUtils.mkdir_p(pkg_dir)
  FileUtils.mv(Dir.glob(File.join(File.dirname(__FILE__), '*.gem')), pkg_dir)
end

desc 'Install the right_scraper library as a gem'
task :install => [:gem] do
  # Pick the gem that was built last: the position of a file in the list
  # returned by Dir[] says nothing about which gem is the most recent one
  file = Dir["pkg/*.gem"].max_by { |f| File.mtime(f) }
  raise "No gem found in 'pkg' directory" unless file
  sh "gem install #{file}"
end

desc 'Uninstalls and reinstalls the right_scraper library as a gem'
task :reinstall do
  sh "gem uninstall right_scraper"
  sh "rake install"
end
|
74
|
+
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
module RightScale

  # Description of remote repository that needs to be scraped.
  class Repository

    # (String) Human readable repository name used for progress reports
    attr_accessor :display_name

    # (String) One of 'git', 'svn' or 'download'
    attr_accessor :repo_type

    # (String) URL to repository (e.g 'git://github.com/rightscale/right_scraper.git')
    attr_accessor :url

    # (String) Optional, tag or branch of repository that should be downloaded
    # Not used for 'download' repositories
    attr_accessor :tag

    # (String) Optional, SVN username or git private SSH key content
    attr_accessor :first_credential

    # (String) Optional, SVN password
    attr_accessor :second_credential

    # Initialize repository from given hash
    # Hash keys should correspond to attributes of this class, they may be
    # given as strings or symbols
    #
    # === Parameters
    # opts(Hash):: Hash to be converted into a RightScale::Repository instance
    #
    # === Return
    # repo(RightScale::Repository):: Resulting repository instance
    #
    # === Raise
    # NoMethodError:: If a key does not correspond to an attribute writer
    def self.from_hash(opts)
      repo = RightScale::Repository.new
      opts.each do |k, v|
        repo.__send__("#{k}=".to_sym, v)
      end
      repo
    end

    # Stable textual representation of this repository.
    # Two instances describing the same remote content (same type, URL and
    # tag) yield the same string, whereas the default Object#to_s differs for
    # every instance. Credentials and display name are deliberately left out.
    #
    # === Return
    # res(String):: Repository type, URL and tag
    def to_s
      "#{repo_type} #{url}:#{tag}"
    end

  end

end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
module RightScale

  # Hash of repository types associated with corresponding scraper class
  SCRAPERS = { 'git'      => RightScale::GitScraper,
               'svn'      => RightScale::SvnScraper,
               'download' => RightScale::DownloadScraper }

  # Library main entry point. Instantiate this class and call the scrape
  # method to download or update a remote repository to the local disk.
  class Scraper

    # (String) Path to directory where remote repository was downloaded
    # Note: This will be a subfolder of the scrape directory (directory given to initializer)
    attr_reader :repo_dir

    # Initialize scrape destination directory
    #
    # === Parameters
    # scrape_dir(String):: Scrape destination directory
    def initialize(scrape_dir)
      @scrape_dir = scrape_dir
      @scrapers   = {} # Scraper instances, one per repository type, created on demand
    end

    # Scrape given repository.
    # Create unique directory inside scrape directory when called for the first time.
    # Update content of unique directory incrementally when possible with further calls.
    #
    # === Parameters
    # repo(Hash|RightScale::Repository):: Repository to be scraped
    # Note: repo can either be a Hash or a RightScale::Repository instance.
    # See the RightScale::Repository class for valid Hash keys.
    #
    # === Block
    # If a block is given, it will be called back with progress information
    # the block should take two arguments:
    # - first argument is the string containing the info
    # - second argument is a boolean indicating whether to increment progress
    # The block is called exactly once with the increment flag set to true
    #
    # === Return
    # true:: If scrape was successful
    # false:: If scrape failed, call errors for information on failure
    #
    # === Raise
    # 'Invalid repository type':: If repository type is not known
    def scrape(repo, &callback)
      repo = RightScale::Repository.from_hash(repo) if repo.is_a?(Hash)
      raise "Invalid repository type" unless SCRAPERS.include?(repo.repo_type)
      @scraper = @scrapers[repo.repo_type] ||= SCRAPERS[repo.repo_type].new(@scrape_dir)
      @scraper.scrape(repo, &callback)
      @repo_dir = @scraper.repo_dir
      @scraper.succeeded?
    end

    # Error messages of the last scrape in case of failure
    #
    # === Return
    # errors(Array):: Error messages or empty array if no error
    def errors
      @scraper && @scraper.errors || []
    end

    # Was last scrape successful?
    # Call errors to get error messages if false
    #
    # === Return
    # succeeded(Boolean):: true if last scrape finished with no error or if
    #                      nothing was scraped yet, false otherwise.
    def succeeded?
      # Errors are held by the underlying scraper, not by this object
      errors.empty?
    end

  end

end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'digest/md5'
|
25
|
+
|
26
|
+
module RightScale

  # Base class for all scrapers.
  # Actual scraper implementation should override scrape_imp and optionally incremental_update?
  class ScraperBase

    # (String) Path to directory containing all scraped repositories
    attr_accessor :root_dir

    # (RightScale::Repository) Last scraped repository
    attr_reader :repo

    # (Array) Error messages if any
    attr_reader :errors

    # (String) Path to local directory where repository was downloaded
    attr_reader :repo_dir

    # Set path to directory containing all scraped repos
    #
    # === Parameters
    # root_dir(String):: Path to scraped repos parent directory
    def initialize(root_dir)
      @root_dir = root_dir
    end

    # Common implementation of scrape method for all repository types.
    # Each scraper implementation should override scrape_imp which is called
    # after this method initializes all the scraper attributes properly.
    # See RightScale::Scraper#scrape
    #
    # === Parameters
    # repo(RightScale::Repository):: Repository to be scraped
    #
    # === Block
    # Optional progress callback, stored for use by scrape_imp
    #
    # === Return
    # true:: Always return true, use succeeded? and errors to check the outcome
    def scrape(repo, &callback)
      @repo            = repo
      @callback        = callback
      # NOTE(review): the directory name is derived from repo.to_s, so repo
      # must provide a to_s that is identical for all objects describing the
      # same repository, otherwise a previous scrape cannot be found again
      @scrape_dir_name = Digest::MD5.hexdigest(repo.to_s)
      @scrape_dir_path = File.join(root_dir, @scrape_dir_name)
      @repo_dir        = File.join(@scrape_dir_path, 'repo')
      @incremental     = incremental_update?
      @errors          = []
      # Start from scratch when existing content cannot be updated in place
      FileUtils.rm_rf(@repo_dir) unless @incremental
      scrape_imp
      true
    end

    # Was last call to scrape successful?
    # Call errors to get error messages if false
    #
    # === Return
    # succeeded(TrueClass|FalseClass):: true if scrape finished with no error
    #                                   or was never called, false otherwise.
    def succeeded?
      @errors.nil? || @errors.empty?
    end

    protected

    # Check whether it is possible to perform an incremental update of the repo
    #
    # === Return
    # true:: Scrape directory contains files belonging to the scraped repo and protocol supports
    #        incremental updates
    # false:: Otherwise
    def incremental_update?
      false # Incremental updates not supported by default
    end

    # Override this method with scraper specific implementation in descendants
    # Implementations report failures by appending messages to @errors
    #
    # === Return
    # true:: Implementations should always return true
    #
    # === Raise
    # 'Method not implemented':: Always, unless overridden
    def scrape_imp
      raise "Method not implemented"
    end

  end

end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'shellwords'

module RightScale

  # Scraper for repositories published as tar archives that are downloaded
  # with curl and expanded with tar.
  class DownloadScraper < ScraperBase

    # Download and expand remote repository, see RightScale::ScraperBase#scrape
    # Output of failing commands is appended to the error messages.
    #
    # === Return
    # true:: Always return true
    def scrape_imp
      msg = "Downloading repository '#{@repo.display_name}'"
      @callback.call(msg, true) if @callback # true: message is a progress step
      filename = @repo.url.split('/').last
      archive  = File.join(@repo_dir, filename)
      # Credentials are only used when both are set. URL, credentials and
      # paths are escaped since they end up on a shell command line.
      user_opt = ''
      if @repo.first_credential && @repo.second_credential
        user_opt = "--user #{Shellwords.escape("#{@repo.first_credential}:#{@repo.second_credential}")}"
      end
      cmd = "curl --fail --silent --show-error --insecure --location #{user_opt} " +
            "--output #{Shellwords.escape(archive)} #{Shellwords.escape(@repo.url)} 2>&1"
      FileUtils.mkdir_p(@repo_dir)
      res = `#{cmd}`
      @errors << res unless $?.success?
      if succeeded?
        Dir.chdir(@repo_dir) do
          res = `tar x#{unzip_option(filename)}f #{Shellwords.escape(filename)} 2>&1`
          @errors << res unless $?.success?
          # The archive itself is not part of the repository content
          File.delete(filename)
        end
      end
      true
    end

    private

    # tar decompression flag matching the extension of given file name
    #
    # === Parameters
    # filename(String):: Name of downloaded archive
    #
    # === Return
    # opt(String):: 'j' for bzip2 archives, 'z' for gzip archives, empty
    #               string for any other extension
    def unzip_option(filename)
      case File.extname(filename).downcase
      when '.bzip', '.bzip2', '.bz2', '.tbz', '.tbz2' then 'j'
      when '.tgz', '.gzip', '.gz' then 'z'
      else ''
      end
    end

  end

end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'shellwords'

module RightScale

  # Scraper for git repositories: clones the repository the first time and
  # pulls on further calls when possible.
  class GitScraper < ScraperBase

    # Check whether it is possible to perform an incremental update of the repo
    #
    # === Return
    # true:: Repository directory exists and its 'origin' remote URL is the URL of the
    #        repository being scraped
    # false:: Otherwise
    def incremental_update?
      return false unless File.directory?(@repo_dir)
      Dir.chdir(@repo_dir) do
        remote_url = `git config --get remote.origin.url`.chomp
        $?.success? && remote_url == @repo.url
      end
    end

    # Scrape git repository, see RightScale::ScraperBase#scrape
    # Pull when an incremental update is possible and fall back to a fresh
    # clone if pulling fails. Output of failing git commands is appended to
    # the error messages. Repository URL and tag are escaped before they are
    # put on a shell command line.
    #
    # === Return
    # true:: Always return true
    def scrape_imp
      msg = @incremental ? "Pulling " : "Cloning "
      msg += "git repository '#{@repo.display_name}'"
      @callback.call(msg, true) if @callback # true: message is a progress step
      ssh_cmd   = ssh_command
      res       = ""
      is_tag    = nil
      is_branch = nil

      if @incremental
        Dir.chdir(@repo_dir) do
          is_tag, is_branch, res = git_tag_kind(ssh_cmd)
          if !is_tag && !is_branch
            @callback.call("Nothing to update: repo tag refers to neither a branch nor a tag", false) if @callback
            return true
          end
          if is_tag && is_branch
            @errors << 'Repository tag ambiguous: could be git tag or git branch'
          else
            tag = @repo.tag.nil? || @repo.tag.empty? ? 'master' : @repo.tag
            res += `#{ssh_cmd} git pull --quiet --depth 1 origin #{Shellwords.escape(tag)} 2>&1`
            unless $?.success?
              @callback.call("Failed to pull repo: #{res}, falling back to cloning", false) if @callback
              FileUtils.rm_rf(@repo_dir)
              @incremental = false
            end
          end
        end
      end
      if !@incremental
        res += `#{ssh_cmd} git clone --quiet --depth 1 #{Shellwords.escape(@repo.url.to_s)} #{Shellwords.escape(@repo_dir)} 2>&1`
        @errors << res unless $?.success?
        if !@repo.tag.nil? && !@repo.tag.empty? && @repo.tag != 'master' && succeeded?
          safe_tag = Shellwords.escape(@repo.tag)
          Dir.chdir(@repo_dir) do
            # Tag kind is already known when falling back from a failed pull
            if is_tag.nil?
              is_tag, is_branch, out = git_tag_kind(ssh_cmd)
              res += out
            end
            if is_tag && is_branch
              @errors << 'Repository tag ambiguous: could be git tag or git branch'
            elsif is_branch
              res += `git branch #{safe_tag} origin/#{safe_tag} 2>&1`
              @errors << res unless $?.success?
            elsif !is_tag # Not a branch nor a tag, SHA ref? fetch everything so we have all SHAs
              res += `#{ssh_cmd} git fetch origin master --depth #{2**31 - 1} 2>&1`
              @errors << res unless $?.success?
            end
            if succeeded?
              res += `git checkout #{safe_tag} 2>&1`
              @errors << res unless $?.success?
            end
          end
        end
      end
      true
    end

    # Default SSH options used with git
    DEFAULT_SSH_OPTIONS = { :PasswordAuthentication => 'no',
                            :HostbasedAuthentication => 'no',
                            :StrictHostKeyChecking => 'no',
                            :IdentitiesOnly => 'yes' }

    # SSH configuration built from default options and given custom options
    # Custom options take precedence over default options.
    #
    # === Parameters
    # opts(Hash):: Custom options
    #
    # === Return
    # options(String):: SSH options, one 'name=value' pair per line
    def ssh_options(opts={})
      opts = DEFAULT_SSH_OPTIONS.merge(opts || {})
      opts.inject('') { |o, (k, v)| o << "#{k.to_s}=#{v}\n" }
    end

    # Store SSH key given as first repository credential (if any) into the
    # '.ssh' folder of the scrape directory and create a script in that
    # folder that wraps SSH and uses this key.
    # Note: despite its 'ssh.pub' name the key file holds the content of the
    # first credential as is.
    #
    # === Return
    # ssh(String):: Code to initialize GIT_SSH environment variable with path to SSH wrapper script
    def ssh_command
      ssh_dir = File.join(@scrape_dir_path, '.ssh')
      FileUtils.mkdir_p(ssh_dir)
      key_content = @repo.first_credential
      if key_content.nil?
        # Explicitly disable public key authentication so we don't end up using the system's key
        options = { :PubkeyAuthentication => 'no' }
      else
        ssh_key_path = File.join(ssh_dir, 'ssh.pub')
        # Create the file with restrictive permissions so that the key is
        # never readable by other users, chmod covers a pre-existing file
        File.open(ssh_key_path, 'w', 0600) { |f| f.puts(key_content) }
        File.chmod(0600, ssh_key_path)
        options = { :IdentityFile => ssh_key_path }
      end
      ssh_config = File.join(ssh_dir, 'ssh_config')
      File.open(ssh_config, 'w') { |f| f.puts(ssh_options(options)) }
      ssh = File.join(ssh_dir, 'ssh')
      File.open(ssh, 'w') { |f| f.puts("ssh -F #{ssh_config} $*") }
      File.chmod(0755, ssh)
      "GIT_SSH=#{ssh}"
    end

    # Resolves whether repository tag is a git tag or a git branch
    # Return output of run commands too
    # Note:: Assume that current working directory is a git directory
    #
    # === Parameters
    # ssh_cmd<String>:: SSH command to be used with git if any
    #
    # === Return
    # res<Array>::
    #   - res[0] is true if git repo has a tag with a name corresponding to the repository tag
    #   - res[1] is true if git repo has a branch with a name corresponding to the repository tag
    #   - res[2] contains the git output
    def git_tag_kind(ssh_cmd)
      # No tag or 'master' is handled as a branch without querying git
      return [ false, true, "" ] if @repo.tag.nil? || @repo.tag.empty? || @repo.tag == 'master'
      output = `#{ssh_cmd} git fetch --tags --depth 1 2>&1`
      is_tag = `git tag`.split("\n").include?(@repo.tag)
      is_branch = `git branch -r`.split("\n").map { |t| t.strip }.include?("origin/#{@repo.tag}")
      [ is_tag, is_branch, output ]
    end

  end

end
|