right_scraper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.rdoc +81 -0
- data/Rakefile +74 -0
- data/lib/right_scraper/repository.rb +66 -0
- data/lib/right_scraper/scraper.rb +96 -0
- data/lib/right_scraper/scraper_base.rb +99 -0
- data/lib/right_scraper/scrapers/download_scraper.rb +58 -0
- data/lib/right_scraper/scrapers/git_scraper.rb +168 -0
- data/lib/right_scraper/scrapers/svn_scraper.rb +77 -0
- data/lib/right_scraper.rb +31 -0
- data/right_scraper.gemspec +55 -0
- data/spec/download_scraper_spec.rb +87 -0
- data/spec/git_scraper_spec.rb +108 -0
- data/spec/rcov.opts +1 -0
- data/spec/repository_spec.rb +45 -0
- data/spec/scraper_base_spec.rb +41 -0
- data/spec/scraper_spec.rb +67 -0
- data/spec/spec_helper.rb +111 -0
- data/spec/svn_scraper_spec.rb +104 -0
- metadata +76 -0
data/LICENSE
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Copyright (c) 2010 RightScale, Inc.
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the
|
|
5
|
+
'Software'), to deal in the Software without restriction, including
|
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
+
the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be
|
|
12
|
+
included in all copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
17
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
18
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
19
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
20
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
= RightScraper
|
|
2
|
+
|
|
3
|
+
== DESCRIPTION
|
|
4
|
+
|
|
5
|
+
=== Synopsis
|
|
6
|
+
|
|
7
|
+
RightScraper provides a simple interface to download and keep repositories up-to-date
|
|
8
|
+
using various protocols.
|
|
9
|
+
|
|
10
|
+
The supported protocols include:
|
|
11
|
+
- *git*: RightScraper will clone then pull repos from git
|
|
12
|
+
- *SVN*: RightScraper will checkout then update SVN repositories
|
|
13
|
+
- *tarballs*: Includes uncompressed (.tar), gzip (.tgz, .gzip) and bzip (.bzip, .bzip2) tar files.
|
|
14
|
+
|
|
15
|
+
The scraper first inspects the local directory to see if the repo has already been scraped
|
|
16
|
+
and if so runs some basic checks before it tries to update it. Incremental updates are not
|
|
17
|
+
supported with tar files.
|
|
18
|
+
|
|
19
|
+
=== Rationale
|
|
20
|
+
|
|
21
|
+
The idea is to have many repos that need to be downloaded/kept up-to-date in a central
|
|
22
|
+
place. Point the scraper to this central place and it will take care of creating unique
|
|
23
|
+
local directories for each remote repository and keep that mapping to download changes
|
|
24
|
+
incrementally upon request.
|
|
25
|
+
|
|
26
|
+
== USAGE
|
|
27
|
+
|
|
28
|
+
=== Simple Example
|
|
29
|
+
|
|
30
|
+
require 'rubygems'
|
|
31
|
+
require 'right_scraper'
|
|
32
|
+
|
|
33
|
+
scraper = RightScale::Scraper.new('/tmp')
|
|
34
|
+
scraper.scrape(:repo_type => 'git', :url => 'git://github.com/rightscale/right_scraper.git')
|
|
35
|
+
|
|
36
|
+
== INSTALLATION
|
|
37
|
+
|
|
38
|
+
RightScraper can be installed by entering the following at the command prompt:
|
|
39
|
+
|
|
40
|
+
gem install right_scraper
|
|
41
|
+
|
|
42
|
+
== DEPENDENCIES
|
|
43
|
+
|
|
44
|
+
RightScraper relies on the following tools:
|
|
45
|
+
- git
|
|
46
|
+
- svn
|
|
47
|
+
- curl
|
|
48
|
+
|
|
49
|
+
== TESTING
|
|
50
|
+
|
|
51
|
+
Install the following RubyGems required for testing:
|
|
52
|
+
- rspec
|
|
53
|
+
|
|
54
|
+
The build can be tested using the RSpec gem.
|
|
55
|
+
|
|
56
|
+
rake spec
|
|
57
|
+
|
|
58
|
+
== LICENSE
|
|
59
|
+
|
|
60
|
+
<b>RightScraper</b>
|
|
61
|
+
|
|
62
|
+
Copyright:: Copyright (c) 2010 RightScale, Inc.
|
|
63
|
+
|
|
64
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
65
|
+
a copy of this software and associated documentation files (the
|
|
66
|
+
'Software'), to deal in the Software without restriction, including
|
|
67
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
68
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
69
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
70
|
+
the following conditions:
|
|
71
|
+
|
|
72
|
+
The above copyright notice and this permission notice shall be
|
|
73
|
+
included in all copies or substantial portions of the Software.
|
|
74
|
+
|
|
75
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
76
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
77
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
78
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
79
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
80
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
81
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
require 'rubygems'
|
|
25
|
+
require 'fileutils'
|
|
26
|
+
require 'rake'
|
|
27
|
+
require 'spec/rake/spectask'
|
|
28
|
+
|
|
29
|
+
task :default => 'spec'

# == Unit Tests == #

desc "Run unit tests"
Spec::Rake::SpecTask.new do |t|
  t.spec_files = Dir['**/*_spec.rb']
end

desc "Run unit tests with RCov"
Spec::Rake::SpecTask.new(:rcov) do |t|
  t.spec_files = Dir['**/*_spec.rb']
  t.rcov = true
  # The options file is packaged in the spec directory (spec/rcov.opts),
  # not next to the Rakefile. It is read lazily, only when the task runs.
  t.rcov_opts = lambda do
    opts_file = File.join(File.dirname(__FILE__), 'spec', 'rcov.opts')
    IO.readlines(opts_file).map { |l| l.chomp.split " " }.flatten
  end
end

desc "Print Specdoc for unit tests"
Spec::Rake::SpecTask.new(:doc) do |t|
  t.spec_opts = ["--format", "specdoc", "--dry-run"]
  t.spec_files = Dir['**/*_spec.rb']
end

# == Gem Management == #

desc "Build right_scraper gem"
task :gem do
  # Running the gemspec builds the gem in the current directory;
  # the result is then moved into the pkg directory.
  ruby 'right_scraper.gemspec'
  pkg_dir = File.join(File.dirname(__FILE__), 'pkg')
  FileUtils.mkdir_p(pkg_dir)
  FileUtils.mv(Dir.glob(File.join(File.dirname(__FILE__), '*.gem')), pkg_dir)
end

desc 'Install the right_scraper library as a gem'
task :install => [:gem] do
  # Pick the most recently built gem instead of relying on glob ordering
  gems = Dir[File.join(File.dirname(__FILE__), 'pkg', '*.gem')]
  file = gems.max_by { |f| File.mtime(f) }
  raise "No gem found in pkg directory" if file.nil?
  sh "gem install #{file}"
end

desc 'Uninstalls and reinstalls the right_scraper library as a gem'
task :reinstall do
  sh "gem uninstall right_scraper"
  sh "rake install"
end
|
|
74
|
+
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
module RightScale
|
|
25
|
+
|
|
26
|
+
# Description of remote repository that needs to be scraped.
|
|
27
|
+
class Repository

  # (String) Human readable repository name used for progress reports
  attr_accessor :display_name

  # (String) One of 'git', 'svn' or 'download'
  attr_accessor :repo_type

  # (String) URL to repository (e.g 'git://github.com/rightscale/right_scraper.git')
  attr_accessor :url

  # (String) Optional, tag or branch of repository that should be downloaded
  # Not used for 'download' repositories
  attr_accessor :tag

  # (String) Optional, SVN username or git private SSH key content
  attr_accessor :first_credential

  # (String) Optional, SVN password
  attr_accessor :second_credential

  # Initialize repository from given hash
  # Hash keys should correspond to attributes of this class
  # (keys may be strings or symbols)
  #
  # === Parameters
  # opts(Hash):: Hash to be converted into a RightScale::Repository instance
  #
  # === Return
  # repo(RightScale::Repository):: Resulting repository instance
  #
  # === Raise
  # NoMethodError:: If a key does not correspond to a writable attribute
  def self.from_hash(opts)
    repo = new
    opts.each { |k, v| repo.__send__("#{k}=", v) }
    repo
  end

  # Stable textual identity of the repository, built from its type, URL
  # and tag only. Two instances describing the same remote content yield
  # the same string, unlike the default Object#to_s which embeds the
  # object id and therefore differs for every instance.
  #
  # === Return
  # res(String):: String of the form "<repo_type> <url>:<tag>"
  def to_s
    "#{repo_type} #{url}:#{tag}"
  end

end
|
|
65
|
+
|
|
66
|
+
end
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
module RightScale
|
|
25
|
+
|
|
26
|
+
# Maps each supported repository type name to the scraper class handling it
SCRAPERS = {
  'git'      => RightScale::GitScraper,
  'svn'      => RightScale::SvnScraper,
  'download' => RightScale::DownloadScraper
}
|
|
30
|
+
|
|
31
|
+
# Library main entry point. Instantiate this class and call the scrape
|
|
32
|
+
# method to download or update a remote repository to the local disk.
|
|
33
|
+
class Scraper

  # (String) Path to directory where remote repository was downloaded
  # Note: This will be a subfolder of the scrape directory (directory given to initializer)
  attr_reader :repo_dir

  # Initialize scrape destination directory
  #
  # === Parameters
  # scrape_dir(String):: Scrape destination directory
  def initialize(scrape_dir)
    @scrape_dir = scrape_dir
    @scrapers = {} # One scraper instance per repository type, created on first use
  end

  # Scrape given repository.
  # Create unique directory inside scrape directory when called for the first time.
  # Update content of unique directory incrementally when possible with further calls.
  #
  # === Parameters
  # repo(Hash|RightScale::Repository):: Repository to be scraped
  #                                     Note: repo can either be a Hash or a RightScale::Repository instance.
  #                                     See the RightScale::Repository class for valid Hash keys.
  #
  # === Block
  # If a block is given, it will be called back with progress information
  # the block should take two arguments:
  # - first argument is the string containing the info
  # - second argument is a boolean indicating whether to increment progress
  # The block is called exactly once with the increment flag set to true
  #
  # === Return
  # true:: If scrape was successful
  # false:: If scrape failed, call errors for information on failure
  #
  # === Raise
  # 'Invalid repository type':: If repository type is not known
  def scrape(repo, &callback)
    repo = RightScale::Repository.from_hash(repo) if repo.is_a?(Hash)
    raise "Invalid repository type" unless SCRAPERS.include?(repo.repo_type)
    @scraper = @scrapers[repo.repo_type] ||= SCRAPERS[repo.repo_type].new(@scrape_dir)
    @scraper.scrape(repo, &callback)
    @repo_dir = @scraper.repo_dir
    @scraper.succeeded?
  end

  # Error messages of the last scrape in case of failure
  #
  # === Return
  # errors(Array):: Error messages or empty array if no error (or if nothing was scraped yet)
  def errors
    (@scraper && @scraper.errors) || []
  end

  # Was the last scrape successful?
  # Call errors to get error messages if false
  #
  # The answer is derived from the errors of the scraper used last: this
  # class does not record errors itself, so reading an instance variable
  # of its own would report success unconditionally.
  #
  # === Return
  # succeeded(Boolean):: true if scrape finished with no error (or nothing was scraped yet), false otherwise.
  def succeeded?
    errors.empty?
  end

end
|
|
96
|
+
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
require 'digest/md5'
require 'fileutils'
|
|
25
|
+
|
|
26
|
+
module RightScale
|
|
27
|
+
|
|
28
|
+
# Base class for all scrapers.
|
|
29
|
+
# Actual scraper implementation should override scrape_imp and optionally incremental_update?
|
|
30
|
+
class ScraperBase

  # (String) Path to directory containing all scraped repositories
  attr_accessor :root_dir

  # (RightScale::Repository) Last scraped repository
  attr_reader :repo

  # (Array) Error messages if any
  attr_reader :errors

  # (String) Path to local directory where repository was downloaded
  attr_reader :repo_dir

  # Set path to directory containing all scraped repos
  #
  # === Parameters
  # root_dir(String):: Path to scraped repos parent directory
  def initialize(root_dir)
    @root_dir = root_dir
  end

  # Common implementation of scrape method for all repository types.
  # Each scraper implementation should override scrape_imp which is called
  # after this method initializes all the scraper attributes properly.
  # See RightScale::Scraper#scrape
  #
  # The per-repository directory name is the MD5 hex digest of repo.to_s,
  # so repositories with the same string representation share a directory.
  # The repository content itself lives in the 'repo' subdirectory, which
  # is wiped before scraping unless an incremental update is possible.
  #
  # === Parameters
  # repo(RightScale::Repository):: Repository to be scraped
  #
  # === Block
  # Optional progress callback, stored for use by scrape_imp
  #
  # === Return
  # true:: Always return true, use succeeded? and errors to check outcome
  def scrape(repo, &callback)
    @repo            = repo
    @callback        = callback
    @scrape_dir_name = Digest::MD5.hexdigest(repo.to_s)
    @scrape_dir_path = File.join(root_dir, @scrape_dir_name)
    @repo_dir        = File.join(@scrape_dir_path, 'repo')
    # Must be evaluated after @repo_dir is set: implementations inspect it
    @incremental     = incremental_update?
    @errors          = []
    FileUtils.rm_rf(@repo_dir) unless @incremental
    scrape_imp
    true
  end

  # Was last call to scrape successful?
  # Call errors to get error messages if false
  #
  # === Return
  # succeeded(TrueClass|FalseClass):: true if scrape finished with no error
  #                                   (or scrape was never called), false otherwise.
  def succeeded?
    @errors.nil? || @errors.empty?
  end

  protected

  # Check whether it is possible to perform an incremental update of the repo
  #
  # === Return
  # true:: Scrape directory contains files belonging to the scraped repo and protocol supports
  #        incremental updates
  # false:: Otherwise
  def incremental_update?
    false # Incremental updates not supported by default
  end

  # Override this method with scraper specific implementation in descendants
  # Implementations report failures by appending messages to @errors
  #
  # === Return
  # true:: Always return true
  #
  # === Raise
  # 'Method not implemented':: Always, unless overridden
  def scrape_imp
    raise "Method not implemented"
  end

end
|
|
99
|
+
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
module RightScale
|
|
25
|
+
|
|
26
|
+
class DownloadScraper < ScraperBase

  # Download and expand remote repository, see RightScale::ScraperBase#scrape
  #
  # The archive is fetched with curl into the repository directory, expanded
  # there with tar and then deleted. Output of failing commands is appended
  # to the errors list.
  #
  # === Return
  # true:: Always return true
  def scrape_imp
    msg = "Downloading repository '#{@repo.display_name}'"
    @callback.call(msg, true) if @callback
    filename = @repo.url.split('/').last
    user_opt = ''
    if @repo.first_credential && @repo.second_credential
      user_opt = "--user #{@repo.first_credential}:#{@repo.second_credential}"
    end
    FileUtils.mkdir_p(@repo_dir)
    # NOTE(review): URL and credentials are interpolated into a shell command
    # line as-is; values containing quotes or shell metacharacters will break
    # (or alter) the command. Confirm inputs are trusted or escape them.
    cmd = "curl --fail --silent --show-error --insecure --location #{user_opt} --output '#{@repo_dir}/#{filename}' '#{@repo.url}' 2>&1"
    res = `#{cmd}`
    @errors << res unless $?.success?
    if succeeded?
      unzip_opt = tar_compression_flag(filename)
      Dir.chdir(@repo_dir) do
        cmd = "tar x#{unzip_opt}f '#{filename}' 2>&1"
        res = `#{cmd}`
        @errors << res unless $?.success?
        # Archive is removed whether or not extraction succeeded
        File.delete(filename)
      end
    end
    true
  end

  private

  # Select tar decompression flag from the archive file extension
  #
  # Only the last extension of the file name is considered. Matching
  # against everything following the first dot of the URL would include
  # the host name and path and therefore never match a known extension.
  #
  # === Parameters
  # filename(String):: Archive file name
  #
  # === Return
  # flag(String):: 'j' for bzip2 archives, 'z' for gzip archives, '' otherwise
  def tar_compression_flag(filename)
    case File.extname(filename).downcase
    when '.bzip', '.bzip2', '.bz2' then 'j'
    when '.tgz', '.gzip', '.gz' then 'z'
    else ''
    end
  end

end
|
|
58
|
+
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
module RightScale
|
|
25
|
+
|
|
26
|
+
class GitScraper < ScraperBase

  # Check whether it is possible to perform an incremental update of the repo
  #
  # === Return
  # true:: Repository directory exists and its git 'origin' remote URL is the
  #        URL of the repository being scraped
  # false:: Otherwise
  def incremental_update?
    return false unless File.directory?(@repo_dir)
    Dir.chdir(@repo_dir) do
      remote_url = `git config --get remote.origin.url`.chomp
      $?.success? && remote_url == @repo.url
    end
  end

  # Scrape git repository, see RightScale::ScraperBase#scrape
  #
  # Pulls when an incremental update is possible and falls back to a fresh
  # clone if the pull fails. After cloning, checks out the repository tag
  # when one other than 'master' is set. Output of failing git commands is
  # appended to the errors list.
  #
  # === Return
  # true:: Always return true
  def scrape_imp
    msg = @incremental ? "Pulling " : "Cloning "
    msg += "git repository '#{@repo.display_name}'"
    @callback.call(msg, true) if @callback
    ssh_cmd = ssh_command
    res = ""
    is_tag = nil
    is_branch = nil

    if @incremental
      Dir.chdir(@repo_dir) do
        is_tag, is_branch, res = git_tag_kind(ssh_cmd)
        if !is_tag && !is_branch
          # The callback is optional, guard it like every other notification
          @callback.call("Nothing to update: repo tag refers to neither a branch nor a tag", false) if @callback
          return true
        end
        if is_tag && is_branch
          @errors << 'Repository tag ambiguous: could be git tag or git branch'
        else
          tag = @repo.tag.nil? || @repo.tag.empty? ? 'master' : @repo.tag
          res += `#{ssh_cmd} git pull --quiet --depth 1 origin #{tag} 2>&1`
          if $? != 0
            @callback.call("Failed to pull repo: #{res}, falling back to cloning", false) if @callback
            FileUtils.rm_rf(@repo_dir)
            @incremental = false
          end
        end
      end
    end
    if !@incremental
      res += `#{ssh_cmd} git clone --quiet --depth 1 #{@repo.url} #{@repo_dir} 2>&1`
      @errors << res if $? != 0
      if !@repo.tag.nil? && !@repo.tag.empty? && @repo.tag != 'master' && succeeded?
        Dir.chdir(@repo_dir) do
          # Tag kind is already known when a failed pull led here
          if is_tag.nil?
            is_tag, is_branch, out = git_tag_kind(ssh_cmd)
            res += out
          end
          if is_tag && is_branch
            @errors << 'Repository tag ambiguous: could be git tag or git branch'
          elsif is_branch
            res += `git branch #{@repo.tag} origin/#{@repo.tag} 2>&1`
            @errors << res if $? != 0
          elsif !is_tag # Not a branch nor a tag, SHA ref? fetch everything so we have all SHAs
            res += `#{ssh_cmd} git fetch origin master --depth #{2**31 - 1} 2>&1`
            @errors << res if $? != 0
          end
          if succeeded?
            res += `git checkout #{@repo.tag} 2>&1`
            @errors << res if $? != 0
          end
        end
      end
    end
    true
  end

  # Default SSH options used with git
  DEFAULT_SSH_OPTIONS = { :PasswordAuthentication  => 'no',
                          :HostbasedAuthentication => 'no',
                          :StrictHostKeyChecking   => 'no',
                          :IdentitiesOnly          => 'yes' }

  # SSH options command line built from default options and given custom options
  #
  # === Parameters
  # opts(Hash):: Custom options, take precedence over the defaults
  #
  # === Return
  # options(String):: SSH configuration, one "Name=value" entry per line
  def ssh_options(opts={})
    merged = DEFAULT_SSH_OPTIONS.merge(opts || {})
    merged.map { |k, v| "#{k}=#{v}\n" }.join
  end

  # Store SSH key given as first credential (if any) into the scrape
  # directory and create a script that wraps SSH and uses this key.
  # When no key is given, public key authentication is disabled.
  #
  # === Return
  # ssh(String):: Code to initialize GIT_SSH environment variable with path to SSH wrapper script
  def ssh_command
    ssh_dir = File.join(@scrape_dir_path, '.ssh')
    FileUtils.mkdir_p(ssh_dir)
    key_content = @repo.first_credential
    if key_content.nil?
      # Explicitly disable public key authentication so we don't end up using the system's key
      options = { :PubkeyAuthentication => 'no' }
    else
      ssh_key_path = File.join(ssh_dir, 'ssh.pub')
      # Create the key file with restrictive permissions from the start,
      # the chmod below takes care of a file left over from a previous run
      File.open(ssh_key_path, 'w', 0600) { |f| f.puts(key_content) }
      File.chmod(0600, ssh_key_path)
      options = { :IdentityFile => ssh_key_path }
    end
    ssh_config = File.join(ssh_dir, 'ssh_config')
    File.open(ssh_config, 'w') { |f| f.puts(ssh_options(options)) }
    ssh = File.join(ssh_dir, 'ssh')
    File.open(ssh, 'w') { |f| f.puts("ssh -F #{ssh_config} $*") }
    File.chmod(0755, ssh)
    "GIT_SSH=#{ssh}"
  end

  # Resolves whether repository tag is a git tag or a git branch
  # Return output of run commands too
  # Note:: Assume that current working directory is a git directory
  #
  # === Parameters
  # ssh_cmd<String>:: SSH command to be used with git if any
  #
  # === Return
  # res<Array>::
  #   - res[0] is true if git repo has a tag with a name corresponding to the repository tag
  #   - res[1] is true if git repo has a branch with a name corresponding to the repository tag
  #   - res[2] contains the git output
  def git_tag_kind(ssh_cmd)
    # No tag or 'master' is treated as a branch without querying git
    return [ false, true, "" ] if @repo.tag.nil? || @repo.tag.empty? || @repo.tag == 'master'
    output = `#{ssh_cmd} git fetch --tags --depth 1 2>&1`
    is_tag = `git tag`.split("\n").include?(@repo.tag)
    is_branch = `git branch -r`.split("\n").map { |t| t.strip }.include?("origin/#{@repo.tag}")
    [ is_tag, is_branch, output ]
  end

end
|
|
168
|
+
end
|