right_scraper 1.0.26 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +16 -0
- data/README.rdoc +9 -28
- data/Rakefile +51 -39
- data/lib/right_scraper/builders/base.rb +64 -0
- data/lib/right_scraper/builders/filesystem.rb +96 -0
- data/lib/right_scraper/builders/union.rb +57 -0
- data/lib/right_scraper/logger.rb +102 -0
- data/lib/right_scraper/loggers/noisy.rb +85 -0
- data/lib/right_scraper/processes/ssh.rb +188 -0
- data/lib/right_scraper/repositories/base.rb +299 -0
- data/lib/right_scraper/repositories/download.rb +90 -0
- data/lib/right_scraper/repositories/git.rb +92 -0
- data/lib/right_scraper/repositories/mock.rb +70 -0
- data/lib/right_scraper/repositories/svn.rb +96 -0
- data/lib/right_scraper/resources/base.rb +70 -0
- data/{spec/scraper_base_spec.rb → lib/right_scraper/resources/cookbook.rb} +9 -23
- data/lib/right_scraper/resources/workflow.rb +55 -0
- data/lib/right_scraper/retrievers/base.rb +114 -0
- data/lib/right_scraper/retrievers/checkout.rb +79 -0
- data/lib/right_scraper/retrievers/download.rb +97 -0
- data/lib/right_scraper/retrievers/git.rb +140 -0
- data/lib/right_scraper/retrievers/svn.rb +87 -0
- data/lib/right_scraper/scanners/base.rb +111 -0
- data/lib/right_scraper/scanners/cookbook_manifest.rb +59 -0
- data/lib/right_scraper/scanners/cookbook_metadata.rb +69 -0
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +84 -0
- data/lib/right_scraper/scanners/union.rb +89 -0
- data/lib/right_scraper/scanners/workflow_manifest.rb +86 -0
- data/lib/right_scraper/scanners/workflow_metadata.rb +70 -0
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +85 -0
- data/lib/right_scraper/scraper.rb +81 -57
- data/lib/right_scraper/scraper_logger.rb +61 -0
- data/lib/right_scraper/scrapers/base.rb +262 -0
- data/lib/right_scraper/scrapers/cookbook.rb +73 -0
- data/lib/right_scraper/scrapers/workflow.rb +88 -0
- data/lib/right_scraper/svn_client.rb +101 -0
- data/lib/right_scraper/version.rb +28 -0
- data/lib/right_scraper.rb +35 -11
- data/right_scraper.gemspec +26 -13
- data/right_scraper.rconf +13 -0
- data/spec/builder_spec.rb +50 -0
- data/spec/cookbook_helper.rb +73 -0
- data/spec/cookbook_manifest_spec.rb +55 -0
- data/spec/cookbook_s3_upload_spec.rb +152 -0
- data/spec/download/download_retriever_spec.rb +118 -0
- data/spec/download/download_retriever_spec_helper.rb +72 -0
- data/spec/download/download_spec.rb +130 -0
- data/spec/download/multi_dir_spec.rb +106 -0
- data/spec/download/multi_dir_spec_helper.rb +40 -0
- data/spec/git/cookbook_spec.rb +166 -0
- data/spec/git/demokey +27 -0
- data/spec/git/demokey.pub +1 -0
- data/spec/git/password_key +30 -0
- data/spec/git/password_key.pub +1 -0
- data/spec/git/repository_spec.rb +110 -0
- data/spec/git/retriever_spec.rb +505 -0
- data/spec/git/retriever_spec_helper.rb +112 -0
- data/spec/git/scraper_spec.rb +136 -0
- data/spec/git/ssh_spec.rb +170 -0
- data/spec/git/url_spec.rb +103 -0
- data/spec/logger_spec.rb +185 -0
- data/spec/repository_spec.rb +89 -23
- data/spec/{scraper_spec_helper_base.rb → retriever_spec_helper.rb} +41 -27
- data/spec/scanner_spec.rb +61 -0
- data/spec/scraper_helper.rb +96 -0
- data/spec/scraper_spec.rb +123 -45
- data/spec/spec_helper.rb +87 -14
- data/spec/svn/cookbook_spec.rb +97 -0
- data/spec/svn/multi_svn_spec.rb +64 -0
- data/spec/svn/multi_svn_spec_helper.rb +40 -0
- data/spec/svn/repository_spec.rb +72 -0
- data/spec/svn/retriever_spec.rb +261 -0
- data/spec/svn/scraper_spec.rb +90 -0
- data/spec/svn/{svn_scraper_spec_helper.rb → svn_retriever_spec_helper.rb} +46 -27
- data/spec/svn/url_spec.rb +47 -0
- data/spec/url_spec.rb +164 -0
- metadata +203 -31
- data/lib/right_scraper/linux/process_monitor.rb +0 -84
- data/lib/right_scraper/repository.rb +0 -78
- data/lib/right_scraper/scraper_base.rb +0 -175
- data/lib/right_scraper/scrapers/download_scraper.rb +0 -67
- data/lib/right_scraper/scrapers/git_scraper.rb +0 -283
- data/lib/right_scraper/scrapers/svn_scraper.rb +0 -119
- data/lib/right_scraper/watcher.rb +0 -158
- data/lib/right_scraper/win32/process_monitor.rb +0 -98
- data/spec/download/download_scraper_spec.rb +0 -94
- data/spec/git/git_scraper_spec.rb +0 -165
- data/spec/git/git_scraper_spec_helper.rb +0 -72
- data/spec/rcov.opts +0 -1
- data/spec/spec.opts +0 -2
- data/spec/svn/svn_scraper_spec.rb +0 -148
- data/spec/watcher_spec.rb +0 -74
@@ -1,84 +0,0 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
-
#
|
4
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
-
# a copy of this software and associated documentation files (the
|
6
|
-
# 'Software'), to deal in the Software without restriction, including
|
7
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
-
# the following conditions:
|
11
|
-
#
|
12
|
-
# The above copyright notice and this permission notice shall be
|
13
|
-
# included in all copies or substantial portions of the Software.
|
14
|
-
#
|
15
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
-
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
-
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
-
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
-
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
-
#++
|
23
|
-
|
24
|
-
module RightScale

  # *nix specific process monitor implementation.
  #
  # Forks a child process, captures its combined standard output and standard
  # error through a pipe, and reports the captured output and the exit code to
  # a caller-supplied block from a dedicated reader thread.
  class ProcessMonitor

    # Spawn given process and callback given block with output and exit code. This method
    # accepts a variable number of parameters; the first param is always the command to
    # run; successive parameters are command-line arguments for the process.
    #
    # === Parameters
    # cmd(String):: Name of the command to run
    # arg1(String):: Optional, first command-line argument
    # arg2(String):: Optional, second command-line argument
    # ...
    # argN(String):: Optional, Nth command-line argument
    #
    # === Block
    # Given block should take one argument which is a hash which may contain
    # the keys :output and :exit_code. The value associated with :output is a chunk
    # of output while the value associated with :exit_code is the process exit code.
    # The block is called from the reader thread, not from the calling thread, and
    # won't be called anymore once the :exit_code key has an associated value.
    #
    # === Return
    # pid(Integer):: Spawned process pid
    def spawn(cmd, *args)
      args = args.map { |a| a.to_s } # exec only likes string arguments

      # Run subprocess; capture its output using a pipe
      pr, pw = IO.pipe
      @pid = fork do
        pr.close
        # Reopen by path so no extra File object is created in the child
        STDIN.reopen('/dev/null')
        STDOUT.reopen(pw)
        STDERR.reopen(pw)
        exec(cmd, *args)
      end

      # Monitor subprocess output and status in a dedicated thread.
      # The parent must close its copy of the write end, otherwise the
      # reader would never see end-of-file.
      pw.close
      @io = pr
      @reader = Thread.new do
        until @io.eof?
          yield(:output => @io.read)
        end
        Process.wait(@pid)
        yield(:exit_code => $?.exitstatus)
      end

      @pid
    end

    # Join reader thread and close io.
    # Safe to call before spawn and safe to call more than once.
    #
    # === Return
    # true:: Always return true
    def cleanup
      @reader.join if @reader
      @io.close if @io && !@io.closed?
      true
    end

  end
end
|
@@ -1,78 +0,0 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
-
#
|
4
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
-
# a copy of this software and associated documentation files (the
|
6
|
-
# 'Software'), to deal in the Software without restriction, including
|
7
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
-
# the following conditions:
|
11
|
-
#
|
12
|
-
# The above copyright notice and this permission notice shall be
|
13
|
-
# included in all copies or substantial portions of the Software.
|
14
|
-
#
|
15
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
-
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
-
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
-
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
-
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
-
#++
|
23
|
-
|
24
|
-
module RightScale

  # Description of remote repository that needs to be scraped.
  class Repository

    # (String) Human readable repository name used for progress reports
    attr_accessor :display_name

    # (String) One of 'git', 'svn' or 'download'
    attr_accessor :repo_type

    # (String) URL to repository (e.g 'git://github.com/rightscale/right_scraper.git')
    attr_accessor :url

    # (String) Optional, tag or branch of repository that should be downloaded
    # Not used for 'download' repositories
    attr_accessor :tag

    # (Array) List of directories containing cookbooks in repository
    # Root directory is used if this is nil or empty
    attr_accessor :cookbooks_path

    # (String) Optional, SVN username or git private SSH key content
    attr_accessor :first_credential

    # (String) Optional, SVN password
    attr_accessor :second_credential

    # Initialize repository from given hash
    # Hash keys (strings or symbols) should correspond to attributes of this class
    #
    # === Parameters
    # opts(Hash):: Hash to be converted into a RightScale::Repository instance,
    #              nil is treated as an empty hash
    #
    # === Return
    # repo(RightScale::Repository):: Resulting repository instance
    #
    # === Raise
    # NoMethodError:: If a key does not correspond to a public attribute writer
    def self.from_hash(opts)
      repo = new
      (opts || {}).each do |k, v|
        # public_send so that hash keys can only ever reach public writers
        repo.public_send("#{k}=", v)
      end
      repo
    end

    # Unique representation for this repo, should resolve to the same string
    # for repos that should be cloned in same directory
    #
    # === Return
    # res(String):: Unique representation for this repo
    def to_s
      "#{repo_type} #{url}:#{tag}"
    end

  end
end
|
@@ -1,175 +0,0 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
-
#
|
4
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
-
# a copy of this software and associated documentation files (the
|
6
|
-
# 'Software'), to deal in the Software without restriction, including
|
7
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
-
# the following conditions:
|
11
|
-
#
|
12
|
-
# The above copyright notice and this permission notice shall be
|
13
|
-
# included in all copies or substantial portions of the Software.
|
14
|
-
#
|
15
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
-
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
-
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
-
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
-
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
-
#++
|
23
|
-
|
24
|
-
require 'digest/md5'
|
25
|
-
|
26
|
-
module RightScale

  # Base class for all scrapers.
  # Actual scraper implementation should override scrape_imp and optionally incremental_update?
  class ScraperBase

    # (String) Path to directory containing all scraped repositories
    attr_accessor :root_dir

    # (RightScale::Repository) Last scraped repository
    attr_reader :repo

    # (Array) Error messages if any, empty until a scrape reports a failure
    attr_reader :errors

    # (String) Path to local directory where repository was downloaded
    attr_reader :current_repo_dir

    # Set path to directory containing all scraped repos as well as space and time upperbounds
    #
    # === Parameters
    # root_dir(String):: Path to scraped repos parent directory
    # max_bytes(Integer):: Maximum size allowed for repos, -1 for no limit (default)
    # max_seconds(Integer):: Maximum number of seconds a single scrape operation should take, -1 for no limit (default)
    def initialize(root_dir, max_bytes = -1, max_seconds = -1)
      @root_dir = root_dir
      @watcher  = Watcher.new(max_bytes, max_seconds)
      @errors   = [] # so that errors is an Array even before the first scrape
    end

    # Common implementation of scrape method for all repository types.
    # Each scraper implementation should override scrape_imp which is called
    # after this method initializes all the scraper attributes properly.
    # See RightScale::Scraper#scrape
    #
    # === Parameters
    # repo(RightScale::Repository):: Repository to be scraped
    # incremental(TrueClass|FalseClass):: Whether an incremental update may be attempted,
    #                                     only honored if incremental_update? agrees
    #
    # === Block
    # Optional progress callback, called with a message and a boolean telling
    # whether the message announces a new step
    #
    # === Return
    # true:: Always return true
    def scrape(repo, incremental=true, &callback)
      @repo             = repo
      @callback         = callback
      @current_repo_dir = ScraperBase.repo_dir(root_dir, repo)
      @scrape_dir_path  = File.expand_path(File.join(@current_repo_dir, '..'))
      @incremental      = incremental && incremental_update?
      @errors           = []
      # A non incremental scrape always starts from an empty directory
      FileUtils.rm_rf(@current_repo_dir) unless @incremental
      scrape_imp
      true
    end

    # Path to directory where given repo should be or was downloaded
    #
    # === Parameters
    # root_dir(String):: Path to directory containing all scraped repositories
    # repo(Hash|RightScale::Repository):: Remote repository corresponding to local directory
    #
    # === Return
    # repo_dir(String):: Path to local directory that corresponds to given repository
    def self.repo_dir(root_dir, repo)
      repo = Repository.from_hash(repo) if repo.is_a?(Hash)
      # Directory name is derived from the repo's string representation so
      # that equivalent repos resolve to the same directory
      File.join(root_dir, Digest::MD5.hexdigest(repo.to_s), 'repo')
    end

    # Was last call to scrape successful?
    # Call errors to get error messages if false
    #
    # === Return
    # succeeded(TrueClass|FalseClass):: true if scrape finished with no error, false otherwise.
    def succeeded?
      @errors.nil? || @errors.empty?
    end

    protected

    # Check whether it is possible to perform an incremental update of the repo
    #
    # === Return
    # true:: Scrape directory contains files belonging to the scraped repo and protocol supports
    #        incremental updates
    # false:: Otherwise
    def incremental_update?
      false # Incremental updates not supported by default
    end

    # Override this method with scraper specific implementation in descendants
    #
    # === Return
    # true:: Always return true
    #
    # === Raise
    # RuntimeError:: Always, unless overridden
    def scrape_imp
      raise "Method not implemented"
    end

    # Update state of scraper according to status returned by watcher
    #
    # A timeout or a size overrun records an error and deletes the local copy.
    # An unexpected exit code records an error, except during an incremental
    # update: then the local copy is deleted, the callback is notified and
    # the scraper is switched to non incremental mode without recording an error.
    #
    # === Parameters
    # res(RightScale::WatchResult):: Watcher status to be analyzed
    # msg_title(String):: Error message title in case of failure
    # ok_codes:: Successful process return codes, only 0 by default
    #
    # === Return
    # true:: Always return true
    def handle_watcher_result(res, msg_title, ok_codes=[0])
      if res.status == :timeout
        @errors << "#{msg_title} is taking more time than #{@watcher.max_seconds / 60} minutes, aborting..."
        FileUtils.rm_rf(@current_repo_dir)
      elsif res.status == :size_exceeded
        @errors << "#{msg_title} is taking more space than #{@watcher.max_bytes / 1048576} MB, aborting..."
        FileUtils.rm_rf(@current_repo_dir)
      elsif !ok_codes.include?(res.exit_code)
        if @incremental
          @callback.call("#{msg_title} failed: #{res.output}, reverting to non incremental update", false) if @callback
          FileUtils.rm_rf(@current_repo_dir)
          @incremental = false
        else
          @errors << "#{msg_title} failed: #{res.output}"
        end
      end
      true
    end

    # Spawn given process, wait for it to complete, and return its output. The exit status
    # of the process is expected to be available in the $? global. Functions similarly to the
    # backtick operator, only it avoids invoking the command interpreter under operating systems
    # that support fork-and-exec.
    #
    # NOTE(review): the process is spawned and waited for by ProcessMonitor; if it
    # waits on another thread, $? may not be visible to the caller of this method
    # since $? is thread-local - confirm against ProcessMonitor.
    #
    # This method accepts a variable number of parameters; the first param is always the
    # command to run; successive parameters are command-line arguments for the process.
    #
    # === Parameters
    # cmd(String):: Name of the command to run
    # arg1(String):: Optional, first command-line argument
    # arg2(String):: Optional, second command-line argument
    # ...
    # argN(String):: Optional, Nth command-line argument
    #
    # === Return
    # output(String):: The process' output
    def run(cmd, *args)
      pm     = ProcessMonitor.new
      output = StringIO.new

      pm.spawn(cmd, *args) do |options|
        output << options[:output] if options[:output]
      end

      # cleanup waits for the process to finish, all output is buffered afterwards
      pm.cleanup
      output.string
    end

  end
end
|
@@ -1,67 +0,0 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
-
#
|
4
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
-
# a copy of this software and associated documentation files (the
|
6
|
-
# 'Software'), to deal in the Software without restriction, including
|
7
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
-
# the following conditions:
|
11
|
-
#
|
12
|
-
# The above copyright notice and this permission notice shall be
|
13
|
-
# included in all copies or substantial portions of the Software.
|
14
|
-
#
|
15
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
-
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
-
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
-
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
-
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
-
#++
|
23
|
-
|
24
|
-
module RightScale

  # Scraper for repositories published as a tar archive (optionally gzip or
  # bzip2 compressed) that is downloaded with curl and expanded with tar.
  class DownloadScraper < ScraperBase

    # Download and expand remote repository, see RightScale::ScraperBase#scrape
    #
    # === Return
    # true:: Always return true
    def scrape_imp
      msg = "Downloading repository '#{@repo.display_name}'"
      @callback.call(msg, true) if @callback
      filename = @repo.url.split('/').last

      # NOTE(review): --insecure makes curl skip TLS certificate verification and
      # the credentials are passed on the command line - confirm both are intended
      args = ['--fail', '--silent', '--show-error', '--insecure', '--location']
      args += user_option
      args += ['--output', "#{@current_repo_dir}/#{filename}", @repo.url]

      FileUtils.mkdir_p(@current_repo_dir)
      res = @watcher.launch_and_watch('curl', args, @current_repo_dir)
      handle_watcher_result(res, 'Download')
      if succeeded?
        Dir.chdir(@current_repo_dir) do
          res = run('tar', "x#{unzip_option(filename)}f", filename)
          @errors << res unless $?.success?
          # Only the expanded content is kept
          File.delete(filename)
        end
      end
      true
    end

    private

    # curl arguments used to authenticate, only set if both credentials
    # of the repository are non blank
    #
    # === Return
    # opt(Array):: '--user' option followed by its value, empty array if no credentials
    def user_option
      user     = @repo.first_credential
      password = @repo.second_credential
      return [] if user.nil? || password.nil?
      return [] if user.strip.empty? || password.strip.empty?
      ['--user', "#{user}:#{password}"]
    end

    # tar decompression flag matching the extension of given file name.
    # Only the last extension is considered (e.g. '.gz' for 'repo.tar.gz'),
    # matching against the whole URL would also pick up the dots of the
    # host name and never recognize any archive type.
    #
    # === Parameters
    # filename(String):: Name of downloaded archive
    #
    # === Return
    # flag(String):: 'j' for bzip2, 'z' for gzip, empty string otherwise
    def unzip_option(filename)
      case File.extname(filename.to_s).downcase
      when '.bzip', '.bzip2', '.bz2' then 'j'
      when '.tgz', '.gzip', '.gz' then 'z'
      else ''
      end
    end

  end
end
|
@@ -1,283 +0,0 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
-
#
|
4
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
-
# a copy of this software and associated documentation files (the
|
6
|
-
# 'Software'), to deal in the Software without restriction, including
|
7
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
-
# the following conditions:
|
11
|
-
#
|
12
|
-
# The above copyright notice and this permission notice shall be
|
13
|
-
# included in all copies or substantial portions of the Software.
|
14
|
-
#
|
15
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
-
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
-
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
-
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
-
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
-
#++
|
23
|
-
|
24
|
-
module RightScale

  # Scraper for git repositories.
  # Clones the repository, or pulls/fetches into an existing clone when an
  # incremental update is possible, then checks out the requested tag,
  # branch or commit SHA if any.
  class GitScraper < ScraperBase

    # Depth given to git when all commits are needed, also overrides the
    # depth of an already (shallowly) cloned repo
    FULL_DEPTH = 2**31 - 1

    # Error reported when the requested ref names both a tag and a branch
    AMBIGUOUS_TAG_ERROR = 'Repository tag ambiguous: could be git tag or git branch'

    # Check whether it is possible to perform an incremental update of the repo
    #
    # === Return
    # true:: Scrape directory contains files belonging to the scraped repo and protocol supports
    #        incremental updates
    # false:: Otherwise
    def incremental_update?
      # FIX: current version of msysgit crashes attempting "git pull" on 64-bit
      # servers. we will avoid incremental for now in hopes of getting a fix for
      # msysgit or else a native Windows implementation such as Git#
      return false if is_windows? || !File.directory?(@current_repo_dir)
      Dir.chdir(@current_repo_dir) do
        remote_url = run('git', 'config', '--get', 'remote.origin.url').chomp
        $?.success? && remote_url == @repo.url
      end
    end

    # Scrape git repository, see RightScale::ScraperBase#scrape
    #
    # === Return
    # true:: Always return true
    def scrape_imp
      msg = @incremental ? 'Pulling ' : 'Cloning '
      msg += "git repository '#{@repo.display_name}'"
      @callback.call(msg, true) if @callback
      @ssh_cmd = ssh_command
      has_tag = !@repo.tag.nil? && !@repo.tag.empty?

      if @incremental
        up_to_date = Dir.chdir(@current_repo_dir) { update_existing_clone(has_tag) }
        return true if up_to_date
      end

      # @incremental is reset by handle_watcher_result when a pull or fetch
      # failed, in which case a fresh clone is attempted
      clone_repo(has_tag) if !@incremental && succeeded?
      true
    end

    # Default SSH options used with git
    DEFAULT_SSH_OPTIONS = { :PasswordAuthentication => 'no',
                            :HostbasedAuthentication => 'no',
                            :StrictHostKeyChecking => 'no',
                            :IdentitiesOnly => 'yes' }.freeze

    # SSH options built from default options and given custom options,
    # formatted as one 'name=value' line per option
    #
    # === Parameters
    # opts(Hash):: Custom options, override default options
    #
    # === Return
    # options(String):: SSH configuration lines
    def ssh_options(opts={})
      opts = DEFAULT_SSH_OPTIONS.merge(opts || {})
      opts.map { |k, v| "#{k}=#{v}\n" }.join
    end

    # Store private SSH key into temporary folder and create temporary script
    # that wraps SSH and uses this key.
    #
    # === Return
    # ssh(String):: Path to SSH wrapper script, to be used as value of the GIT_SSH
    #               environment variable
    def ssh_command
      return win32_ssh_command if is_windows?
      ssh_dir = File.join(@scrape_dir_path, '.ssh')
      FileUtils.mkdir_p(ssh_dir)
      key_content = @repo.first_credential
      if key_content.nil?
        # Explicitly disable public key authentication so we don't end up using the system's key
        options = { :PubkeyAuthentication => 'no' }
      else
        ssh_key_path = File.join(ssh_dir, 'id_rsa')
        # Create the key file with restrictive permissions from the start, the
        # chmod covers the case where the file already existed
        File.open(ssh_key_path, 'w', 0600) { |f| f.puts(key_content) }
        File.chmod(0600, ssh_key_path)
        options = { :IdentityFile => ssh_key_path }
      end
      ssh_config = File.join(ssh_dir, 'ssh_config')
      File.open(ssh_config, 'w') { |f| f.puts(ssh_options(options)) }
      ssh = File.join(ssh_dir, 'ssh')
      File.open(ssh, 'w') do |f|
        f.puts('#!/bin/sh')
        # "$@" forwards the arguments unchanged, $* would split them on whitespace
        f.puts(%(exec ssh -F "#{ssh_config}" "$@"))
      end
      File.chmod(0755, ssh)

      ssh
    end

    # Prepare SSH for git on Windows
    # The GIT_SSH trick doesn't seem to work on Windows, instead actually
    # save the private key in the user ssh folder.
    # Note: This will override any pre-existing SSH key that was on the system
    #
    # === Return
    # '':: Always return an empty string
    #
    # === Raise
    # Exception:: If the USERPROFILE environment variable is not set
    def win32_ssh_command
      key_content = @repo.first_credential
      unless key_content.nil?
        # resolve key file path.
        raise 'Environment variable USERPROFILE is missing' unless ENV['USERPROFILE']
        user_profile_dir_path = ENV['USERPROFILE']
        ssh_keys_dir = File.join(user_profile_dir_path, '.ssh')
        FileUtils.mkdir_p(ssh_keys_dir) unless File.directory?(ssh_keys_dir)
        ssh_key_file_path = File.join(ssh_keys_dir, 'id_rsa')

        # (re)create key file. must overwrite any existing credentials in case
        # we are switching repositories and have different credentials for each.
        File.open(ssh_key_file_path, 'w') { |f| f.puts(key_content) }

        # we need to create the "known_hosts" file or else the process will
        # halt in windows waiting for a yes/no response to the unknown
        # git host. this is normally handled by specifying
        # "-o StrictHostKeyChecking=no" in the GIT_SSH executable, but it is
        # still a mystery why this doesn't work properly in windows.
        # so make a ssh call which creates the proper "known_hosts" file.
        run('ssh', '-o', 'StrictHostKeyChecking=no', @repo.url.split(':').first)
      end
      ''
    end

    # Fetch remote commits using given depth
    # Check size of repo and time it takes to retrieve commits
    # Update errors collection upon failure (check for succeeded? after call)
    # Note: Assume that current working directory is a git directory
    #
    # === Parameters
    # opts[:depth](Integer):: Git fetch depth argument, full fetch if not set
    # opts[:merge]:: Do a pull if set
    # opts[:remote_tag]:: Remote ref to use, 'master' if not specified
    #
    # === Return
    # true:: Always return true
    def git_fetch(opts={})
      depth = opts[:depth] || FULL_DEPTH # Specify max to override depth of already cloned repo
      remote = opts[:remote_tag]
      remote = 'master' if remote.nil? || remote.rstrip.empty?
      action = (opts[:merge] ? 'pull' : 'fetch')
      # Command line arguments have to be strings
      args = [action, '--tags', '--depth', depth.to_s, 'origin', remote]
      res = with_git_ssh { @watcher.launch_and_watch('git', args, @current_repo_dir) }
      handle_watcher_result(res, "git #{action}")
    end

    # Does a git checkout to given tag
    # Update errors collection upon failure (check for succeeded? after call)
    # Note: Assume that current working directory is a git directory
    #
    # === Parameters
    # tag(String):: Tag to checkout
    #
    # === Return
    # output(String):: Output of git command
    def git_checkout(tag)
      output = run('git', 'checkout', tag)
      @errors << output unless $?.success?
      output
    end

    # Analyze repository tag to detect whether it's a branch, a tag or neither (i.e. SHA ref)
    # Also detect whether the branch is already checked out
    # Update errors collection upon failure (check for succeeded? after call)
    # Note: Assume that current working directory is a git directory
    #
    # === Return
    # res(Hash)::
    #   - res[:tag]:: true if git repo has a tag with a name corresponding to the repository tag
    #   - res[:branch]:: true if git repo has a branch with a name corresponding to the repository tag
    #   - res[:on_branch]:: true if branch is already checked out
    def analyze_repo_tag
      is_tag = is_branch = on_branch = nil
      begin
        is_tag = run('git', 'tag').split("\n").include?(@repo.tag)
        is_branch = run('git', 'branch', '-r').split("\n").map { |t| t.strip }.include?("origin/#{@repo.tag}")
        on_branch = is_branch && run('git', 'branch').split("\n").include?("* #{@repo.tag}")
      rescue StandardError => e
        # StandardError only: signals and exit requests must not be swallowed
        @errors << "Analysis of repository tag failed with: #{e.message}"
      end
      { :tag => is_tag, :branch => is_branch, :on_branch => on_branch }
    end

    private

    # Update already cloned repository
    # Note: Assume that current working directory is the cloned repository
    #
    # === Parameters
    # has_tag(TrueClass|FalseClass):: Whether the repository specifies a tag, branch or SHA
    #
    # === Return
    # true:: Clone is already at the requested SHA, nothing else to do
    # false:: Otherwise (check succeeded? and @incremental for outcome)
    def update_existing_clone(has_tag)
      unless has_tag
        git_fetch(:depth => 1, :merge => true)
        return false
      end

      analysis = analyze_repo_tag
      return false unless succeeded?
      is_tag    = analysis[:tag]
      is_branch = analysis[:branch]
      on_branch = analysis[:on_branch]
      checkout  = is_tag && !is_branch

      if is_tag && is_branch
        @errors << AMBIGUOUS_TAG_ERROR
      elsif !is_tag && !is_branch
        current_sha = run('git', 'rev-parse', 'HEAD').chomp
        if current_sha == @repo.tag
          @callback.call("Nothing to update: already using #{@repo.tag}", false) if @callback
          return true
        end
        # Probably a SHA, retrieve all commits
        git_fetch(:depth => FULL_DEPTH)
        checkout = true
      end

      # A failed fetch deletes the clone and resets @incremental (see
      # handle_watcher_result), nothing is left to checkout in that case
      return false unless succeeded? && @incremental

      if checkout
        git_checkout(@repo.tag)
      else
        git_checkout(@repo.tag) if is_branch && !on_branch
        git_fetch(:depth => 1, :merge => true, :remote_tag => @repo.tag)
      end
      false
    end

    # Clone repository from scratch and checkout requested tag, branch or SHA if any
    # Update errors collection upon failure (check for succeeded? after call)
    #
    # === Parameters
    # has_tag(TrueClass|FalseClass):: Whether the repository specifies a tag, branch or SHA
    #
    # === Return
    # true:: Always return true
    def clone_repo(has_tag)
      args = ['clone', '--quiet', '--depth', '1', @repo.url, @current_repo_dir]
      res = with_git_ssh { @watcher.launch_and_watch('git', args, @current_repo_dir) }
      handle_watcher_result(res, 'git clone')
      return true unless has_tag && succeeded?

      Dir.chdir(@current_repo_dir) do
        # Always analyze the fresh clone: results gathered from a previous
        # clone (e.g. which branch is checked out) do not apply to it
        analysis = analyze_repo_tag
        if succeeded?
          is_tag    = analysis[:tag]
          is_branch = analysis[:branch]
          on_branch = analysis[:on_branch]
          if is_tag && is_branch
            @errors << AMBIGUOUS_TAG_ERROR
          elsif is_branch
            unless on_branch
              output = run('git', 'branch', @repo.tag, "origin/#{@repo.tag}")
              @errors << output unless $?.success?
            end
          elsif !is_tag # Not a branch nor a tag, SHA ref? fetch everything so we have all SHAs
            git_fetch(:depth => FULL_DEPTH)
          end
          git_checkout(@repo.tag) if succeeded? && !on_branch
        end
      end
      true
    end

    # Run given block with the GIT_SSH environment variable set to the SSH
    # wrapper, the previous value is restored even if the block raises
    #
    # === Block
    # Code to run with GIT_SSH set
    #
    # === Return
    # res:: Value returned by the block
    def with_git_ssh
      previous = ENV['GIT_SSH']
      ENV['GIT_SSH'] = @ssh_cmd
      yield
    ensure
      ENV['GIT_SSH'] = previous
    end

    # Check for windows.
    #
    # === Return
    # true:: Ruby platform is a native Windows build (mswin or mingw)
    # false:: Otherwise
    def is_windows?
      !!(RUBY_PLATFORM =~ /mswin|mingw/)
    end

  end
end
|