right_scraper 1.0.23 → 1.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/right_scraper/linux/process_monitor.rb +29 -12
- data/lib/right_scraper/scraper_base.rb +31 -0
- data/lib/right_scraper/scrapers/download_scraper.rb +14 -6
- data/lib/right_scraper/scrapers/git_scraper.rb +20 -15
- data/lib/right_scraper/scrapers/svn_scraper.rb +18 -17
- data/lib/right_scraper/watcher.rb +2 -2
- data/lib/right_scraper/win32/process_monitor.rb +9 -2
- data/right_scraper.gemspec +1 -1
- data/spec/watcher_spec.rb +8 -4
- metadata +19 -5
@@ -25,11 +25,16 @@ module RightScale
|
|
25
25
|
|
26
26
|
# *nix specific watcher implementation
|
27
27
|
class ProcessMonitor
|
28
|
-
|
29
|
-
#
|
28
|
+
# Spawn given process and callback given block with output and exit code. This method
|
29
|
+
# accepts a variable number of parameters; the first param is always the command to
|
30
|
+
# run; successive parameters are command-line arguments for the process.
|
30
31
|
#
|
31
32
|
# === Parameters
|
32
|
-
# cmd(String)::
|
33
|
+
# cmd(String):: Name of the command to run
|
34
|
+
# arg1(String):: Optional, first command-line argument
|
35
|
+
# arg2(String):: Optional, second command-line argument
|
36
|
+
# ...
|
37
|
+
# argN(String):: Optional, Nth command-line argument
|
33
38
|
#
|
34
39
|
# === Block
|
35
40
|
# Given block should take one argument which is a hash which may contain
|
@@ -39,19 +44,31 @@ module RightScale
|
|
39
44
|
#
|
40
45
|
# === Return
|
41
46
|
# pid(Integer):: Spawned process pid
|
42
|
-
def spawn(cmd)
|
43
|
-
|
44
|
-
|
47
|
+
def spawn(cmd, *args)
|
48
|
+
args = args.map { |a| a.to_s } #exec only likes string arguments
|
49
|
+
|
50
|
+
#Run subprocess; capture its output using a pipe
|
51
|
+
pr, pw = IO::pipe
|
52
|
+
@pid = fork do
|
53
|
+
pr.close
|
54
|
+
STDIN.reopen(File.open('/dev/null', 'r'))
|
55
|
+
STDOUT.reopen(pw)
|
56
|
+
STDERR.reopen(pw)
|
57
|
+
exec(cmd, *args)
|
58
|
+
end
|
59
|
+
|
60
|
+
#Monitor subprocess output and status in a dedicated thread
|
61
|
+
pw.close
|
62
|
+
@io = pr
|
45
63
|
@reader = Thread.new do
|
46
|
-
|
47
|
-
|
48
|
-
yield(:output => o)
|
49
|
-
o = @io.read
|
64
|
+
until @io.eof?
|
65
|
+
yield(:output => @io.read)
|
50
66
|
end
|
51
|
-
Process.wait(@
|
67
|
+
Process.wait(@pid)
|
52
68
|
yield(:exit_code => $?.exitstatus)
|
53
69
|
end
|
54
|
-
|
70
|
+
|
71
|
+
return @pid
|
55
72
|
end
|
56
73
|
|
57
74
|
# Close io and join reader thread
|
@@ -140,5 +140,36 @@ module RightScale
|
|
140
140
|
true
|
141
141
|
end
|
142
142
|
|
143
|
+
# Spawn given process, wait for it to complete, and return its output. The exit status
|
144
|
+
# of the process is available in the $? global. Functions similarly to the backtick
|
145
|
+
# operator, only it avoids invoking the command interpreter under operating systems
|
146
|
+
# that support fork-and-exec.
|
147
|
+
#
|
148
|
+
# This method accepts a variable number of parameters; the first param is always the
|
149
|
+
# command to run; successive parameters are command-line arguments for the process.
|
150
|
+
#
|
151
|
+
# === Parameters
|
152
|
+
# cmd(String):: Name of the command to run
|
153
|
+
# arg1(String):: Optional, first command-line argument
|
154
|
+
# arg2(String):: Optional, second command-line argument
|
155
|
+
# ...
|
156
|
+
# argN(String):: Optional, Nth command-line argument
|
157
|
+
#
|
158
|
+
# === Return
|
159
|
+
# output(String):: The process' output
|
160
|
+
def run(cmd, *args)
|
161
|
+
pm = ProcessMonitor.new
|
162
|
+
output = StringIO.new
|
163
|
+
|
164
|
+
pm.spawn(cmd, *args) do |options|
|
165
|
+
output << options[:output] if options[:output]
|
166
|
+
end
|
167
|
+
|
168
|
+
pm.cleanup
|
169
|
+
output.close
|
170
|
+
output = output.string
|
171
|
+
return output
|
172
|
+
end
|
173
|
+
|
143
174
|
end
|
144
175
|
end
|
@@ -33,10 +33,19 @@ module RightScale
|
|
33
33
|
msg = "Downloading repository '#{@repo.display_name}'"
|
34
34
|
@callback.call(msg, is_step=true) if @callback
|
35
35
|
filename = @repo.url.split('/').last
|
36
|
-
|
37
|
-
|
36
|
+
|
37
|
+
if @repo.first_credential && @repo.second_credential
|
38
|
+
user_opt = ['--user', "#{@repo.first_credential}:#{@repo.second_credential}"]
|
39
|
+
else
|
40
|
+
user_opt = []
|
41
|
+
end
|
42
|
+
|
43
|
+
args = ['--fail', '--silent', '--show-error', '--insecure', '--location']
|
44
|
+
args += user_opt
|
45
|
+
args += ['--output', "#{@current_repo_dir}/#{filename}", @repo.url]
|
46
|
+
|
38
47
|
FileUtils.mkdir_p(@current_repo_dir)
|
39
|
-
res = @watcher.launch_and_watch(
|
48
|
+
res = @watcher.launch_and_watch('curl', args, @current_repo_dir)
|
40
49
|
handle_watcher_result(res, 'Download')
|
41
50
|
if succeeded?
|
42
51
|
unzip_opt = case @repo.url[/\.(.*)$/]
|
@@ -45,9 +54,8 @@ module RightScale
|
|
45
54
|
else ''
|
46
55
|
end
|
47
56
|
Dir.chdir(@current_repo_dir) do
|
48
|
-
|
49
|
-
res
|
50
|
-
@errors << res if $? != 0
|
57
|
+
res = run('tar', "x#{unzip_opt}f", filename)
|
58
|
+
@errors << res unless $?.success?
|
51
59
|
File.delete(filename)
|
52
60
|
end
|
53
61
|
end
|
@@ -37,7 +37,7 @@ module RightScale
|
|
37
37
|
# msysgit or else a native Windows implementation such as Git#
|
38
38
|
return false if (is_windows? || !File.directory?(@current_repo_dir))
|
39
39
|
Dir.chdir(@current_repo_dir) do
|
40
|
-
remote_url =
|
40
|
+
remote_url = run('git', 'config', '--get', 'remote.origin.url').chomp
|
41
41
|
$?.success? && remote_url == @repo.url
|
42
42
|
end
|
43
43
|
end
|
@@ -67,7 +67,7 @@ module RightScale
|
|
67
67
|
if is_tag && is_branch
|
68
68
|
@errors << 'Repository tag ambiguous: could be git tag or git branch'
|
69
69
|
elsif !is_tag && !is_branch
|
70
|
-
current_sha =
|
70
|
+
current_sha = run('git', 'rev-parse', 'HEAD').chomp
|
71
71
|
if current_sha == @repo.tag
|
72
72
|
@callback.call("Nothing to update: already using #{@repo.tag}", is_step=false) if @callback
|
73
73
|
return true
|
@@ -93,8 +93,10 @@ module RightScale
|
|
93
93
|
end
|
94
94
|
|
95
95
|
if !@incremental && succeeded?
|
96
|
-
|
97
|
-
|
96
|
+
args = ['clone', '--quiet', '--depth', '1', @repo.url, @current_repo_dir]
|
97
|
+
ENV['GIT_SSH'] = @ssh_cmd
|
98
|
+
res = @watcher.launch_and_watch('git', args, @current_repo_dir)
|
99
|
+
ENV['GIT_SSH'] = nil
|
98
100
|
handle_watcher_result(res, 'git clone')
|
99
101
|
if has_tag && succeeded?
|
100
102
|
Dir.chdir(@current_repo_dir) do
|
@@ -109,8 +111,8 @@ module RightScale
|
|
109
111
|
@errors << 'Repository tag ambiguous: could be git tag or git branch'
|
110
112
|
elsif is_branch
|
111
113
|
if !on_branch
|
112
|
-
output =
|
113
|
-
@errors << output
|
114
|
+
output = run('git', 'branch', @repo.tag, "origin/#{repo.tag}")
|
115
|
+
@errors << output unless $?.success?
|
114
116
|
end
|
115
117
|
elsif !is_tag # Not a branch nor a tag, SHA ref? fetch everything so we have all SHAs
|
116
118
|
git_fetch(:depth => 2**31 -1)
|
@@ -167,7 +169,8 @@ module RightScale
|
|
167
169
|
ssh = File.join(ssh_dir, 'ssh')
|
168
170
|
File.open(ssh, 'w') { |f| f.puts("ssh -F #{ssh_config} $*") }
|
169
171
|
File.chmod(0755, ssh)
|
170
|
-
|
172
|
+
|
173
|
+
return ssh
|
171
174
|
end
|
172
175
|
|
173
176
|
# Prepare SSH for git on Windows
|
@@ -200,7 +203,7 @@ module RightScale
|
|
200
203
|
# "-o StrictHostKeyChecking=no" in the GIT_SSH executable, but it is
|
201
204
|
# still a mystery why this doesn't work properly in windows.
|
202
205
|
# so make a ssh call which creates the proper "known_hosts" file.
|
203
|
-
|
206
|
+
run('ssh', '-o', 'StrictHostKeyChecking=no', repo.url.split(':').first)
|
204
207
|
end
|
205
208
|
return ''
|
206
209
|
end
|
@@ -222,8 +225,10 @@ module RightScale
|
|
222
225
|
remote = opts[:remote_tag]
|
223
226
|
remote = 'master' if remote.nil? || remote.rstrip.empty?
|
224
227
|
action = (opts[:merge] ? 'pull' : 'fetch')
|
225
|
-
|
226
|
-
|
228
|
+
args = [action, '--tags', '--depth', depth, 'origin', remote]
|
229
|
+
ENV['GIT_SSH'] = @ssh_cmd
|
230
|
+
res = @watcher.launch_and_watch('git', args, @current_repo_dir)
|
231
|
+
ENV['GIT_SSH'] = nil
|
227
232
|
handle_watcher_result(res, "git #{action}")
|
228
233
|
end
|
229
234
|
|
@@ -237,8 +242,8 @@ module RightScale
|
|
237
242
|
# === Return
|
238
243
|
# output(String):: Output of git command
|
239
244
|
def git_checkout(tag)
|
240
|
-
output =
|
241
|
-
@errors << output
|
245
|
+
output = run('git', 'checkout', tag)
|
246
|
+
@errors << output unless $?.success?
|
242
247
|
output
|
243
248
|
end
|
244
249
|
|
@@ -255,9 +260,9 @@ module RightScale
|
|
255
260
|
def analyze_repo_tag
|
256
261
|
is_tag = is_branch = on_branch = nil
|
257
262
|
begin
|
258
|
-
is_tag =
|
259
|
-
is_branch =
|
260
|
-
on_branch = is_branch &&
|
263
|
+
is_tag = run('git', 'tag').split("\n").include?(@repo.tag)
|
264
|
+
is_branch = run('git', 'branch', '-r').split("\n").map { |t| t.strip }.include?("origin/#{@repo.tag}")
|
265
|
+
on_branch = is_branch && !!run('git', 'branch').split("\n").include?("* #{@repo.tag}")
|
261
266
|
rescue Exception => e
|
262
267
|
@errors << "Analysis of repository tag failed with: #{e.message}"
|
263
268
|
end
|
@@ -38,13 +38,13 @@ module RightScale
|
|
38
38
|
cookbooks_path = [ cookbooks_path ] unless cookbooks_path.is_a?(Array)
|
39
39
|
if cookbooks_path.empty?
|
40
40
|
Dir.chdir(@current_repo_dir) do
|
41
|
-
info =
|
41
|
+
info = run('svn', 'info')
|
42
42
|
inc = $?.success? && info =~ (/^URL: (.*)$/) && $1 == @repo.url
|
43
43
|
end
|
44
44
|
else
|
45
45
|
cookbooks_path.each do |path|
|
46
46
|
Dir.chdir(File.join(@current_repo_dir, path)) do
|
47
|
-
info =
|
47
|
+
info = run('svn', 'info')
|
48
48
|
inc = $?.success? && info =~ (/^URL: (.*)$/) && $1 == File.join(@repo.url, path)
|
49
49
|
break unless inc
|
50
50
|
end
|
@@ -64,14 +64,14 @@ module RightScale
|
|
64
64
|
cookbooks_path = repo.cookbooks_path || []
|
65
65
|
cookbooks_path = [ cookbooks_path ] unless cookbooks_path.is_a?(Array)
|
66
66
|
if @incremental
|
67
|
-
|
68
|
-
(!@repo.tag.nil? && !@repo.tag.empty?
|
69
|
-
|
70
|
-
|
71
|
-
|
67
|
+
args = ['update', '--no-auth-cache', '--non-interactive', '--quiet']
|
68
|
+
args += ['--revision', @repo.tag] if (!@repo.tag.nil? && !@repo.tag.empty?)
|
69
|
+
args += ['--username', @repo.first_credential] if @repo.first_credential
|
70
|
+
args += ['--password', @repo.second_credential] if @repo.second_credential
|
71
|
+
|
72
72
|
if cookbooks_path.empty?
|
73
73
|
Dir.chdir(@current_repo_dir) do
|
74
|
-
res = @watcher.launch_and_watch(
|
74
|
+
res = @watcher.launch_and_watch('svn', args, @current_repo_dir)
|
75
75
|
handle_watcher_result(res, 'SVN update')
|
76
76
|
end
|
77
77
|
else
|
@@ -79,7 +79,7 @@ module RightScale
|
|
79
79
|
break unless succeeded?
|
80
80
|
full_path = File.join(@current_repo_dir, path)
|
81
81
|
Dir.chdir(full_path) do
|
82
|
-
res = @watcher.launch_and_watch(
|
82
|
+
res = @watcher.launch_and_watch('svn', args, @current_repo_dir)
|
83
83
|
handle_watcher_result(res, 'SVN update')
|
84
84
|
end
|
85
85
|
end
|
@@ -87,12 +87,12 @@ module RightScale
|
|
87
87
|
end
|
88
88
|
if !@incremental && succeeded?
|
89
89
|
if cookbooks_path.empty?
|
90
|
-
res = @watcher.launch_and_watch(
|
90
|
+
res = @watcher.launch_and_watch('svn', svn_checkout_args, @current_repo_dir)
|
91
91
|
handle_watcher_result(res, 'SVN checkout')
|
92
92
|
else
|
93
93
|
cookbooks_path.each do |path|
|
94
94
|
break unless succeeded?
|
95
|
-
res = @watcher.launch_and_watch(
|
95
|
+
res = @watcher.launch_and_watch('svn', svn_checkout_args(path), @current_repo_dir)
|
96
96
|
handle_watcher_result(res, 'SVN checkout')
|
97
97
|
end
|
98
98
|
end
|
@@ -107,12 +107,13 @@ module RightScale
|
|
107
107
|
#
|
108
108
|
# === Return
|
109
109
|
# svn_cmd(String):: Corresponding SVN command line
|
110
|
-
def
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
'
|
110
|
+
def svn_checkout_args(path='')
|
111
|
+
args = ['checkout', File.join(@repo.url, path), File.join(@current_repo_dir, path),
|
112
|
+
'--no-auth-cache', '--non-interactive', '--quiet']
|
113
|
+
args += ['--revision', @repo.tag] if !@repo.tag.nil? && !@repo.tag.empty?
|
114
|
+
args += ['--username', @repo.first_credential] if @repo.first_credential
|
115
|
+
args += ['--password', @repo.second_credential] if @repo.second_credential
|
116
|
+
return args
|
116
117
|
end
|
117
118
|
end
|
118
119
|
end
|
@@ -75,13 +75,13 @@ module RightScale
|
|
75
75
|
#
|
76
76
|
# === Return
|
77
77
|
# res(RightScale::WatchStatus):: Outcome of watch, see RightScale::WatchStatus
|
78
|
-
def launch_and_watch(cmd, dest_dir)
|
78
|
+
def launch_and_watch(cmd, args, dest_dir)
|
79
79
|
exit_code = nil
|
80
80
|
output = ''
|
81
81
|
monitor = ProcessMonitor.new
|
82
82
|
|
83
83
|
# Run external process and monitor it in a new thread, platform specific
|
84
|
-
pid = monitor.spawn(cmd) do |data|
|
84
|
+
pid = monitor.spawn(cmd, *args) do |data|
|
85
85
|
output << data[:output] if data[:output]
|
86
86
|
exit_code = data[:exit_code] if data.include?(:exit_code)
|
87
87
|
end
|
@@ -35,7 +35,11 @@ module RightScale
|
|
35
35
|
# Spawn given process and callback given block with output and exit code
|
36
36
|
#
|
37
37
|
# === Parameters
|
38
|
-
# cmd(String)::
|
38
|
+
# cmd(String):: Name of the command to run
|
39
|
+
# arg1(String):: Optional, first command-line argument
|
40
|
+
# arg2(String):: Optional, second command-line argument
|
41
|
+
# ...
|
42
|
+
# argN(String):: Optional, Nth command-line argument
|
39
43
|
#
|
40
44
|
# === Block
|
41
45
|
# Given block should take one argument which is a hash which may contain
|
@@ -45,7 +49,10 @@ module RightScale
|
|
45
49
|
#
|
46
50
|
# === Return
|
47
51
|
# pid(Integer):: Spawned process pid
|
48
|
-
def spawn(cmd)
|
52
|
+
def spawn(cmd, *args)
|
53
|
+
args = args.map { |a| a.to_s }
|
54
|
+
cmd = ([cmd] + args).join(' ')
|
55
|
+
|
49
56
|
# Run external process and monitor it in a new thread
|
50
57
|
@io = IO.popen(cmd)
|
51
58
|
@handle = OpenProcess(PROCESS_ALL_ACCESS, 0, @io.pid)
|
data/right_scraper.gemspec
CHANGED
@@ -23,7 +23,7 @@ require 'rubygems'
|
|
23
23
|
|
24
24
|
spec = Gem::Specification.new do |spec|
|
25
25
|
spec.name = 'right_scraper'
|
26
|
-
spec.version = '1.0.
|
26
|
+
spec.version = '1.0.24'
|
27
27
|
spec.authors = ['Raphael Simon']
|
28
28
|
spec.email = 'raphael@rightscale.com'
|
29
29
|
spec.homepage = 'https://github.com/rightscale/right_scraper'
|
data/spec/watcher_spec.rb
CHANGED
@@ -37,7 +37,8 @@ describe RightScale::Watcher do
|
|
37
37
|
|
38
38
|
it 'should launch and watch well-behaved processes' do
|
39
39
|
watcher = RightScale::Watcher.new(max_bytes=1, max_seconds=5)
|
40
|
-
|
40
|
+
ruby = "trap('INT', 'IGNORE'); puts 42; exit 42"
|
41
|
+
status = watcher.launch_and_watch('ruby', ['-e', ruby], @dest_dir)
|
41
42
|
status.status.should == :success
|
42
43
|
status.exit_code.should == 42
|
43
44
|
status.output.should == "42\n"
|
@@ -45,7 +46,8 @@ describe RightScale::Watcher do
|
|
45
46
|
|
46
47
|
it 'should report timeouts' do
|
47
48
|
watcher = RightScale::Watcher.new(max_bytes=1, max_seconds=2)
|
48
|
-
|
49
|
+
ruby = "trap('INT', 'IGNORE'); STDOUT.sync = true; puts 42; sleep 5"
|
50
|
+
status = watcher.launch_and_watch('ruby', ['-e', ruby], @dest_dir)
|
49
51
|
status.status.should == :timeout
|
50
52
|
status.exit_code.should == -1
|
51
53
|
status.output.should == "42\n"
|
@@ -53,7 +55,8 @@ describe RightScale::Watcher do
|
|
53
55
|
|
54
56
|
it 'should report size exceeded' do
|
55
57
|
watcher = RightScale::Watcher.new(max_bytes=1, max_seconds=5)
|
56
|
-
|
58
|
+
ruby = "trap('INT', 'IGNORE'); STDOUT.sync = true; puts 42; File.open(File.join('#{@dest_dir}', 'test'), 'w') { |f| f.puts 'MORE THAN 2 CHARS' }; sleep 5 rescue nil"
|
59
|
+
status = watcher.launch_and_watch('ruby', ['-e', ruby], @dest_dir)
|
57
60
|
status.status.should == :size_exceeded
|
58
61
|
status.exit_code.should == -1
|
59
62
|
status.output.should == "42\n"
|
@@ -61,7 +64,8 @@ describe RightScale::Watcher do
|
|
61
64
|
|
62
65
|
it 'should allow infinite size and timeout' do
|
63
66
|
watcher = RightScale::Watcher.new(max_bytes=-1, max_seconds=-1)
|
64
|
-
|
67
|
+
ruby = "trap('INT', 'IGNORE'); STDOUT.sync = true; puts 42; File.open(File.join('#{@dest_dir}', 'test'), 'w') { |f| f.puts 'MORE THAN 2 CHARS' }; sleep 2 rescue nil"
|
68
|
+
status = watcher.launch_and_watch('ruby', ['-e', ruby], @dest_dir)
|
65
69
|
status.status.should == :success
|
66
70
|
status.exit_code.should == 0
|
67
71
|
status.output.should == "42\n"
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: right_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 39
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 24
|
10
|
+
version: 1.0.24
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Raphael Simon
|
@@ -9,7 +15,7 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date: 2010-08-
|
18
|
+
date: 2010-08-31 00:00:00 -07:00
|
13
19
|
default_executable:
|
14
20
|
dependencies: []
|
15
21
|
|
@@ -62,21 +68,29 @@ rdoc_options:
|
|
62
68
|
require_paths:
|
63
69
|
- lib
|
64
70
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
65
72
|
requirements:
|
66
73
|
- - ">="
|
67
74
|
- !ruby/object:Gem::Version
|
75
|
+
hash: 59
|
76
|
+
segments:
|
77
|
+
- 1
|
78
|
+
- 8
|
79
|
+
- 6
|
68
80
|
version: 1.8.6
|
69
|
-
version:
|
70
81
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
71
83
|
requirements:
|
72
84
|
- - ">="
|
73
85
|
- !ruby/object:Gem::Version
|
86
|
+
hash: 3
|
87
|
+
segments:
|
88
|
+
- 0
|
74
89
|
version: "0"
|
75
|
-
version:
|
76
90
|
requirements: []
|
77
91
|
|
78
92
|
rubyforge_project: right_scraper
|
79
|
-
rubygems_version: 1.3.
|
93
|
+
rubygems_version: 1.3.7
|
80
94
|
signing_key:
|
81
95
|
specification_version: 3
|
82
96
|
summary: Download and update remote repositories
|