right_scraper 1.0.9 → 1.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/right_scraper/linux/process_monitor.rb +67 -0
- data/lib/right_scraper/scraper_base.rb +1 -1
- data/lib/right_scraper/scrapers/git_scraper.rb +1 -1
- data/lib/right_scraper/watcher.rb +17 -15
- data/lib/right_scraper/win32/process_monitor.rb +91 -0
- data/right_scraper.gemspec +1 -1
- data/spec/download_scraper_spec.rb +1 -0
- data/spec/scraper_spec.rb +11 -9
- data/spec/svn_scraper_spec.rb +1 -0
- data/spec/watcher_spec.rb +1 -1
- metadata +4 -2
@@ -0,0 +1,67 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
module RightScale
|
25
|
+
|
26
|
+
# *nix specific watcher implementation
|
27
|
+
class ProcessMonitor

  # Spawn given process and callback given block with output and exit code
  #
  # The command is started with IO.popen and watched from a background
  # thread; this method returns as soon as the process has been started.
  #
  # === Parameters
  # cmd(String):: Process command line (including arguments)
  #
  # === Block
  # Given block should take one argument which is a hash which may contain
  # the keys :output and :exit_code. The value associated with :output is a chunk
  # of output while the value associated with :exit_code is the process exit code
  # (nil if the process did not exit normally, e.g. it was killed by a signal).
  # This block won't be called anymore once the :exit_code key has associated value.
  # The block is called from the reader thread, not from the caller's thread.
  #
  # === Return
  # pid(Integer):: Spawned process pid
  def spawn(cmd)
    @io = IO.popen(cmd)
    @reader = Thread.new do
      # IO#read without a length blocks until EOF and returns '' once the
      # stream is exhausted, which is what terminates this loop
      chunk = @io.read
      until chunk == ''
        yield(:output => chunk)
        chunk = @io.read
      end
      # Use the status returned by wait2 rather than the $? global
      _, status = Process.wait2(@io.pid)
      yield(:exit_code => status.exitstatus)
    end
    @io.pid
  end

  # Join reader thread and close io
  #
  # Safe to call when spawn was never called and safe to call more than once.
  # An exception raised inside the reader thread (e.g. by the callback block)
  # is re-raised here by the join.
  #
  # === Return
  # true:: Always return true
  def cleanup
    @reader.join if @reader
    @io.close if @io && !@io.closed?
    true
  end

end
|
67
|
+
end
|
@@ -56,7 +56,7 @@ module RightScale
|
|
56
56
|
# Each scraper implementation should override scrape_imp which is called
|
57
57
|
# after this method initializes all the scraper attributes properly.
|
58
58
|
# See RightScale::Scraper#scrape
|
59
|
-
def scrape(repo, incremental, &callback)
|
59
|
+
def scrape(repo, incremental=true, &callback)
|
60
60
|
@repo = repo
|
61
61
|
@callback = callback
|
62
62
|
@current_repo_dir = ScraperBase.repo_dir(root_dir, repo)
|
@@ -182,7 +182,7 @@ module RightScale
|
|
182
182
|
# "-o StrictHostKeyChecking=no" in the GIT_SSH executable, but it is
|
183
183
|
# still a mystery why this doesn't work properly in windows.
|
184
184
|
# so make a ssh call which creates the proper "known_hosts" file.
|
185
|
-
system("ssh -o StrictHostKeyChecking=no #{repo.url.split(':').first} exit")
|
185
|
+
system("ssh -o StrictHostKeyChecking=no #{repo.url.split(':').first} exit 2>&1")
|
186
186
|
end
|
187
187
|
return ''
|
188
188
|
end
|
@@ -22,6 +22,11 @@
|
|
22
22
|
#++
|
23
23
|
|
24
24
|
require 'find'
|
25
|
+
if RUBY_PLATFORM =~ /mswin/
|
26
|
+
require File.join(File.dirname(__FILE__), 'win32', 'process_monitor')
|
27
|
+
else
|
28
|
+
require File.join(File.dirname(__FILE__), 'linux', 'process_monitor')
|
29
|
+
end
|
25
30
|
|
26
31
|
module RightScale
|
27
32
|
|
@@ -72,18 +77,13 @@ module RightScale
|
|
72
77
|
# res(RightScale::WatchStatus):: Outcome of watch, see RightScale::WatchStatus
|
73
78
|
def launch_and_watch(cmd, dest_dir)
|
74
79
|
exit_code = nil
|
75
|
-
output
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
output += o
|
83
|
-
o = io.read
|
84
|
-
end
|
85
|
-
Process.wait(io.pid)
|
86
|
-
exit_code = $?.exitstatus
|
80
|
+
output = ''
|
81
|
+
monitor = ProcessMonitor.new
|
82
|
+
|
83
|
+
# Run external process and monitor it in a new thread, platform specific
|
84
|
+
pid = monitor.spawn(cmd) do |data|
|
85
|
+
output << data[:output] if data.include?(:output)
|
86
|
+
exit_code = data[:exit_code] if data.include?(:exit_code)
|
87
87
|
end
|
88
88
|
|
89
89
|
# Loop until process is done or times out or takes too much space
|
@@ -106,10 +106,12 @@ module RightScale
|
|
106
106
|
else
|
107
107
|
exit_status = -1
|
108
108
|
outcome = (timed_out ? :timeout : :size_exceeded)
|
109
|
-
Process.kill('INT',
|
109
|
+
Process.kill('INT', pid)
|
110
110
|
end
|
111
|
-
|
112
|
-
|
111
|
+
|
112
|
+
# Cleanup any open handle etc., platform specific
|
113
|
+
monitor.cleanup
|
114
|
+
|
113
115
|
res = WatchStatus.new(outcome, exit_status, output)
|
114
116
|
end
|
115
117
|
|
@@ -0,0 +1,91 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'win32/process'
|
25
|
+
|
26
|
+
module RightScale
|
27
|
+
|
28
|
+
# Windows specific watcher implementation
|
29
|
+
class ProcessMonitor

  include Windows::Process
  include Windows::Synchronize
  include Windows::Handle

  # Spawn given process and callback given block with output and exit code
  #
  # The command is started with IO.popen and a process handle is opened on it
  # so that its exit code can be retrieved once it is done.
  #
  # === Parameters
  # cmd(String):: Process command line (including arguments)
  #
  # === Block
  # Given block should take one argument which is a hash which may contain
  # the keys :output and :exit_code. The value associated with :output is a chunk
  # of output while the value associated with :exit_code is the process exit code
  # This block won't be called anymore once the :exit_code key has associated value
  # When the process handle cannot be used the block is called synchronously
  # with only an :exit_code, otherwise it is called from the reader thread.
  #
  # === Return
  # pid(Integer):: Spawned process pid
  def spawn(cmd)
    @io = IO.popen(cmd)
    @handle = OpenProcess(PROCESS_ALL_ACCESS, 0, @io.pid)
    case @handle
    when INVALID_HANDLE_VALUE
      # Something bad happened, report failure without reading any output
      yield(:exit_code => 1)
    when 0
      # No handle returned, treated as: process already finished
      yield(:exit_code => 0)
    else
      # Start output read in a background thread
      @reader = Thread.new do
        # IO#read without a length returns '' once the stream is exhausted
        chunk = @io.read
        until chunk == ''
          yield(:output => chunk)
          chunk = @io.read
        end
        WaitForSingleObject(@handle, INFINITE)
        # GetExitCodeProcess writes the code into a packed unsigned long buffer
        buffer = [0].pack('L')
        exit_code = GetExitCodeProcess(@handle, buffer) ? buffer.unpack('L').first : 1
        yield(:exit_code => exit_code)
      end
    end
    @io.pid
  end

  # Join reader thread, then cleanup underlying handle and io
  #
  # Safe to call when no reader thread was started (spawn not called or the
  # process handle could not be used) and safe to call more than once.
  #
  # === Return
  # true:: Always return true
  def cleanup
    # No reader thread exists when OpenProcess did not return a usable handle
    @reader.join if @reader
    if @handle && @handle != 0 && @handle != INVALID_HANDLE_VALUE
      CloseHandle(@handle)
    end
    # Forget the handle so that it is never closed twice
    @handle = nil
    @io.close if @io && !@io.closed?
    true
  end

end
|
91
|
+
end
|
data/right_scraper.gemspec
CHANGED
@@ -23,7 +23,7 @@ require 'rubygems'
|
|
23
23
|
|
24
24
|
spec = Gem::Specification.new do |spec|
|
25
25
|
spec.name = 'right_scraper'
|
26
|
-
spec.version = '1.0.9'
|
26
|
+
spec.version = '1.0.10'
|
27
27
|
spec.authors = ['Raphael Simon']
|
28
28
|
spec.email = 'raphael@rightscale.com'
|
29
29
|
spec.homepage = 'https://github.com/rightscale/right_scraper'
|
data/spec/scraper_spec.rb
CHANGED
@@ -22,10 +22,12 @@
|
|
22
22
|
#++
|
23
23
|
|
24
24
|
require File.join(File.dirname(__FILE__), 'spec_helper')
|
25
|
-
require File.join('
|
26
|
-
require File.join('scrapers', '
|
27
|
-
require File.join('scrapers', '
|
28
|
-
require '
|
25
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'right_scraper', 'scraper_base')
|
26
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'right_scraper', 'scrapers', 'git_scraper')
|
27
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'right_scraper', 'scrapers', 'svn_scraper')
|
28
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'right_scraper', 'scrapers', 'download_scraper')
|
29
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'right_scraper', 'repository')
|
30
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'right_scraper', 'scraper')
|
29
31
|
|
30
32
|
describe RightScale::Scraper do
|
31
33
|
|
@@ -33,17 +35,17 @@ describe RightScale::Scraper do
|
|
33
35
|
@scraper = RightScale::Scraper.new('/tmp')
|
34
36
|
@mock_scraper = flexmock('MockScraper')
|
35
37
|
mock_scraper_klass = flexmock('MockScraperClass', :new => @mock_scraper)
|
36
|
-
RightScale::SCRAPERS.merge!(
|
38
|
+
RightScale::SCRAPERS.merge!('mock' => mock_scraper_klass)
|
37
39
|
end
|
38
40
|
|
39
41
|
after(:all) do
|
40
|
-
RightScale::SCRAPERS.delete(
|
42
|
+
RightScale::SCRAPERS.delete('mock')
|
41
43
|
end
|
42
44
|
|
43
45
|
it 'should scrape' do
|
44
46
|
repo = RightScale::Repository.new
|
45
47
|
repo.repo_type = :mock
|
46
|
-
@mock_scraper.should_receive(:scrape).with(repo, Proc).and_return(true)
|
48
|
+
@mock_scraper.should_receive(:scrape).with(repo, true, Proc).and_return(true)
|
47
49
|
@mock_scraper.should_receive(:succeeded?).and_return(true)
|
48
50
|
@mock_scraper.should_receive(:current_repo_dir).and_return('42')
|
49
51
|
@scraper.scrape(repo) { }.should be_true
|
@@ -51,14 +53,14 @@ describe RightScale::Scraper do
|
|
51
53
|
end
|
52
54
|
|
53
55
|
it 'should scrape from a hash' do
|
54
|
-
@mock_scraper.should_receive(:scrape).with(RightScale::Repository, Proc).and_return(true)
|
56
|
+
@mock_scraper.should_receive(:scrape).with(RightScale::Repository, true, Proc).and_return(true)
|
55
57
|
@mock_scraper.should_receive(:succeeded?).and_return(true)
|
56
58
|
@mock_scraper.should_receive(:current_repo_dir).and_return('42')
|
57
59
|
@scraper.scrape({:repo_type => :mock}) { }.should be_true
|
58
60
|
end
|
59
61
|
|
60
62
|
it 'should report failures' do
|
61
|
-
@mock_scraper.should_receive(:scrape).with(RightScale::Repository, Proc).and_return(true)
|
63
|
+
@mock_scraper.should_receive(:scrape).with(RightScale::Repository, true, Proc).and_return(true)
|
62
64
|
@mock_scraper.should_receive(:succeeded?).and_return(false)
|
63
65
|
@mock_scraper.should_receive(:current_repo_dir).and_return('42')
|
64
66
|
@scraper.scrape({:repo_type => :mock}) { }.should be_false
|
data/spec/svn_scraper_spec.rb
CHANGED
data/spec/watcher_spec.rb
CHANGED
@@ -45,7 +45,7 @@ describe RightScale::Watcher do
|
|
45
45
|
|
46
46
|
it 'should report timeouts' do
|
47
47
|
watcher = RightScale::Watcher.new(max_bytes=1, max_seconds=2)
|
48
|
-
status = watcher.launch_and_watch('ruby -e "STDOUT.sync = true; puts 42; sleep 5"', @dest_dir)
|
48
|
+
status = watcher.launch_and_watch('ruby -e "STDOUT.sync = true; puts 42; sleep 5" 2>&1', @dest_dir)
|
49
49
|
status.status.should == :timeout
|
50
50
|
status.exit_code.should == -1
|
51
51
|
status.output.should == "42\n"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: right_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.9
|
4
|
+
version: 1.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Raphael Simon
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-15 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -26,6 +26,7 @@ files:
|
|
26
26
|
- README.rdoc
|
27
27
|
- Rakefile
|
28
28
|
- lib/right_scraper.rb
|
29
|
+
- lib/right_scraper/linux/process_monitor.rb
|
29
30
|
- lib/right_scraper/repository.rb
|
30
31
|
- lib/right_scraper/scraper.rb
|
31
32
|
- lib/right_scraper/scraper_base.rb
|
@@ -33,6 +34,7 @@ files:
|
|
33
34
|
- lib/right_scraper/scrapers/git_scraper.rb
|
34
35
|
- lib/right_scraper/scrapers/svn_scraper.rb
|
35
36
|
- lib/right_scraper/watcher.rb
|
37
|
+
- lib/right_scraper/win32/process_monitor.rb
|
36
38
|
- right_scraper.gemspec
|
37
39
|
- spec/download_scraper_spec.rb
|
38
40
|
- spec/git_scraper_spec.rb
|