right_scraper 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +4 -1
- data/lib/right_scraper/scraper_base.rb +1 -1
- data/lib/right_scraper/scrapers/download_scraper.rb +3 -3
- data/lib/right_scraper/scrapers/git_scraper.rb +54 -17
- data/lib/right_scraper/scrapers/svn_scraper.rb +1 -1
- data/right_scraper.gemspec +1 -1
- data/spec/download_scraper_spec.rb +8 -3
- data/spec/git_scraper_spec.rb +6 -6
- data/spec/spec.opts +2 -0
- data/spec/svn_scraper_spec.rb +7 -6
- metadata +3 -2
data/Rakefile
CHANGED
@@ -33,6 +33,9 @@ task :default => 'spec'
|
|
33
33
|
desc "Run unit tests"
|
34
34
|
Spec::Rake::SpecTask.new do |t|
|
35
35
|
t.spec_files = Dir['**/*_spec.rb']
|
36
|
+
t.spec_opts = lambda do
|
37
|
+
IO.readlines(File.join(File.dirname(__FILE__), 'spec', 'spec.opts')).map {|l| l.chomp.split " "}.flatten
|
38
|
+
end
|
36
39
|
end
|
37
40
|
|
38
41
|
desc "Run unit tests with RCov"
|
@@ -40,7 +43,7 @@ Spec::Rake::SpecTask.new(:rcov) do |t|
|
|
40
43
|
t.spec_files = Dir['**/*_spec.rb']
|
41
44
|
t.rcov = true
|
42
45
|
t.rcov_opts = lambda do
|
43
|
-
IO.readlines(File.join(File.dirname(__FILE__), 'rcov.opts')).map {|l| l.chomp.split " "}.flatten
|
46
|
+
IO.readlines(File.join(File.dirname(__FILE__), 'spec', 'rcov.opts')).map {|l| l.chomp.split " "}.flatten
|
44
47
|
end
|
45
48
|
end
|
46
49
|
|
@@ -34,14 +34,14 @@ module RightScale
|
|
34
34
|
@callback.call(msg, is_step=true) if @callback
|
35
35
|
filename = @repo.url.split('/').last
|
36
36
|
user_opt = @repo.first_credential && @repo.second_credential ? "--user #{@repo.first_credential}:#{@repo.second_credential}" : ''
|
37
|
-
cmd = "curl --fail --silent --show-error --insecure --location #{user_opt} --output
|
37
|
+
cmd = "curl --fail --silent --show-error --insecure --location #{user_opt} --output \"#{@current_repo_dir}/#{filename}\" '#{@repo.url}' 2>&1"
|
38
38
|
FileUtils.mkdir_p(@current_repo_dir)
|
39
39
|
res = `#{cmd}`
|
40
40
|
@errors << res if $? != 0
|
41
41
|
if succeeded?
|
42
42
|
unzip_opt = case @repo.url[/\.(.*)$/]
|
43
|
-
when 'bzip', 'bzip2' then 'j'
|
44
|
-
when 'tgz', 'gzip' then 'z'
|
43
|
+
when 'bzip', 'bzip2', 'bz2' then 'j'
|
44
|
+
when 'tgz', 'gzip', 'gz' then 'z'
|
45
45
|
else ''
|
46
46
|
end
|
47
47
|
Dir.chdir(@current_repo_dir) do
|
@@ -44,41 +44,41 @@ module RightScale
|
|
44
44
|
# === Return
|
45
45
|
# true:: Always return true
|
46
46
|
def scrape_imp
|
47
|
-
msg = @incremental ?
|
47
|
+
msg = @incremental ? 'Pulling ' : 'Cloning '
|
48
48
|
msg += "git repository '#{@repo.display_name}'"
|
49
49
|
@callback.call(msg, is_step=true) if @callback
|
50
|
-
ssh_cmd
|
51
|
-
res
|
52
|
-
is_tag
|
50
|
+
ssh_cmd = ssh_command
|
51
|
+
res = ''
|
52
|
+
is_tag = nil
|
53
53
|
is_branch = nil
|
54
54
|
|
55
55
|
if @incremental
|
56
56
|
Dir.chdir(@current_repo_dir) do
|
57
|
-
is_tag, is_branch, res =
|
57
|
+
is_tag, is_branch, res = git_fetch(ssh_cmd)
|
58
58
|
if !is_tag && !is_branch
|
59
|
-
@callback.call(
|
59
|
+
@callback.call('Nothing to update: repo tag refers to neither a branch nor a tag', is_step=false)
|
60
60
|
return true
|
61
61
|
end
|
62
62
|
if is_tag && is_branch
|
63
63
|
@errors << 'Repository tag ambiguous: could be git tag or git branch'
|
64
64
|
else
|
65
65
|
tag = @repo.tag.nil? || @repo.tag.empty? ? 'master' : @repo.tag
|
66
|
-
res +=
|
66
|
+
res += `git checkout #{tag} 2>&1`
|
67
67
|
if $? != 0
|
68
|
-
@callback.call("Failed to
|
68
|
+
@callback.call("Failed to update repo: #{res}, falling back to cloning", is_step=false) if @callback
|
69
69
|
FileUtils.rm_rf(@current_repo_dir)
|
70
70
|
@incremental = false
|
71
71
|
end
|
72
72
|
end
|
73
73
|
end
|
74
74
|
end
|
75
|
-
if !@incremental
|
76
|
-
res += `#{ssh_cmd} git clone --quiet --depth 1 #{@repo.url} #{@current_repo_dir} 2>&1`
|
75
|
+
if !@incremental && succeeded?
|
76
|
+
res += `#{ssh_cmd} git clone --quiet --depth 1 "#{@repo.url}" "#{@current_repo_dir}" 2>&1`
|
77
77
|
@errors << res if $? != 0
|
78
78
|
if !@repo.tag.nil? && !@repo.tag.empty? && @repo.tag != 'master' && succeeded?
|
79
79
|
Dir.chdir(@current_repo_dir) do
|
80
80
|
if is_tag.nil?
|
81
|
-
is_tag, is_branch, out =
|
81
|
+
is_tag, is_branch, out = git_fetch(ssh_cmd)
|
82
82
|
res += out
|
83
83
|
end
|
84
84
|
if is_tag && is_branch
|
@@ -118,12 +118,13 @@ module RightScale
|
|
118
118
|
options = opts.inject('') { |o, (k, v)| o << "#{k.to_s}=#{v}\n" }
|
119
119
|
end
|
120
120
|
|
121
|
-
# Store
|
121
|
+
# Store private SSH key into temporary folder and create temporary script
|
122
122
|
# that wraps SSH and uses this key.
|
123
123
|
#
|
124
124
|
# === Return
|
125
125
|
# ssh(String):: Code to initialize GIT_SSH environment variable with path to SSH wrapper script
|
126
126
|
def ssh_command
|
127
|
+
return win32_ssh_command if RUBY_PLATFORM=~/mswin/
|
127
128
|
ssh_dir = File.join(@scrape_dir_path, '.ssh')
|
128
129
|
FileUtils.mkdir_p(ssh_dir)
|
129
130
|
key_content = @repo.first_credential
|
@@ -131,7 +132,7 @@ module RightScale
|
|
131
132
|
# Explicitly disable public key authentication so we don't end up using the system's key
|
132
133
|
options = { :PubkeyAuthentication => 'no' }
|
133
134
|
else
|
134
|
-
ssh_key_path = File.join(ssh_dir, '
|
135
|
+
ssh_key_path = File.join(ssh_dir, 'id_rsa')
|
135
136
|
File.open(ssh_key_path, 'w') { |f| f.puts(key_content) }
|
136
137
|
File.chmod(0600, ssh_key_path)
|
137
138
|
options = { :IdentityFile => ssh_key_path }
|
@@ -144,19 +145,55 @@ module RightScale
|
|
144
145
|
"GIT_SSH=#{ssh}"
|
145
146
|
end
|
146
147
|
|
148
|
+
# Prepare SSH for git on Windows
|
149
|
+
# The GIT_SSH trick doesn't seem to work on Windows, instead actually
|
150
|
+
# save the private key in the user ssh folder.
|
151
|
+
# Note: This will override any pre-existing SSH key that was on the system
|
152
|
+
#
|
153
|
+
# === Return
|
154
|
+
# '':: Always return an empty string
|
155
|
+
#
|
156
|
+
# === Raise
|
157
|
+
# Exception:: If the USERPROFILE environment variable is not set
|
158
|
+
def win32_ssh_command
|
159
|
+
key_content = @repo.first_credential
|
160
|
+
unless key_content.nil?
|
161
|
+
# resolve key file path.
|
162
|
+
raise 'Environment variable USERPROFILE is missing' unless ENV['USERPROFILE']
|
163
|
+
user_profile_dir_path = ENV['USERPROFILE']
|
164
|
+
ssh_keys_dir = File.join(user_profile_dir_path, '.ssh')
|
165
|
+
FileUtils.mkdir_p(ssh_keys_dir) unless File.directory?(ssh_keys_dir)
|
166
|
+
ssh_key_file_path = File.join(ssh_keys_dir, 'id_rsa')
|
167
|
+
|
168
|
+
# (re)create key file. must overwrite any existing credentials in case
|
169
|
+
# we are switching repositories and have different credentials for each.
|
170
|
+
File.open(ssh_key_file_path, 'w') { |f| f.puts(key_content) }
|
171
|
+
|
172
|
+
# we need to create the "known_hosts" file or else the process will
|
173
|
+
# halt in windows waiting for a yes/no response to the unknown
|
174
|
+
# git host. this is normally handled by specifying
|
175
|
+
# "-o StrictHostKeyChecking=no" in the GIT_SSH executable, but it is
|
176
|
+
# still a mystery why this doesn't work properly in windows.
|
177
|
+
# so make a ssh call which creates the proper "known_hosts" file.
|
178
|
+
system("ssh -o StrictHostKeyChecking=no #{repo.url.split(':').first} exit")
|
179
|
+
end
|
180
|
+
return ''
|
181
|
+
end
|
182
|
+
|
183
|
+
# Shallow fetch
|
147
184
|
# Resolves whether repository tag is a git tag or a git branch
|
148
185
|
# Return output of run commands too
|
149
|
-
# Note
|
186
|
+
# Note: Assume that current working directory is a git directory
|
150
187
|
#
|
151
188
|
# === Parameters
|
152
|
-
# ssh_cmd
|
189
|
+
# ssh_cmd(String):: SSH command to be used with git if any
|
153
190
|
#
|
154
191
|
# === Return
|
155
|
-
# res
|
192
|
+
# res(Array)::
|
156
193
|
# - res[0] is true if git repo has a tag with a name corresponding to the repository tag
|
157
194
|
# - res[1] is true if git repo has a branch with a name corresponding to the repository tag
|
158
195
|
# - res[2] contains the git output
|
159
|
-
def
|
196
|
+
def git_fetch(ssh_cmd)
|
160
197
|
return [ false, true, "" ] if @repo.tag.nil? || @repo.tag.empty? || @repo.tag == 'master'
|
161
198
|
output = `#{ssh_cmd} git fetch --tags --depth 1 2>&1`
|
162
199
|
is_tag = `git tag`.split("\n").include?(@repo.tag)
|
@@ -62,7 +62,7 @@ module RightScale
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
if !@incremental
|
65
|
-
svn_cmd = "svn checkout #{@repo.url} #{@current_repo_dir} --non-interactive --quiet" +
|
65
|
+
svn_cmd = "svn checkout \"#{@repo.url}\" \"#{@current_repo_dir}\" --non-interactive --quiet" +
|
66
66
|
(!@repo.tag.nil? && !@repo.tag.empty? ? " --revision #{@repo.tag}" : '') +
|
67
67
|
(@repo.first_credential ? " --username #{@repo.first_credential}" : '') +
|
68
68
|
(@repo.second_credential ? " --password #{@repo.second_credential}" : '') +
|
data/right_scraper.gemspec
CHANGED
@@ -23,7 +23,7 @@ require 'rubygems'
|
|
23
23
|
|
24
24
|
spec = Gem::Specification.new do |spec|
|
25
25
|
spec.name = 'right_scraper'
|
26
|
-
spec.version = '1.0.
|
26
|
+
spec.version = '1.0.2'
|
27
27
|
spec.authors = ['Raphael Simon']
|
28
28
|
spec.email = 'raphael@rightscale.com'
|
29
29
|
spec.homepage = 'https://github.com/rightscale/right_scraper'
|
@@ -21,6 +21,9 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# Not supported on Windows
|
25
|
+
unless RUBY_PLATFORM=~/mswin/
|
26
|
+
|
24
27
|
require File.join(File.dirname(__FILE__), 'spec_helper')
|
25
28
|
require 'scraper_base'
|
26
29
|
require 'repository'
|
@@ -41,7 +44,7 @@ describe RightScale::DownloadScraper do
|
|
41
44
|
create_file_layout(@download_repo_path, @repo_content)
|
42
45
|
@download_file = File.expand_path(File.join(File.dirname(__FILE__), '__download_file.tar'))
|
43
46
|
Dir.chdir(@download_repo_path) do
|
44
|
-
res, status = exec("tar cf #{@download_file} *")
|
47
|
+
res, status = exec("tar cf \"#{@download_file}\" *")
|
45
48
|
raise "Failed to create tarball: #{res}" unless status.success?
|
46
49
|
end
|
47
50
|
end
|
@@ -65,7 +68,7 @@ describe RightScale::DownloadScraper do
|
|
65
68
|
@scraper = RightScale::DownloadScraper.new(@repo_path)
|
66
69
|
@repo = RightScale::Repository.from_hash(:display_name => 'test repo',
|
67
70
|
:repo_type => :download,
|
68
|
-
:url => "file
|
71
|
+
:url => "file:///#{@download_file}")
|
69
72
|
end
|
70
73
|
|
71
74
|
after(:all) do
|
@@ -75,7 +78,7 @@ describe RightScale::DownloadScraper do
|
|
75
78
|
it 'should scrape' do
|
76
79
|
messages = []
|
77
80
|
@scraper.scrape(@repo) { |m, progress| messages << m if progress }
|
78
|
-
puts "\n **ERRORS: #{@scraper.
|
81
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
79
82
|
@scraper.succeeded?.should be_true
|
80
83
|
messages.size.should == 1
|
81
84
|
File.directory?(@scraper.current_repo_dir.should be_true)
|
@@ -85,3 +88,5 @@ describe RightScale::DownloadScraper do
|
|
85
88
|
end
|
86
89
|
|
87
90
|
end
|
91
|
+
|
92
|
+
end # unless RUBY_PLATFORM=~/mswin/
|
data/spec/git_scraper_spec.rb
CHANGED
@@ -41,15 +41,15 @@ describe RightScale::GitScraper do
|
|
41
41
|
FileUtils.mkdir_p(@origin_path)
|
42
42
|
Dir.chdir(@origin_path) do
|
43
43
|
res, status = exec("git init --bare")
|
44
|
-
raise "Failed to initialize
|
44
|
+
raise "Failed to initialize bare git repository: #{res}" unless status.success?
|
45
45
|
end
|
46
46
|
FileUtils.rm_rf(@repo_path)
|
47
|
-
res, status = exec("git clone --quiet #{@origin_path} #{@repo_path}")
|
47
|
+
res, status = exec("git clone --quiet \"#{@origin_path}\" \"#{@repo_path}\"")
|
48
48
|
raise "Failed to initialize git repository: #{res}" unless status.success?
|
49
49
|
create_file_layout(@repo_path, @repo_content)
|
50
50
|
Dir.chdir(@repo_path) do
|
51
51
|
res, status = exec("git add .")
|
52
|
-
res, status = exec("git commit --quiet -
|
52
|
+
res, status = exec("git commit --quiet -m \"Initial Commit\"") if status.success?
|
53
53
|
res, status = exec("git push origin master") if status.success?
|
54
54
|
raise "Failed to setup git repository: #{res}" unless status.success?
|
55
55
|
end
|
@@ -83,7 +83,7 @@ describe RightScale::GitScraper do
|
|
83
83
|
it 'should scrape' do
|
84
84
|
messages = []
|
85
85
|
@scraper.scrape(@repo) { |m, progress| messages << m if progress }
|
86
|
-
puts "\n **ERRORS: #{@scraper.
|
86
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
87
87
|
@scraper.succeeded?.should be_true
|
88
88
|
messages.size.should == 1
|
89
89
|
File.directory?(@scraper.current_repo_dir.should be_true)
|
@@ -92,11 +92,11 @@ describe RightScale::GitScraper do
|
|
92
92
|
|
93
93
|
it 'should scrape incrementally' do
|
94
94
|
@scraper.scrape(@repo)
|
95
|
-
puts "\n **ERRORS: #{@scraper.
|
95
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
96
96
|
@scraper.incremental_update?.should be_true
|
97
97
|
messages = []
|
98
98
|
@scraper.scrape(@repo) { |m, progress| messages << m if progress }
|
99
|
-
puts "\n **ERRORS: #{@scraper.
|
99
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
100
100
|
@scraper.succeeded?.should be_true
|
101
101
|
messages.size.should == 1
|
102
102
|
File.directory?(@scraper.current_repo_dir.should be_true)
|
data/spec/spec.opts
ADDED
data/spec/svn_scraper_spec.rb
CHANGED
@@ -38,15 +38,15 @@ describe RightScale::SvnScraper do
|
|
38
38
|
@repo_path = File.join(File.dirname(__FILE__), '__repo')
|
39
39
|
@repo_content = [ 'file1', { 'folder1' => [ 'file2', 'file3' ] }, { 'folder2' => [ { 'folder3' => [ 'file4' ] } ] } ]
|
40
40
|
FileUtils.rm_rf(@svn_repo_path)
|
41
|
-
res, status = exec("svnadmin create #{@svn_repo_path}")
|
41
|
+
res, status = exec("svnadmin create \"#{@svn_repo_path}\"")
|
42
42
|
raise "Failed to initialize SVN repository: #{res}" unless status.success?
|
43
43
|
FileUtils.rm_rf(@repo_path)
|
44
|
-
res, status = exec("svn checkout file
|
44
|
+
res, status = exec("svn checkout \"file:///#{@svn_repo_path}\" \"#{@repo_path}\"")
|
45
45
|
raise "Failed to checkout repository: #{res}" unless status.success?
|
46
46
|
create_file_layout(@repo_path, @repo_content)
|
47
47
|
Dir.chdir(@repo_path) do
|
48
48
|
res, status = exec("svn add *")
|
49
|
-
res, status = exec("svn commit --quiet -m
|
49
|
+
res, status = exec("svn commit --quiet -m \"Initial Commit\"") if status.success?
|
50
50
|
raise "Failed to setup repository: #{res}" unless status.success?
|
51
51
|
end
|
52
52
|
end
|
@@ -79,7 +79,7 @@ describe RightScale::SvnScraper do
|
|
79
79
|
it 'should scrape' do
|
80
80
|
messages = []
|
81
81
|
@scraper.scrape(@repo) { |m, progress| messages << m if progress }
|
82
|
-
puts "\n **ERRORS: #{@scraper.
|
82
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
83
83
|
@scraper.succeeded?.should be_true
|
84
84
|
messages.size.should == 1
|
85
85
|
File.directory?(@scraper.current_repo_dir.should be_true)
|
@@ -87,12 +87,13 @@ describe RightScale::SvnScraper do
|
|
87
87
|
end
|
88
88
|
|
89
89
|
it 'should scrape incrementally' do
|
90
|
+
pending "File URLs comparison on Windows is tricky" if RUBY_PLATFORM=~/mswin/
|
90
91
|
@scraper.scrape(@repo)
|
91
|
-
puts "\n **ERRORS: #{@scraper.
|
92
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
92
93
|
@scraper.incremental_update?.should be_true
|
93
94
|
messages = []
|
94
95
|
@scraper.scrape(@repo) { |m, progress| messages << m if progress }
|
95
|
-
puts "\n **ERRORS: #{@scraper.
|
96
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
96
97
|
@scraper.succeeded?.should be_true
|
97
98
|
messages.size.should == 1
|
98
99
|
File.directory?(@scraper.current_repo_dir.should be_true)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: right_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Raphael Simon
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-02-02 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- spec/repository_spec.rb
|
40
40
|
- spec/scraper_base_spec.rb
|
41
41
|
- spec/scraper_spec.rb
|
42
|
+
- spec/spec.opts
|
42
43
|
- spec/spec_helper.rb
|
43
44
|
- spec/svn_scraper_spec.rb
|
44
45
|
has_rdoc: true
|