right_scraper 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +4 -1
- data/lib/right_scraper/scraper_base.rb +1 -1
- data/lib/right_scraper/scrapers/download_scraper.rb +3 -3
- data/lib/right_scraper/scrapers/git_scraper.rb +54 -17
- data/lib/right_scraper/scrapers/svn_scraper.rb +1 -1
- data/right_scraper.gemspec +1 -1
- data/spec/download_scraper_spec.rb +8 -3
- data/spec/git_scraper_spec.rb +6 -6
- data/spec/spec.opts +2 -0
- data/spec/svn_scraper_spec.rb +7 -6
- metadata +3 -2
data/Rakefile
CHANGED
@@ -33,6 +33,9 @@ task :default => 'spec'
|
|
33
33
|
desc "Run unit tests"
|
34
34
|
Spec::Rake::SpecTask.new do |t|
|
35
35
|
t.spec_files = Dir['**/*_spec.rb']
|
36
|
+
t.spec_opts = lambda do
|
37
|
+
IO.readlines(File.join(File.dirname(__FILE__), 'spec', 'spec.opts')).map {|l| l.chomp.split " "}.flatten
|
38
|
+
end
|
36
39
|
end
|
37
40
|
|
38
41
|
desc "Run unit tests with RCov"
|
@@ -40,7 +43,7 @@ Spec::Rake::SpecTask.new(:rcov) do |t|
|
|
40
43
|
t.spec_files = Dir['**/*_spec.rb']
|
41
44
|
t.rcov = true
|
42
45
|
t.rcov_opts = lambda do
|
43
|
-
IO.readlines(File.join(File.dirname(__FILE__), 'rcov.opts')).map {|l| l.chomp.split " "}.flatten
|
46
|
+
IO.readlines(File.join(File.dirname(__FILE__), 'spec', 'rcov.opts')).map {|l| l.chomp.split " "}.flatten
|
44
47
|
end
|
45
48
|
end
|
46
49
|
|
@@ -34,14 +34,14 @@ module RightScale
|
|
34
34
|
@callback.call(msg, is_step=true) if @callback
|
35
35
|
filename = @repo.url.split('/').last
|
36
36
|
user_opt = @repo.first_credential && @repo.second_credential ? "--user #{@repo.first_credential}:#{@repo.second_credential}" : ''
|
37
|
-
cmd = "curl --fail --silent --show-error --insecure --location #{user_opt} --output
|
37
|
+
cmd = "curl --fail --silent --show-error --insecure --location #{user_opt} --output \"#{@current_repo_dir}/#{filename}\" '#{@repo.url}' 2>&1"
|
38
38
|
FileUtils.mkdir_p(@current_repo_dir)
|
39
39
|
res = `#{cmd}`
|
40
40
|
@errors << res if $? != 0
|
41
41
|
if succeeded?
|
42
42
|
unzip_opt = case @repo.url[/\.(.*)$/]
|
43
|
-
when 'bzip', 'bzip2' then 'j'
|
44
|
-
when 'tgz', 'gzip' then 'z'
|
43
|
+
when 'bzip', 'bzip2', 'bz2' then 'j'
|
44
|
+
when 'tgz', 'gzip', 'gz' then 'z'
|
45
45
|
else ''
|
46
46
|
end
|
47
47
|
Dir.chdir(@current_repo_dir) do
|
@@ -44,41 +44,41 @@ module RightScale
|
|
44
44
|
# === Return
|
45
45
|
# true:: Always return true
|
46
46
|
def scrape_imp
|
47
|
-
msg = @incremental ?
|
47
|
+
msg = @incremental ? 'Pulling ' : 'Cloning '
|
48
48
|
msg += "git repository '#{@repo.display_name}'"
|
49
49
|
@callback.call(msg, is_step=true) if @callback
|
50
|
-
ssh_cmd
|
51
|
-
res
|
52
|
-
is_tag
|
50
|
+
ssh_cmd = ssh_command
|
51
|
+
res = ''
|
52
|
+
is_tag = nil
|
53
53
|
is_branch = nil
|
54
54
|
|
55
55
|
if @incremental
|
56
56
|
Dir.chdir(@current_repo_dir) do
|
57
|
-
is_tag, is_branch, res =
|
57
|
+
is_tag, is_branch, res = git_fetch(ssh_cmd)
|
58
58
|
if !is_tag && !is_branch
|
59
|
-
@callback.call(
|
59
|
+
@callback.call('Nothing to update: repo tag refers to neither a branch nor a tag', is_step=false)
|
60
60
|
return true
|
61
61
|
end
|
62
62
|
if is_tag && is_branch
|
63
63
|
@errors << 'Repository tag ambiguous: could be git tag or git branch'
|
64
64
|
else
|
65
65
|
tag = @repo.tag.nil? || @repo.tag.empty? ? 'master' : @repo.tag
|
66
|
-
res +=
|
66
|
+
res += `git checkout #{tag} 2>&1`
|
67
67
|
if $? != 0
|
68
|
-
@callback.call("Failed to
|
68
|
+
@callback.call("Failed to update repo: #{res}, falling back to cloning", is_step=false) if @callback
|
69
69
|
FileUtils.rm_rf(@current_repo_dir)
|
70
70
|
@incremental = false
|
71
71
|
end
|
72
72
|
end
|
73
73
|
end
|
74
74
|
end
|
75
|
-
if !@incremental
|
76
|
-
res += `#{ssh_cmd} git clone --quiet --depth 1 #{@repo.url} #{@current_repo_dir} 2>&1`
|
75
|
+
if !@incremental && succeeded?
|
76
|
+
res += `#{ssh_cmd} git clone --quiet --depth 1 "#{@repo.url}" "#{@current_repo_dir}" 2>&1`
|
77
77
|
@errors << res if $? != 0
|
78
78
|
if !@repo.tag.nil? && !@repo.tag.empty? && @repo.tag != 'master' && succeeded?
|
79
79
|
Dir.chdir(@current_repo_dir) do
|
80
80
|
if is_tag.nil?
|
81
|
-
is_tag, is_branch, out =
|
81
|
+
is_tag, is_branch, out = git_fetch(ssh_cmd)
|
82
82
|
res += out
|
83
83
|
end
|
84
84
|
if is_tag && is_branch
|
@@ -118,12 +118,13 @@ module RightScale
|
|
118
118
|
options = opts.inject('') { |o, (k, v)| o << "#{k.to_s}=#{v}\n" }
|
119
119
|
end
|
120
120
|
|
121
|
-
# Store
|
121
|
+
# Store private SSH key into temporary folder and create temporary script
|
122
122
|
# that wraps SSH and uses this key.
|
123
123
|
#
|
124
124
|
# === Return
|
125
125
|
# ssh(String):: Code to initialize GIT_SSH environment variable with path to SSH wrapper script
|
126
126
|
def ssh_command
|
127
|
+
return win32_ssh_command if RUBY_PLATFORM=~/mswin/
|
127
128
|
ssh_dir = File.join(@scrape_dir_path, '.ssh')
|
128
129
|
FileUtils.mkdir_p(ssh_dir)
|
129
130
|
key_content = @repo.first_credential
|
@@ -131,7 +132,7 @@ module RightScale
|
|
131
132
|
# Explicitly disable public key authentication so we don't end up using the system's key
|
132
133
|
options = { :PubkeyAuthentication => 'no' }
|
133
134
|
else
|
134
|
-
ssh_key_path = File.join(ssh_dir, '
|
135
|
+
ssh_key_path = File.join(ssh_dir, 'id_rsa')
|
135
136
|
File.open(ssh_key_path, 'w') { |f| f.puts(key_content) }
|
136
137
|
File.chmod(0600, ssh_key_path)
|
137
138
|
options = { :IdentityFile => ssh_key_path }
|
@@ -144,19 +145,55 @@ module RightScale
|
|
144
145
|
"GIT_SSH=#{ssh}"
|
145
146
|
end
|
146
147
|
|
148
|
+
# Prepare SSH for git on Windows
|
149
|
+
# The GIT_SSH trick doesn't seem to work on Windows, instead actually
|
150
|
+
# save the private key in the user ssh folder.
|
151
|
+
# Note: This will override any pre-existing SSH key that was on the system
|
152
|
+
#
|
153
|
+
# === Return
|
154
|
+
# '':: Always return an empty string
|
155
|
+
#
|
156
|
+
# === Raise
|
157
|
+
# Exception:: If the USERPROFILE environment variable is not set
|
158
|
+
def win32_ssh_command
|
159
|
+
key_content = @repo.first_credential
|
160
|
+
unless key_content.nil?
|
161
|
+
# resolve key file path.
|
162
|
+
raise 'Environment variable USERPROFILE is missing' unless ENV['USERPROFILE']
|
163
|
+
user_profile_dir_path = ENV['USERPROFILE']
|
164
|
+
ssh_keys_dir = File.join(user_profile_dir_path, '.ssh')
|
165
|
+
FileUtils.mkdir_p(ssh_keys_dir) unless File.directory?(ssh_keys_dir)
|
166
|
+
ssh_key_file_path = File.join(ssh_keys_dir, 'id_rsa')
|
167
|
+
|
168
|
+
# (re)create key file. must overwrite any existing credentials in case
|
169
|
+
# we are switching repositories and have different credentials for each.
|
170
|
+
File.open(ssh_key_file_path, 'w') { |f| f.puts(key_content) }
|
171
|
+
|
172
|
+
# we need to create the "known_hosts" file or else the process will
|
173
|
+
# halt in windows waiting for a yes/no response to the unknown
|
174
|
+
# git host. this is normally handled by specifying
|
175
|
+
# "-o StrictHostKeyChecking=no" in the GIT_SSH executable, but it is
|
176
|
+
# still a mystery why this doesn't work properly in windows.
|
177
|
+
# so make a ssh call which creates the proper "known_hosts" file.
|
178
|
+
system("ssh -o StrictHostKeyChecking=no #{repo.url.split(':').first} exit")
|
179
|
+
end
|
180
|
+
return ''
|
181
|
+
end
|
182
|
+
|
183
|
+
# Shallow fetch
|
147
184
|
# Resolves whether repository tag is a git tag or a git branch
|
148
185
|
# Return output of run commands too
|
149
|
-
# Note
|
186
|
+
# Note: Assume that current working directory is a git directory
|
150
187
|
#
|
151
188
|
# === Parameters
|
152
|
-
# ssh_cmd
|
189
|
+
# ssh_cmd(String):: SSH command to be used with git if any
|
153
190
|
#
|
154
191
|
# === Return
|
155
|
-
# res
|
192
|
+
# res(Array)::
|
156
193
|
# - res[0] is true if git repo has a tag with a name corresponding to the repository tag
|
157
194
|
# - res[1] is true if git repo has a branch with a name corresponding to the repository tag
|
158
195
|
# - res[2] contains the git output
|
159
|
-
def
|
196
|
+
def git_fetch(ssh_cmd)
|
160
197
|
return [ false, true, "" ] if @repo.tag.nil? || @repo.tag.empty? || @repo.tag == 'master'
|
161
198
|
output = `#{ssh_cmd} git fetch --tags --depth 1 2>&1`
|
162
199
|
is_tag = `git tag`.split("\n").include?(@repo.tag)
|
@@ -62,7 +62,7 @@ module RightScale
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
if !@incremental
|
65
|
-
svn_cmd = "svn checkout #{@repo.url} #{@current_repo_dir} --non-interactive --quiet" +
|
65
|
+
svn_cmd = "svn checkout \"#{@repo.url}\" \"#{@current_repo_dir}\" --non-interactive --quiet" +
|
66
66
|
(!@repo.tag.nil? && !@repo.tag.empty? ? " --revision #{@repo.tag}" : '') +
|
67
67
|
(@repo.first_credential ? " --username #{@repo.first_credential}" : '') +
|
68
68
|
(@repo.second_credential ? " --password #{@repo.second_credential}" : '') +
|
data/right_scraper.gemspec
CHANGED
@@ -23,7 +23,7 @@ require 'rubygems'
|
|
23
23
|
|
24
24
|
spec = Gem::Specification.new do |spec|
|
25
25
|
spec.name = 'right_scraper'
|
26
|
-
spec.version = '1.0.
|
26
|
+
spec.version = '1.0.2'
|
27
27
|
spec.authors = ['Raphael Simon']
|
28
28
|
spec.email = 'raphael@rightscale.com'
|
29
29
|
spec.homepage = 'https://github.com/rightscale/right_scraper'
|
@@ -21,6 +21,9 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# Not supported on Windows
|
25
|
+
unless RUBY_PLATFORM=~/mswin/
|
26
|
+
|
24
27
|
require File.join(File.dirname(__FILE__), 'spec_helper')
|
25
28
|
require 'scraper_base'
|
26
29
|
require 'repository'
|
@@ -41,7 +44,7 @@ describe RightScale::DownloadScraper do
|
|
41
44
|
create_file_layout(@download_repo_path, @repo_content)
|
42
45
|
@download_file = File.expand_path(File.join(File.dirname(__FILE__), '__download_file.tar'))
|
43
46
|
Dir.chdir(@download_repo_path) do
|
44
|
-
res, status = exec("tar cf #{@download_file} *")
|
47
|
+
res, status = exec("tar cf \"#{@download_file}\" *")
|
45
48
|
raise "Failed to create tarball: #{res}" unless status.success?
|
46
49
|
end
|
47
50
|
end
|
@@ -65,7 +68,7 @@ describe RightScale::DownloadScraper do
|
|
65
68
|
@scraper = RightScale::DownloadScraper.new(@repo_path)
|
66
69
|
@repo = RightScale::Repository.from_hash(:display_name => 'test repo',
|
67
70
|
:repo_type => :download,
|
68
|
-
:url => "file
|
71
|
+
:url => "file:///#{@download_file}")
|
69
72
|
end
|
70
73
|
|
71
74
|
after(:all) do
|
@@ -75,7 +78,7 @@ describe RightScale::DownloadScraper do
|
|
75
78
|
it 'should scrape' do
|
76
79
|
messages = []
|
77
80
|
@scraper.scrape(@repo) { |m, progress| messages << m if progress }
|
78
|
-
puts "\n **ERRORS: #{@scraper.
|
81
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
79
82
|
@scraper.succeeded?.should be_true
|
80
83
|
messages.size.should == 1
|
81
84
|
File.directory?(@scraper.current_repo_dir.should be_true)
|
@@ -85,3 +88,5 @@ describe RightScale::DownloadScraper do
|
|
85
88
|
end
|
86
89
|
|
87
90
|
end
|
91
|
+
|
92
|
+
end # unless RUBY_PLATFORM=~/mswin/
|
data/spec/git_scraper_spec.rb
CHANGED
@@ -41,15 +41,15 @@ describe RightScale::GitScraper do
|
|
41
41
|
FileUtils.mkdir_p(@origin_path)
|
42
42
|
Dir.chdir(@origin_path) do
|
43
43
|
res, status = exec("git init --bare")
|
44
|
-
raise "Failed to initialize
|
44
|
+
raise "Failed to initialize bare git repository: #{res}" unless status.success?
|
45
45
|
end
|
46
46
|
FileUtils.rm_rf(@repo_path)
|
47
|
-
res, status = exec("git clone --quiet #{@origin_path} #{@repo_path}")
|
47
|
+
res, status = exec("git clone --quiet \"#{@origin_path}\" \"#{@repo_path}\"")
|
48
48
|
raise "Failed to initialize git repository: #{res}" unless status.success?
|
49
49
|
create_file_layout(@repo_path, @repo_content)
|
50
50
|
Dir.chdir(@repo_path) do
|
51
51
|
res, status = exec("git add .")
|
52
|
-
res, status = exec("git commit --quiet -
|
52
|
+
res, status = exec("git commit --quiet -m \"Initial Commit\"") if status.success?
|
53
53
|
res, status = exec("git push origin master") if status.success?
|
54
54
|
raise "Failed to setup git repository: #{res}" unless status.success?
|
55
55
|
end
|
@@ -83,7 +83,7 @@ describe RightScale::GitScraper do
|
|
83
83
|
it 'should scrape' do
|
84
84
|
messages = []
|
85
85
|
@scraper.scrape(@repo) { |m, progress| messages << m if progress }
|
86
|
-
puts "\n **ERRORS: #{@scraper.
|
86
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
87
87
|
@scraper.succeeded?.should be_true
|
88
88
|
messages.size.should == 1
|
89
89
|
File.directory?(@scraper.current_repo_dir.should be_true)
|
@@ -92,11 +92,11 @@ describe RightScale::GitScraper do
|
|
92
92
|
|
93
93
|
it 'should scrape incrementally' do
|
94
94
|
@scraper.scrape(@repo)
|
95
|
-
puts "\n **ERRORS: #{@scraper.
|
95
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
96
96
|
@scraper.incremental_update?.should be_true
|
97
97
|
messages = []
|
98
98
|
@scraper.scrape(@repo) { |m, progress| messages << m if progress }
|
99
|
-
puts "\n **ERRORS: #{@scraper.
|
99
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
100
100
|
@scraper.succeeded?.should be_true
|
101
101
|
messages.size.should == 1
|
102
102
|
File.directory?(@scraper.current_repo_dir.should be_true)
|
data/spec/spec.opts
ADDED
data/spec/svn_scraper_spec.rb
CHANGED
@@ -38,15 +38,15 @@ describe RightScale::SvnScraper do
|
|
38
38
|
@repo_path = File.join(File.dirname(__FILE__), '__repo')
|
39
39
|
@repo_content = [ 'file1', { 'folder1' => [ 'file2', 'file3' ] }, { 'folder2' => [ { 'folder3' => [ 'file4' ] } ] } ]
|
40
40
|
FileUtils.rm_rf(@svn_repo_path)
|
41
|
-
res, status = exec("svnadmin create #{@svn_repo_path}")
|
41
|
+
res, status = exec("svnadmin create \"#{@svn_repo_path}\"")
|
42
42
|
raise "Failed to initialize SVN repository: #{res}" unless status.success?
|
43
43
|
FileUtils.rm_rf(@repo_path)
|
44
|
-
res, status = exec("svn checkout file
|
44
|
+
res, status = exec("svn checkout \"file:///#{@svn_repo_path}\" \"#{@repo_path}\"")
|
45
45
|
raise "Failed to checkout repository: #{res}" unless status.success?
|
46
46
|
create_file_layout(@repo_path, @repo_content)
|
47
47
|
Dir.chdir(@repo_path) do
|
48
48
|
res, status = exec("svn add *")
|
49
|
-
res, status = exec("svn commit --quiet -m
|
49
|
+
res, status = exec("svn commit --quiet -m \"Initial Commit\"") if status.success?
|
50
50
|
raise "Failed to setup repository: #{res}" unless status.success?
|
51
51
|
end
|
52
52
|
end
|
@@ -79,7 +79,7 @@ describe RightScale::SvnScraper do
|
|
79
79
|
it 'should scrape' do
|
80
80
|
messages = []
|
81
81
|
@scraper.scrape(@repo) { |m, progress| messages << m if progress }
|
82
|
-
puts "\n **ERRORS: #{@scraper.
|
82
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
83
83
|
@scraper.succeeded?.should be_true
|
84
84
|
messages.size.should == 1
|
85
85
|
File.directory?(@scraper.current_repo_dir.should be_true)
|
@@ -87,12 +87,13 @@ describe RightScale::SvnScraper do
|
|
87
87
|
end
|
88
88
|
|
89
89
|
it 'should scrape incrementally' do
|
90
|
+
pending "File URLs comparison on Windows is tricky" if RUBY_PLATFORM=~/mswin/
|
90
91
|
@scraper.scrape(@repo)
|
91
|
-
puts "\n **ERRORS: #{@scraper.
|
92
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
92
93
|
@scraper.incremental_update?.should be_true
|
93
94
|
messages = []
|
94
95
|
@scraper.scrape(@repo) { |m, progress| messages << m if progress }
|
95
|
-
puts "\n **ERRORS: #{@scraper.
|
96
|
+
puts "\n **ERRORS: #{@scraper.errors.join("\n")}\n" unless @scraper.succeeded?
|
96
97
|
@scraper.succeeded?.should be_true
|
97
98
|
messages.size.should == 1
|
98
99
|
File.directory?(@scraper.current_repo_dir.should be_true)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: right_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Raphael Simon
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-02-02 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- spec/repository_spec.rb
|
40
40
|
- spec/scraper_base_spec.rb
|
41
41
|
- spec/scraper_spec.rb
|
42
|
+
- spec/spec.opts
|
42
43
|
- spec/spec_helper.rb
|
43
44
|
- spec/svn_scraper_spec.rb
|
44
45
|
has_rdoc: true
|