right_scraper 1.0.26 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +16 -0
- data/README.rdoc +9 -28
- data/Rakefile +51 -39
- data/lib/right_scraper/builders/base.rb +64 -0
- data/lib/right_scraper/builders/filesystem.rb +96 -0
- data/lib/right_scraper/builders/union.rb +57 -0
- data/lib/right_scraper/logger.rb +102 -0
- data/lib/right_scraper/loggers/noisy.rb +85 -0
- data/lib/right_scraper/processes/ssh.rb +188 -0
- data/lib/right_scraper/repositories/base.rb +299 -0
- data/lib/right_scraper/repositories/download.rb +90 -0
- data/lib/right_scraper/repositories/git.rb +92 -0
- data/lib/right_scraper/repositories/mock.rb +70 -0
- data/lib/right_scraper/repositories/svn.rb +96 -0
- data/lib/right_scraper/resources/base.rb +70 -0
- data/{spec/scraper_base_spec.rb → lib/right_scraper/resources/cookbook.rb} +9 -23
- data/lib/right_scraper/resources/workflow.rb +55 -0
- data/lib/right_scraper/retrievers/base.rb +114 -0
- data/lib/right_scraper/retrievers/checkout.rb +79 -0
- data/lib/right_scraper/retrievers/download.rb +97 -0
- data/lib/right_scraper/retrievers/git.rb +140 -0
- data/lib/right_scraper/retrievers/svn.rb +87 -0
- data/lib/right_scraper/scanners/base.rb +111 -0
- data/lib/right_scraper/scanners/cookbook_manifest.rb +59 -0
- data/lib/right_scraper/scanners/cookbook_metadata.rb +69 -0
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +84 -0
- data/lib/right_scraper/scanners/union.rb +89 -0
- data/lib/right_scraper/scanners/workflow_manifest.rb +86 -0
- data/lib/right_scraper/scanners/workflow_metadata.rb +70 -0
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +85 -0
- data/lib/right_scraper/scraper.rb +81 -57
- data/lib/right_scraper/scraper_logger.rb +61 -0
- data/lib/right_scraper/scrapers/base.rb +262 -0
- data/lib/right_scraper/scrapers/cookbook.rb +73 -0
- data/lib/right_scraper/scrapers/workflow.rb +88 -0
- data/lib/right_scraper/svn_client.rb +101 -0
- data/lib/right_scraper/version.rb +28 -0
- data/lib/right_scraper.rb +35 -11
- data/right_scraper.gemspec +26 -13
- data/right_scraper.rconf +13 -0
- data/spec/builder_spec.rb +50 -0
- data/spec/cookbook_helper.rb +73 -0
- data/spec/cookbook_manifest_spec.rb +55 -0
- data/spec/cookbook_s3_upload_spec.rb +152 -0
- data/spec/download/download_retriever_spec.rb +118 -0
- data/spec/download/download_retriever_spec_helper.rb +72 -0
- data/spec/download/download_spec.rb +130 -0
- data/spec/download/multi_dir_spec.rb +106 -0
- data/spec/download/multi_dir_spec_helper.rb +40 -0
- data/spec/git/cookbook_spec.rb +166 -0
- data/spec/git/demokey +27 -0
- data/spec/git/demokey.pub +1 -0
- data/spec/git/password_key +30 -0
- data/spec/git/password_key.pub +1 -0
- data/spec/git/repository_spec.rb +110 -0
- data/spec/git/retriever_spec.rb +505 -0
- data/spec/git/retriever_spec_helper.rb +112 -0
- data/spec/git/scraper_spec.rb +136 -0
- data/spec/git/ssh_spec.rb +170 -0
- data/spec/git/url_spec.rb +103 -0
- data/spec/logger_spec.rb +185 -0
- data/spec/repository_spec.rb +89 -23
- data/spec/{scraper_spec_helper_base.rb → retriever_spec_helper.rb} +41 -27
- data/spec/scanner_spec.rb +61 -0
- data/spec/scraper_helper.rb +96 -0
- data/spec/scraper_spec.rb +123 -45
- data/spec/spec_helper.rb +87 -14
- data/spec/svn/cookbook_spec.rb +97 -0
- data/spec/svn/multi_svn_spec.rb +64 -0
- data/spec/svn/multi_svn_spec_helper.rb +40 -0
- data/spec/svn/repository_spec.rb +72 -0
- data/spec/svn/retriever_spec.rb +261 -0
- data/spec/svn/scraper_spec.rb +90 -0
- data/spec/svn/{svn_scraper_spec_helper.rb → svn_retriever_spec_helper.rb} +46 -27
- data/spec/svn/url_spec.rb +47 -0
- data/spec/url_spec.rb +164 -0
- metadata +203 -31
- data/lib/right_scraper/linux/process_monitor.rb +0 -84
- data/lib/right_scraper/repository.rb +0 -78
- data/lib/right_scraper/scraper_base.rb +0 -175
- data/lib/right_scraper/scrapers/download_scraper.rb +0 -67
- data/lib/right_scraper/scrapers/git_scraper.rb +0 -283
- data/lib/right_scraper/scrapers/svn_scraper.rb +0 -119
- data/lib/right_scraper/watcher.rb +0 -158
- data/lib/right_scraper/win32/process_monitor.rb +0 -98
- data/spec/download/download_scraper_spec.rb +0 -94
- data/spec/git/git_scraper_spec.rb +0 -165
- data/spec/git/git_scraper_spec_helper.rb +0 -72
- data/spec/rcov.opts +0 -1
- data/spec/spec.opts +0 -2
- data/spec/svn/svn_scraper_spec.rb +0 -148
- data/spec/watcher_spec.rb +0 -74
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
require 'tempfile'
|
|
24
|
+
require 'process_watcher'
|
|
25
|
+
require 'tmpdir'
|
|
26
|
+
|
|
27
|
+
module RightScraper
|
|
28
|
+
module Processes
|
|
29
|
+
# Manage a dedicated SSH agent.
|
|
30
|
+
class SSHAgent
|
|
31
|
+
# Snapshot the SSH-related environment variables (DISPLAY,
# SSH_ASKPASS, SSH_AUTH_SOCK, SSH_AGENT_PID and HOME) so that #close
# can put them back later.
def initialize
  @display, @askpass, @sshauth, @agentpid, @home =
    ENV.values_at('DISPLAY', 'SSH_ASKPASS', 'SSH_AUTH_SOCK', 'SSH_AGENT_PID', 'HOME')
end
|
|
38
|
+
|
|
39
|
+
# Start a dedicated ssh-agent and point +ENV+ at it.
#
# Overrides SSH_ASKPASS (with scripts/stub_ssh_askpass, resolved three
# directories above this file) and HOME, spawns ssh-agent listening on
# a socket inside a fresh temporary directory, polls until that socket
# appears, then exports SSH_AGENT_PID and SSH_AUTH_SOCK.
#
# === Raise
# RuntimeError:: if the agent socket has not appeared after roughly
#                ten seconds of polling
def open
  ENV['SSH_ASKPASS'] = File.expand_path(File.join(File.dirname(__FILE__),
                                                  '..', '..', '..',
                                                  'scripts',
                                                  'stub_ssh_askpass'))
  ENV['HOME'] = "/dev/null"
  @dir = Dir.mktmpdir
  @socketfile = File.join(@dir, "agent")
  @monitor = ProcessWatcher::ProcessMonitor.new
  @pid = @monitor.spawn('ssh-agent', '-a', @socketfile, '-d') {}
  timeout = 0
  # File.exist? instead of the File.exists? alias, which was removed
  # in Ruby 3.2.
  until File.exist?(@socketfile)
    timeout += 1
    sleep 0.1
    if timeout > 100
      raise "Couldn't find SSH agent control socket in time. Timing out"
    end
  end
  ENV['SSH_AGENT_PID'] = @pid.to_s
  ENV['SSH_AUTH_SOCK'] = @socketfile
end
|
|
62
|
+
|
|
63
|
+
# Close the connection to the SSH agent, and restore +ENV+.
#
# Removes the temporary socket directory, stops the agent process and
# cleans up the process monitor.  Each step is skipped when #open never
# got far enough to set it up, so calling this after a failed (or
# missing) #open no longer raises on a nil directory.  The agent is
# still stopped when removing the directory fails, and the environment
# variables captured in #initialize are restored in every case.
def close
  begin
    FileUtils.remove_entry_secure(@dir) if @dir
  ensure
    # Run even if the directory removal raised, so the agent is not
    # left behind.
    lay_to_rest(@pid) if @pid
    @monitor.cleanup if @monitor
  end
ensure
  setvar 'SSH_AGENT_PID', @agentpid
  setvar 'DISPLAY', @display
  setvar 'SSH_ASKPASS', @askpass
  setvar 'SSH_AUTH_SOCK', @sshauth
  setvar 'HOME', @home
end
|
|
77
|
+
|
|
78
|
+
# Kill +pid+.  Initially use SIGTERM to be kind and a good citizen.
# If it is still alive after +timeout+ seconds, use SIGKILL instead.
# The process is never waited on (it is not our child), so no exit
# status is collected here.  A process that has already exited is
# treated as successfully laid to rest rather than raising
# Errno::ESRCH.
#
# === Parameters
# pid(Integer):: pid of process to kill
# timeout(Integer):: time in seconds to wait before forcing
#                    process to die.  Defaults to 10 seconds.
#
# === Return
# 0:: if +pid+ is not a positive Integer (nothing is signalled)
# nil:: otherwise
def lay_to_rest(pid, timeout=10)
  # Refuse to pass a bad argument to Process.kill; zero and negative
  # values would address process groups rather than a single process.
  return 0 unless pid.is_a?(Integer) && pid > 0

  begin
    Process.kill('TERM', pid)
    time_waited = 0
    loop do
      if time_waited >= timeout
        Process.kill('KILL', pid)
        # can't waitpid here, because the ssh-agent isn't our
        # child.  Still, after SIGKILL it will die and init will
        # reap it, so continue
        return
      end
      # still can't waitpid here, so we see if it's still alive
      return unless still_alive?(pid)
      sleep 1
      time_waited += 1
    end
  rescue Errno::ESRCH
    # The process vanished before (or between) our signals: job done.
    nil
  end
end
|
|
107
|
+
|
|
108
|
+
# Check to see if the process +pid+ is still alive, by sending
# the 0 signal and checking for an exception.
#
# === Parameters
# pid(Integer):: pid of process to check on
#
# === Return
# Boolean:: true if process is still alive
def still_alive?(pid)
  Process.kill(0, pid)
  true
rescue Errno::ESRCH
  # No such process.
  false
rescue Errno::EPERM
  # The process exists but we are not allowed to signal it.
  true
end
|
|
124
|
+
|
|
125
|
+
# Assign +value+ to the environment variable +key+, or remove the
# variable altogether when +value+ is nil.
#
# === Parameters
# key(String):: environment variable name
# value(String or nil):: proposed new value
#
# === Return
# true
def setvar(key, value)
  value.nil? ? ENV.delete(key) : ENV.store(key, value)
  true
end
private :setvar
|
|
143
|
+
|
|
144
|
+
# Hand the given key material to the ssh agent.  The key is written
# (followed by a newline) to a temporary file, which is passed to
# #add_keyfile and deleted afterwards whether or not that succeeds.
#
# === Parameters
# key(String):: ssh key data
def add_key(key)
  keyfile = Tempfile.new('key')
  begin
    keyfile.puts(key)
    keyfile.close

    add_keyfile(keyfile.path)
  ensure
    # close(true) also unlinks the temporary file
    keyfile.close(true)
  end
end
|
|
159
|
+
|
|
160
|
+
# Register the key stored in the given file with the ssh agent by
# running ssh-add through ProcessWatcher.
#
# NOTE(review): the trailing nil, -1 and 10 arguments are presumably
# the working directory, byte limit and timeout in seconds -- confirm
# against ProcessWatcher.watch.
#
# === Parameters
# file(String):: file containing key data
def add_keyfile(file)
  command = "ssh-add"
  arguments = [file]
  ProcessWatcher.watch(command, arguments, nil, -1, 10)
end
|
|
167
|
+
|
|
168
|
+
# Run the given block against a freshly started ssh agent, making
# sure the agent is closed again afterwards.  Equivalent to
#   agent = SSHAgent.new
#   begin
#     agent.open
#     ...
#   ensure
#     agent.close
#   end
#
# The agent is yielded to the block and the block's value is returned.
def self.with
  session = SSHAgent.new
  begin
    session.open
    yield(session)
  ensure
    session.close
  end
end
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
end
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
require 'uri'
|
|
24
|
+
require 'digest/sha1'
|
|
25
|
+
require 'set'
|
|
26
|
+
require 'socket'
|
|
27
|
+
|
|
28
|
+
module RightScraper
|
|
29
|
+
|
|
30
|
+
module Repositories
|
|
31
|
+
|
|
32
|
+
# Description of remote repository that needs to be scraped.
|
|
33
|
+
#
|
|
34
|
+
# Repository definitions inherit from this base class. A repository must
|
|
35
|
+
# register its #repo_type in @@types so that they can be used with
|
|
36
|
+
# Repositories::Base::from_hash, as follows:
|
|
37
|
+
# class ARepository < Base
|
|
38
|
+
# ...
|
|
39
|
+
#
|
|
40
|
+
# # Add this repository to the list of available types.
|
|
41
|
+
# @@types[:arepository] = ARepository
|
|
42
|
+
# end
|
|
43
|
+
#
|
|
44
|
+
# Subclasses should override #repo_type, #retriever and #to_url; when
|
|
45
|
+
# sensible, #revision should also be overridden. The most important
|
|
46
|
+
# methods are #to_url, which will return a +URI+ that completely
|
|
47
|
+
# characterizes the repository, and #retriever which returns the
|
|
48
|
+
# appropriate RightScraper::Retrievers::Base to scan that repository.
|
|
49
|
+
class Base
|
|
50
|
+
|
|
51
|
+
# Build a repository object from a hash of attributes.
#
# The class to instantiate is looked up from +opts[:repo_type]+; every
# other key is assigned through the matching attribute writer.  Unless
# the DEVELOPMENT environment variable is set, +opts[:url]+ is checked
# with validate_uri first.  Credentials that contain something other
# than whitespace have their surrounding whitespace stripped.
#
# === Parameters
# opts(Hash):: Hash to be converted into a RightScraper::Repositories::Base instance
#
# === Return
# repo(RightScraper::Repositories::Base):: Resulting repository instance
#
# === Raise
# RuntimeError:: if no repository class is registered for the repo type
def self.from_hash(opts)
  repo_class = @@types[opts[:repo_type]]
  if repo_class.nil?
    raise "Can't understand how to make #{opts[:repo_type]} repos"
  end
  repo = repo_class.new
  validate_uri(opts[:url]) unless ENV['DEVELOPMENT']
  opts.each do |name, value|
    next if name == :repo_type
    credential = [:first_credential, :second_credential].include?(name)
    value = useful_part(value) if credential && is_useful?(value)
    repo.__send__("#{name.to_s}=".to_sym, value)
  end
  repo
end
|
|
75
|
+
|
|
76
|
+
# (String) Human readable repository name used for progress reports
|
|
77
|
+
attr_accessor :display_name
|
|
78
|
+
|
|
79
|
+
# (Array of String) Subdirectories in the repository to search for resources
|
|
80
|
+
attr_accessor :resources_path
|
|
81
|
+
|
|
82
|
+
# (String) URL to repository (e.g. 'git://github.com/rightscale/right_scraper.git')
|
|
83
|
+
attr_accessor :url
|
|
84
|
+
|
|
85
|
+
# Type of the repository, as used for the keys of @@types.  Abstract
# here: every concrete repository class must override it.
#
# === Raise
# NotImplementedError:: always, in this base class
def repo_type
  raise(NotImplementedError)
end
|
|
91
|
+
|
|
92
|
+
# Retriever appropriate for this sort of repository.  Abstract here:
# every concrete repository class must override it.
#
# === Options
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
# <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
# <tt>:logger</tt>:: Logger to use
#
# === Returns
# retriever(Retrievers::Base):: Corresponding retriever instance
#
# === Raise
# NotImplementedError:: always, in this base class
def retriever(options)
  raise(NotImplementedError)
end
|
|
106
|
+
|
|
107
|
+
# Revision this repository is currently looking at.  The base class
# has no notion of revisions and answers nil.
#
# === Returns
# nil:: always, in this base class
def revision
  return nil
end
|
|
114
|
+
|
|
115
|
+
# Identifier for this repository that ignores which tag is checked
# out: the digest of the protocol version, repository type and URL,
# separated by NUL characters.
#
# === Returns
# String:: opaque unique ID for this repository
def repository_hash
  identity = "#{PROTOCOL_VERSION}\000#{repo_type}\000#{url}"
  digest(identity)
end
|
|
123
|
+
|
|
124
|
+
# Identifier for this revision of this repository.  The base class
# does not distinguish revisions, so this is simply #repository_hash.
#
# === Returns
# String:: opaque unique ID for this revision in this repository
def checkout_hash
  return repository_hash
end
|
|
131
|
+
|
|
132
|
+
# Unique representation for this repo, should resolve to the same string
|
|
133
|
+
# for repos that should be cloned in same directory
|
|
134
|
+
#
|
|
135
|
+
# === Returns
|
|
136
|
+
# res(String):: Unique representation for this repo
|
|
137
|
+
def to_s
|
|
138
|
+
res = "#{repo_type} #{url}"
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# Express this repository as a URL in the style of resource URLs by
# parsing its +url+ attribute.
#
# === Returns
# URI:: URL representing this repository
def to_url
  location = url
  URI.parse(location)
end
|
|
148
|
+
|
|
149
|
+
# Return true if this repository and +other+ represent the same
|
|
150
|
+
# repository including the same checkout tag.
|
|
151
|
+
#
|
|
152
|
+
# === Parameters
|
|
153
|
+
# other(Repositories::Base):: repository to compare with
|
|
154
|
+
#
|
|
155
|
+
# === Returns
|
|
156
|
+
# Boolean:: true iff this repository and +other+ are the same
|
|
157
|
+
def ==(other)
|
|
158
|
+
if other.is_a?(RightScraper::Repositories::Base)
|
|
159
|
+
checkout_hash == other.checkout_hash
|
|
160
|
+
else
|
|
161
|
+
false
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
# Compare this repository with +other+ while ignoring the checkout
# tag: two repositories match when their repository hashes match.
#
# === Parameters
# other(Repositories::Base):: repository to compare with
#
# === Returns
# Boolean:: true iff this repository and +other+ are the same; always
#           false when +other+ is not a repository
def equal_repo?(other)
  return false unless other.is_a?(RightScraper::Repositories::Base)
  repository_hash == other.repository_hash
end
|
|
180
|
+
|
|
181
|
+
# (Hash) Lookup table from textual description of repository type
|
|
182
|
+
# ('git', 'svn' or 'download' currently) to the class that
|
|
183
|
+
# represents that repository.
|
|
184
|
+
@@types = {} unless class_variable_defined?(:@@types)
|
|
185
|
+
|
|
186
|
+
# (Set) list of acceptable URI schemes.  Initially just http, https and ftp;
# repository subclasses add their own.  Guarded like @@types so that
# reloading this file does not discard schemes already registered.
@@okay_schemes = Set.new(["http", "https", "ftp"]) unless class_variable_defined?(:@@okay_schemes)
|
|
188
|
+
|
|
189
|
+
protected
|
|
190
|
+
|
|
191
|
+
# Decide whether a credential is worth keeping.  A credential is
# useful when it is present and contains something other than
# whitespace.
#
# === Returns
# true:: for a credential with non-whitespace content
# false or nil:: otherwise (a nil or false credential is returned as is)
def self.is_useful?(credential)
  return credential unless credential
  !credential.strip.empty?
end
|
|
196
|
+
|
|
197
|
+
# Extract the useful portion of a credential, which currently means
# the credential with leading and trailing whitespace removed
# (whitespace inside the credential is kept).
#
# === Returns
# String:: trimmed copy of the credential
def self.useful_part(credential)
  trimmed = credential.strip
  trimmed
end
|
|
202
|
+
|
|
203
|
+
# Derive an identifier from the given string: its SHA1 digest in
# hexadecimal form.
#
# === Parameters
# string(String):: string to compute unique identifier for
#
# === Returns
# String:: unique identifier
def digest(string)
  Digest::SHA1.new.update(string).hexdigest
end
|
|
213
|
+
|
|
214
|
+
# Regexp matching everything not allowed in a URI and also ':',
# '@' and '/', to be used for encoding usernames and passwords.
# Built with Regexp::NOENCODING, the supported spelling of the old
# 'N' third argument to Regexp.new (which Ruby 3.3 no longer accepts).
USERPW = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}]|[:@/]", Regexp::NOENCODING).freeze
|
|
217
|
+
|
|
218
|
+
# Return a URI with the given username and password set.
#
# Strings are parsed first.  When parsing fails but the string is an
# scp-style Git location (<tt>user@host:path</tt>, see
# PATTERN::GIT_URI), an <tt>ssh://</tt> URI is built instead; in that
# case the user from the location is kept and +username+, when given,
# is stored in the password position.
#
# === Parameters
# uri(URI or String):: URI to add user identification to
# username(String or nil):: user name to embed, if any
# password(String or nil):: password to embed, if any
#
# === Returns
# URI:: URI with username and password identification added
#
# === Raise
# URI::InvalidURIError:: if +uri+ can be read neither as a URI nor as
#                        an scp-style Git location
def add_users_to(uri, username=nil, password=nil)
  begin
    uri = URI.parse(uri) if uri.instance_of?(String)
    if username
      userinfo = escape_userinfo(username)
      userinfo += ":" + escape_userinfo(password) unless password.nil?
      uri.userinfo = userinfo
    end
    uri
  rescue URI::InvalidURIError
    match = uri.is_a?(String) && PATTERN::GIT_URI.match(uri)
    raise unless match
    user, host, path = match[1], match[2], match[3]
    userinfo = escape_userinfo(user)
    userinfo += ":" + escape_userinfo(username) unless username.nil?
    path = "/" + path unless path.start_with?('/')
    URI::Generic::build({:scheme => "ssh",
                         :userinfo => userinfo,
                         :host => host,
                         :path => path
                        })
  end
end

# Percent-encode every character of +text+ matched by USERPW, byte by
# byte.  Stands in for URI.escape, which was removed in Ruby 3.0.
def escape_userinfo(text)
  text.gsub(USERPW) do |unsafe|
    unsafe.unpack('C*').map { |byte| '%%%02X' % byte }.join
  end
end
|
|
250
|
+
|
|
251
|
+
# URI building blocks plus a pattern for scp-style Git locations.
module PATTERN
  include URI::REGEXP::PATTERN
  # Matches <tt>user@host:path</tt>, capturing user, host and path.
  # Anchored with \A and \z so that the whole string has to match,
  # not merely one line of a multi-line string.
  GIT_URI = Regexp.new("\\A((?:[#{UNRESERVED}]|#{ESCAPED})*)@(#{HOST}):(#{ABS_PATH}|#{REL_PATH})\\z")
end
|
|
255
|
+
|
|
256
|
+
SSH_PORT = 22
|
|
257
|
+
|
|
258
|
+
# Check that +uri+ is something we are willing to contact.
#
# A parseable URI must use one of the schemes in @@okay_schemes and
# its host must pass check_host.  A string that does not parse as a
# URI is accepted if it is an scp-style Git location
# (PATTERN::GIT_URI) whose host passes check_host on the SSH port.
#
# === Parameters
# uri(URI or String):: location to validate
#
# === Return
# true:: when the location is acceptable
#
# === Raise
# RuntimeError:: for an unknown scheme or an unacceptable host
# URI::InvalidURIError:: if the string is neither a URI nor a Git location
def self.validate_uri(uri)
  uri = URI.parse(uri) if uri.instance_of?(String)
  unless @@okay_schemes.include?(uri.scheme)
    raise "Invalid URI #{uri}: don't know how to interpret scheme #{uri.scheme}"
  end
  check_host(uri, uri.host, uri.port)
rescue URI::InvalidURIError
  # could be a Git type URI.
  raise unless uri =~ PATTERN::GIT_URI
  check_host(uri, $2, SSH_PORT)
end
|
|
272
|
+
|
|
273
|
+
# Resolve +host+ (IPv4 only) and refuse it if any of its addresses is
# one we must not contact: the EC2 metadata address, loopback
# addresses, or the private networks 10.0.0.0/8, 172.16.0.0/12 and
# 192.168.0.0/16.
#
# === Parameters
# uri(URI or String):: original location, used in error messages only
# host(String):: host name or address to resolve
# port(Integer):: port to resolve for
#
# === Return
# true:: when every resolved address is acceptable
#
# === Raise
# RuntimeError:: if the host does not resolve or resolves to a
#                forbidden address
def self.check_host(uri, host, port)
  forbidden = [
    /\A169\.254\.169\.254\z/,                    # our EC2 gateway
    /\A127\.[0-9]+\.[0-9]+\.[0-9]+\z/,            # loopbacks
    /\A10\.[0-9]+\.[0-9]+\.[0-9]+\z/,             # private 10/8
    # private 172.16/12 is 172.16 through 172.31 only; 172.32 and up
    # are public addresses and must stay reachable
    /\A172\.(1[6-9]|2[0-9]|3[01])\.[0-9]+\.[0-9]+\z/,
    /\A192\.168\.[0-9]+\.[0-9]+\z/                # private 192.168/16
  ]

  begin
    possibles = Socket.getaddrinfo(host, port, Socket::AF_INET, Socket::SOCK_STREAM, Socket::IPPROTO_TCP)
  rescue SocketError
    # means the host doesn't exist
    raise "Invalid URI #{uri}: no hosts for #{host}:#{port}"
  end
  raise "Invalid URI #{uri}: no hosts for #{host}:#{port}" if possibles.nil? || possibles.empty?

  possibles.each do |possible|
    # getaddrinfo entries are [family, port, hostname, address, ...]
    address = possible[3]
    raise "Invalid URI #{uri}" if forbidden.any? { |pattern| address =~ pattern }
  end
  true
end
|
|
296
|
+
|
|
297
|
+
end
|
|
298
|
+
end
|
|
299
|
+
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
module RightScraper
|
|
25
|
+
module Repositories
|
|
26
|
+
# A repository that is just an archive file hanging off a
|
|
27
|
+
# web server somewhere. This version uses a command line curl to
|
|
28
|
+
# download the archive, and command line tar to extract it.
|
|
29
|
+
class Download < Base
|
|
30
|
+
# (Symbol) Type of the repository, here :download.
def repo_type
  return :download
end
|
|
34
|
+
|
|
35
|
+
# (String) Optional, username
|
|
36
|
+
attr_accessor :first_credential
|
|
37
|
+
|
|
38
|
+
# (String) Optional, password
|
|
39
|
+
attr_accessor :second_credential
|
|
40
|
+
|
|
41
|
+
# (String) Optional, SHA of contents
|
|
42
|
+
attr_accessor :tag
|
|
43
|
+
alias_method :revision, :tag
|
|
44
|
+
|
|
45
|
+
# Unique representation for this repo, should resolve to the same string
|
|
46
|
+
# for repos that should be cloned in same directory
|
|
47
|
+
#
|
|
48
|
+
# === Returns
|
|
49
|
+
# res(String):: Unique representation for this repo
|
|
50
|
+
def to_s
|
|
51
|
+
res = "download #{url}"
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Express this repository as a URL in the style of resource URLs,
# with the first and second credentials embedded as username and
# password through add_users_to.
#
# === Returns
# URI:: URL representing this repository
def to_url
  username = first_credential
  password = second_credential
  add_users_to(url, username, password)
end
|
|
61
|
+
|
|
62
|
+
# Identifier for this revision of this repository: the digest of the
# protocol version, repository type, URL and tag, separated by NUL
# characters.
#
# === Returns
# String:: opaque unique ID for this revision in this repository
def checkout_hash
  identity = "#{PROTOCOL_VERSION}\000#{repo_type}\000#{url}\000#{tag}"
  digest(identity)
end
|
|
69
|
+
|
|
70
|
+
# Create the retriever that knows how to fetch this kind of repository.
#
# === Options
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
# <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
# <tt>:logger</tt>:: Logger to use
#
# === Return
# retriever(Retrievers::Download):: Retriever for this repository
def retriever(options)
  retriever_class = RightScraper::Retrievers::Download
  retriever_class.new(self, options)
end
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# Add this repository to the list of available types.
|
|
86
|
+
@@types[:download] = RightScraper::Repositories::Download
|
|
87
|
+
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
module RightScraper
|
|
25
|
+
module Repositories
|
|
26
|
+
# A Git repository.
|
|
27
|
+
class Git < Base
|
|
28
|
+
|
|
29
|
+
# (String) Optional, tag or branch of repository that should be downloaded
|
|
30
|
+
attr_accessor :tag
|
|
31
|
+
alias_method :revision, :tag
|
|
32
|
+
|
|
33
|
+
# (String) Optional, git private SSH key content
|
|
34
|
+
attr_accessor :first_credential
|
|
35
|
+
alias_method :ssh_key, :first_credential
|
|
36
|
+
|
|
37
|
+
# Initialize repository; the tag defaults to "master" when none has
# been set.
def initialize(*args)
  super
  return unless @tag.nil?
  @tag = "master"
end
|
|
42
|
+
|
|
43
|
+
# (Symbol) Type of the repository, here :git.
def repo_type
  return :git
end
|
|
47
|
+
|
|
48
|
+
# Identifier for this revision of this repository: the digest of the
# protocol version, repository type, URL and tag, separated by NUL
# characters.
#
# === Returns
# String:: opaque unique ID for this revision in this repository
def checkout_hash
  identity = "#{PROTOCOL_VERSION}\000#{repo_type}\000#{url}\000#{tag}"
  digest(identity)
end
|
|
55
|
+
|
|
56
|
+
# Convert this repository to a URL in the style of resource URLs.
#
# Always goes through add_users_to, even without a credential, so that
# scp-style locations (<tt>user@host:path</tt>) are turned into
# <tt>ssh://</tt> URIs instead of making URI.parse raise.  With no
# credential and an ordinary URL the result is the parsed URL.
#
# === Returns
# URI:: URL representing this repository
def to_url
  add_users_to(url, first_credential)
end
|
|
68
|
+
|
|
69
|
+
# Create the retriever that knows how to fetch this kind of repository.
#
# === Options
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
# <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
# <tt>:logger</tt>:: Logger to use
#
# === Return
# retriever(Retrievers::Git):: Retriever for this repository
def retriever(options)
  retriever_class = RightScraper::Retrievers::Git
  retriever_class.new(self, options)
end
|
|
82
|
+
|
|
83
|
+
# Add this repository to the list of available types.
|
|
84
|
+
@@types[:git] = RightScraper::Repositories::Git
|
|
85
|
+
|
|
86
|
+
# Add git URL schemas to the list of okay schemas.
|
|
87
|
+
@@okay_schemes << "git"
|
|
88
|
+
@@okay_schemes << "git+ssh"
|
|
89
|
+
@@okay_schemes << "ssh"
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|