right_scraper 3.2.6 → 5.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/right_scraper.rb +16 -34
- data/lib/right_scraper/builders.rb +32 -0
- data/lib/right_scraper/builders/base.rb +19 -20
- data/lib/right_scraper/builders/filesystem.rb +8 -6
- data/lib/right_scraper/builders/union.rb +4 -1
- data/lib/right_scraper/loggers.rb +31 -0
- data/lib/right_scraper/loggers/base.rb +113 -0
- data/lib/right_scraper/loggers/default.rb +98 -0
- data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
- data/lib/right_scraper/processes.rb +33 -0
- data/lib/right_scraper/processes/shell.rb +227 -0
- data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
- data/lib/right_scraper/processes/svn_client.rb +117 -0
- data/lib/right_scraper/processes/warden.rb +358 -0
- data/lib/right_scraper/registered_base.rb +154 -0
- data/lib/right_scraper/repositories.rb +33 -0
- data/lib/right_scraper/repositories/base.rb +271 -232
- data/lib/right_scraper/repositories/download.rb +8 -6
- data/lib/right_scraper/repositories/git.rb +8 -9
- data/lib/right_scraper/repositories/svn.rb +8 -8
- data/lib/right_scraper/resources.rb +32 -0
- data/lib/right_scraper/resources/base.rb +5 -1
- data/lib/right_scraper/resources/cookbook.rb +34 -27
- data/lib/right_scraper/resources/workflow.rb +27 -28
- data/lib/right_scraper/retrievers.rb +34 -0
- data/lib/right_scraper/retrievers/base.rb +80 -84
- data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
- data/lib/right_scraper/retrievers/download.rb +125 -117
- data/lib/right_scraper/retrievers/git.rb +377 -223
- data/lib/right_scraper/retrievers/svn.rb +102 -62
- data/lib/right_scraper/scanners.rb +37 -0
- data/lib/right_scraper/scanners/base.rb +77 -80
- data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
- data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
- data/lib/right_scraper/scanners/union.rb +61 -58
- data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
- data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
- data/lib/right_scraper/scrapers.rb +32 -0
- data/lib/right_scraper/scrapers/base.rb +217 -205
- data/lib/right_scraper/scrapers/cookbook.rb +42 -40
- data/lib/right_scraper/scrapers/workflow.rb +57 -58
- data/lib/right_scraper/version.rb +3 -0
- data/right_scraper.gemspec +12 -16
- metadata +57 -163
- data/Gemfile +0 -15
- data/Rakefile +0 -89
- data/lib/right_scraper/logger.rb +0 -107
- data/lib/right_scraper/loggers/noisy.rb +0 -85
- data/lib/right_scraper/repositories/mock.rb +0 -70
- data/lib/right_scraper/retrievers/checkout.rb +0 -79
- data/lib/right_scraper/scraper_logger.rb +0 -66
- data/lib/right_scraper/svn_client.rb +0 -164
- data/right_scraper.rconf +0 -13
- data/spec/builder_spec.rb +0 -50
- data/spec/cookbook_helper.rb +0 -73
- data/spec/cookbook_manifest_spec.rb +0 -93
- data/spec/cookbook_s3_upload_spec.rb +0 -159
- data/spec/download/download_retriever_spec.rb +0 -118
- data/spec/download/download_retriever_spec_helper.rb +0 -72
- data/spec/download/download_spec.rb +0 -128
- data/spec/download/multi_dir_spec.rb +0 -106
- data/spec/download/multi_dir_spec_helper.rb +0 -40
- data/spec/git/cookbook_spec.rb +0 -165
- data/spec/git/demokey +0 -27
- data/spec/git/demokey.pub +0 -1
- data/spec/git/password_key +0 -30
- data/spec/git/password_key.pub +0 -1
- data/spec/git/repository_spec.rb +0 -110
- data/spec/git/retriever_spec.rb +0 -553
- data/spec/git/retriever_spec_helper.rb +0 -112
- data/spec/git/scraper_spec.rb +0 -151
- data/spec/git/ssh_spec.rb +0 -174
- data/spec/git/url_spec.rb +0 -103
- data/spec/logger_spec.rb +0 -185
- data/spec/repository_spec.rb +0 -111
- data/spec/retriever_spec_helper.rb +0 -146
- data/spec/scanner_spec.rb +0 -61
- data/spec/scraper_helper.rb +0 -88
- data/spec/scraper_spec.rb +0 -147
- data/spec/spec_helper.rb +0 -185
- data/spec/svn/cookbook_spec.rb +0 -96
- data/spec/svn/multi_svn_spec.rb +0 -64
- data/spec/svn/multi_svn_spec_helper.rb +0 -40
- data/spec/svn/repository_spec.rb +0 -72
- data/spec/svn/retriever_spec.rb +0 -266
- data/spec/svn/scraper_spec.rb +0 -90
- data/spec/svn/svn_retriever_spec_helper.rb +0 -90
- data/spec/svn/url_spec.rb +0 -47
- data/spec/url_spec.rb +0 -164
@@ -0,0 +1,33 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2013 RightScale Inc
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
|
23
|
+
# ancestor
|
24
|
+
require 'right_scraper'
|
25
|
+
|
26
|
+
module RightScraper
|
27
|
+
module Repositories
|
28
|
+
autoload :Base, 'right_scraper/repositories/base'
|
29
|
+
autoload :Download, 'right_scraper/repositories/download'
|
30
|
+
autoload :Git, 'right_scraper/repositories/git'
|
31
|
+
autoload :Svn, 'right_scraper/repositories/svn'
|
32
|
+
end
|
33
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -20,280 +20,319 @@
|
|
20
20
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
|
+
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/repositories'
|
26
|
+
|
23
27
|
require 'uri'
|
24
28
|
require 'digest/sha1'
|
25
29
|
require 'set'
|
26
30
|
require 'socket'
|
27
31
|
|
28
|
-
module RightScraper
|
32
|
+
module RightScraper::Repositories
|
33
|
+
|
34
|
+
# Description of remote repository that needs to be scraped.
|
35
|
+
#
|
36
|
+
# Repository definitions inherit from this base class. A repository must
|
37
|
+
# register its #repo_type (via register_self) so that it can be used with
|
38
|
+
# Repositories::Base::from_hash, as follows:
|
39
|
+
#
|
40
|
+
# class Foo < ::RightScraper::Repositories::Base
|
41
|
+
# ...
|
42
|
+
#
|
43
|
+
# # self-register
|
44
|
+
# register_self
|
45
|
+
# register_url_schemas('foo')
|
46
|
+
# end
|
47
|
+
#
|
48
|
+
# Subclasses should override #repo_type, #retriever and #to_url; when
|
49
|
+
# sensible, #revision should also be overridden. The most important
|
50
|
+
# methods are #to_url, which will return a +URI+ that completely
|
51
|
+
# characterizes the repository, and #retriever which returns the
|
52
|
+
# appropriate RightScraper::Retrievers::Base to scan that repository.
|
53
|
+
class Base < ::RightScraper::RegisteredBase
|
54
|
+
|
55
|
+
# exceptions
|
56
|
+
class RepositoryError < ::StandardError; end
|
57
|
+
|
58
|
+
# @return [Module] module for registered repository types
|
59
|
+
def self.registration_module
|
60
|
+
::RightScraper::Repositories
|
61
|
+
end
|
29
62
|
|
30
|
-
|
63
|
+
# @return [Set] set of registered repo url schemas
|
64
|
+
def self.registered_url_schemas
|
65
|
+
unless schemas = registration_module.instance_variable_get(:@registered_url_schemas)
|
66
|
+
schemas = ::Set.new(['http', 'https', 'ftp'])
|
67
|
+
registration_module.instance_variable_set(:@registered_url_schemas, schemas)
|
68
|
+
end
|
69
|
+
schemas
|
70
|
+
end
|
31
71
|
|
32
|
-
#
|
72
|
+
# Registers any unknown URL schemas for validation.
|
33
73
|
#
|
34
|
-
#
|
35
|
-
# register its #repo_type in @@types so that they can be used with
|
36
|
-
# Repositories::Base::from_hash, as follows:
|
37
|
-
# class ARepository < Base
|
38
|
-
# ...
|
74
|
+
# @param [Array] args to register as URL schema(s)
|
39
75
|
#
|
40
|
-
#
|
41
|
-
|
42
|
-
|
76
|
+
# @return [TrueClass] always true
|
77
|
+
def self.register_url_schemas(*args)
|
78
|
+
# note that set += blah seems to be badly implemented as set = set + blah
|
79
|
+
# for the Set class, which leaves the original set object unchanged and
|
80
|
+
# will return a new set object with the new data. only use the << operator
|
81
|
+
# to update an existing set object.
|
82
|
+
schemas = registered_url_schemas
|
83
|
+
Array(args).flatten.each { |schema| schemas << schema }
|
84
|
+
true
|
85
|
+
end
|
86
|
+
|
87
|
+
# Factory method for a new repository.
|
43
88
|
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
# === Return
|
58
|
-
# repo(RightScraper::Repositories::Base):: Resulting repository instance
|
59
|
-
def self.from_hash(opts)
|
60
|
-
repo_class = @@types[opts[:repo_type]]
|
61
|
-
raise "Can't understand how to make #{opts[:repo_type]} repos" if repo_class.nil?
|
62
|
-
repo = repo_class.new
|
63
|
-
unless ENV['DEVELOPMENT']
|
64
|
-
validate_uri opts[:url]
|
65
|
-
end
|
66
|
-
opts.each do |k, v|
|
67
|
-
next if k == :repo_type
|
68
|
-
if [:first_credential, :second_credential].include?(k) && is_useful?(v)
|
69
|
-
v = useful_part(v)
|
70
|
-
end
|
71
|
-
repo.__send__("#{k.to_s}=".to_sym, v)
|
89
|
+
# @param [Hash] repo_hash describing repository to create
|
90
|
+
#
|
91
|
+
# @return [RightScraper::Repositories::Base] repository created
|
92
|
+
def self.from_hash(repo_hash)
|
93
|
+
repo_type = repo_hash[:repo_type].to_s
|
94
|
+
raise ::ArgumentError, ':repo_type is required' if repo_type.empty?
|
95
|
+
repo_class = query_registered_type(repo_type)
|
96
|
+
repo = repo_class.new
|
97
|
+
validate_uri(repo_hash[:url]) unless ENV['DEVELOPMENT']
|
98
|
+
repo_hash.each do |k, v|
|
99
|
+
next if k == :repo_type
|
100
|
+
if [:first_credential, :second_credential].include?(k) && is_useful?(v)
|
101
|
+
v = useful_part(v)
|
72
102
|
end
|
73
|
-
repo
|
103
|
+
repo.__send__("#{k.to_s}=".to_sym, v)
|
74
104
|
end
|
105
|
+
repo
|
106
|
+
end
|
75
107
|
|
76
|
-
|
77
|
-
|
108
|
+
# (String) Human readable repository name used for progress reports
|
109
|
+
attr_accessor :display_name
|
78
110
|
|
79
|
-
|
80
|
-
|
111
|
+
# (Array of String) Subdirectories in the repository to search for resources
|
112
|
+
attr_accessor :resources_path
|
81
113
|
|
82
|
-
|
83
|
-
|
114
|
+
# (String) URL to repository (e.g. 'git://github.com/rightscale/right_scraper.git')
|
115
|
+
attr_accessor :url
|
84
116
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
117
|
+
# (String) Type of the repository. Currently one of 'git', 'svn'
|
118
|
+
# or 'download', implemented by the appropriate subclass. Needs
|
119
|
+
# to be overridden by subclasses.
|
120
|
+
def repo_type
|
121
|
+
raise NotImplementedError
|
122
|
+
end
|
91
123
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
124
|
+
# (RightScraper::Retrievers::Base class) Appropriate class for retrieving this sort of
|
125
|
+
# repository. Needs to be overridden appropriately by subclasses.
|
126
|
+
#
|
127
|
+
# === Options
|
128
|
+
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
|
129
|
+
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
|
130
|
+
# <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
|
131
|
+
# <tt>:logger</tt>:: Logger to use
|
132
|
+
#
|
133
|
+
# === Returns
|
134
|
+
# retriever(Retrievers::Base):: Corresponding retriever instance
|
135
|
+
def retriever(options)
|
136
|
+
raise NotImplementedError
|
137
|
+
end
|
106
138
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
139
|
+
# Return the revision this repository is currently looking at.
|
140
|
+
#
|
141
|
+
# === Returns
|
142
|
+
# String:: opaque revision type
|
143
|
+
def revision
|
144
|
+
nil
|
145
|
+
end
|
114
146
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
147
|
+
# Return a unique identifier for this repository ignoring the tags
|
148
|
+
# to check out.
|
149
|
+
#
|
150
|
+
# === Returns
|
151
|
+
# String:: opaque unique ID for this repository
|
152
|
+
def repository_hash
|
153
|
+
digest("#{::RightScraper::PROTOCOL_VERSION}\000#{repo_type}\000#{url}")
|
154
|
+
end
|
123
155
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
156
|
+
# Return a unique identifier for this revision in this repository.
|
157
|
+
#
|
158
|
+
# === Returns
|
159
|
+
# String:: opaque unique ID for this revision in this repository
|
160
|
+
def checkout_hash
|
161
|
+
repository_hash
|
162
|
+
end
|
131
163
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
164
|
+
# Unique representation for this repo, should resolve to the same string
|
165
|
+
# for repos that should be cloned in same directory
|
166
|
+
#
|
167
|
+
# === Returns
|
168
|
+
# res(String):: Unique representation for this repo
|
169
|
+
def to_s
|
170
|
+
res = "#{repo_type} #{url}"
|
171
|
+
end
|
140
172
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
173
|
+
# Convert this repository to a URL in the style of resource URLs.
|
174
|
+
#
|
175
|
+
# === Returns
|
176
|
+
# URI:: URL representing this repository
|
177
|
+
def to_url
|
178
|
+
URI.parse(url)
|
179
|
+
end
|
148
180
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
end
|
181
|
+
# Return true if this repository and +other+ represent the same
|
182
|
+
# repository including the same checkout tag.
|
183
|
+
#
|
184
|
+
# === Parameters
|
185
|
+
# other(Repositories::Base):: repository to compare with
|
186
|
+
#
|
187
|
+
# === Returns
|
188
|
+
# Boolean:: true iff this repository and +other+ are the same
|
189
|
+
def ==(other)
|
190
|
+
if other.is_a?(RightScraper::Repositories::Base)
|
191
|
+
checkout_hash == other.checkout_hash
|
192
|
+
else
|
193
|
+
false
|
163
194
|
end
|
195
|
+
end
|
164
196
|
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
end
|
197
|
+
# Return true if this repository and +other+ represent the same
|
198
|
+
# repository, excluding the checkout tag.
|
199
|
+
#
|
200
|
+
# === Parameters
|
201
|
+
# other(Repositories::Base):: repository to compare with
|
202
|
+
#
|
203
|
+
# === Returns
|
204
|
+
# Boolean:: true iff this repository and +other+ are the same
|
205
|
+
def equal_repo?(other)
|
206
|
+
if other.is_a?(RightScraper::Repositories::Base)
|
207
|
+
repository_hash == other.repository_hash
|
208
|
+
else
|
209
|
+
false
|
179
210
|
end
|
211
|
+
end
|
180
212
|
|
181
|
-
|
182
|
-
# ('git', 'svn' or 'download' currently) to the class that
|
183
|
-
# represents that repository.
|
184
|
-
@@types = {} unless class_variable_defined?(:@@types)
|
213
|
+
protected
|
185
214
|
|
186
|
-
|
187
|
-
|
215
|
+
# Return true iff this credential is useful. Currently "useful"
|
216
|
+
# means "nonempty and not all spaces".
|
217
|
+
def self.is_useful?(credential)
|
218
|
+
credential && !credential.strip.empty?
|
219
|
+
end
|
188
220
|
|
189
|
-
|
221
|
+
# Return the useful portion of this credential. Currently strips
|
222
|
+
# out leading and trailing whitespace.
|
223
|
+
def self.useful_part(credential)
|
224
|
+
credential.strip
|
225
|
+
end
|
190
226
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
227
|
+
# Compute a unique identifier for the given string. Currently uses SHA1.
|
228
|
+
#
|
229
|
+
# === Parameters
|
230
|
+
# string(String):: string to compute unique identifier for
|
231
|
+
#
|
232
|
+
# === Returns
|
233
|
+
# String:: unique identifier
|
234
|
+
def digest(string)
|
235
|
+
Digest::SHA1.hexdigest(string)
|
236
|
+
end
|
196
237
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
credential.strip
|
201
|
-
end
|
238
|
+
# Regexp matching everything not allowed in a URI and also ':',
|
239
|
+
# '@' and '/', to be used for encoding usernames and passwords.
|
240
|
+
USERPW = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}]|[:@/]", false, 'N').freeze
|
202
241
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
242
|
+
# Return a URI with the given username and password set.
|
243
|
+
#
|
244
|
+
# === Parameters
|
245
|
+
# uri(URI or String):: URI to add user identification to
|
246
|
+
#
|
247
|
+
# === Returns
|
248
|
+
# URI:: URI with username and password identification added
|
249
|
+
def add_users_to(uri, username=nil, password=nil)
|
250
|
+
begin
|
251
|
+
uri = URI.parse(uri) if uri.instance_of?(String)
|
252
|
+
if username
|
253
|
+
userinfo = URI.escape(username, USERPW)
|
254
|
+
userinfo += ":" + URI.escape(password, USERPW) unless password.nil?
|
255
|
+
uri.userinfo = userinfo
|
256
|
+
end
|
257
|
+
uri
|
258
|
+
rescue URI::InvalidURIError
|
259
|
+
if uri =~ PATTERN::GIT_URI
|
260
|
+
user, host, path = $1, $2, $3
|
261
|
+
userinfo = URI.escape(user, USERPW)
|
262
|
+
userinfo += ":" + URI.escape(username, USERPW) unless username.nil?
|
263
|
+
path = "/" + path unless path.start_with?('/')
|
264
|
+
URI::Generic::build({:scheme => "ssh",
|
265
|
+
:userinfo => userinfo,
|
266
|
+
:host => host,
|
267
|
+
:path => path
|
268
|
+
})
|
269
|
+
else
|
270
|
+
raise
|
271
|
+
end
|
212
272
|
end
|
273
|
+
end
|
213
274
|
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
if uri =~ PATTERN::GIT_URI
|
236
|
-
user, host, path = $1, $2, $3
|
237
|
-
userinfo = URI.escape(user, USERPW)
|
238
|
-
userinfo += ":" + URI.escape(username, USERPW) unless username.nil?
|
239
|
-
path = "/" + path unless path.start_with?('/')
|
240
|
-
URI::Generic::build({:scheme => "ssh",
|
241
|
-
:userinfo => userinfo,
|
242
|
-
:host => host,
|
243
|
-
:path => path
|
244
|
-
})
|
245
|
-
else
|
246
|
-
raise
|
247
|
-
end
|
275
|
+
module PATTERN
|
276
|
+
include URI::REGEXP::PATTERN
|
277
|
+
GIT_URI = Regexp.new("^((?:[#{UNRESERVED}]|#{ESCAPED})*)@(#{HOST}):(#{ABS_PATH}|#{REL_PATH})$")
|
278
|
+
end
|
279
|
+
|
280
|
+
SSH_PORT = 22
|
281
|
+
|
282
|
+
def self.validate_uri(uri)
|
283
|
+
begin
|
284
|
+
uri = URI.parse(uri) if uri.instance_of?(String)
|
285
|
+
unless registered_url_schemas.include?(uri.scheme)
|
286
|
+
raise RepositoryError,
|
287
|
+
"Invalid URI #{uri}: don't know how to interpret scheme #{uri.scheme}"
|
288
|
+
end
|
289
|
+
check_host(uri, uri.host, uri.port)
|
290
|
+
rescue URI::InvalidURIError
|
291
|
+
# could be a Git type URI.
|
292
|
+
if uri =~ PATTERN::GIT_URI
|
293
|
+
check_host(uri, $2, SSH_PORT)
|
294
|
+
else
|
295
|
+
raise
|
248
296
|
end
|
249
297
|
end
|
298
|
+
end
|
250
299
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
300
|
+
def self.check_host(uri, host, port)
|
301
|
+
begin
|
302
|
+
possibles = Socket.getaddrinfo(host, port, Socket::AF_INET, Socket::SOCK_STREAM, Socket::IPPROTO_TCP)
|
303
|
+
if possibles.nil? || possibles.empty?
|
304
|
+
raise RepositoryError, "Invalid URI #{uri}: no hosts for #{host}:#{port}"
|
305
|
+
end
|
306
|
+
possibles.each do |possible|
|
307
|
+
family, port, hostname, address, protocol_family, socket_type, protocol = possible
|
255
308
|
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
# could be a Git type URI.
|
265
|
-
if uri =~ PATTERN::GIT_URI
|
266
|
-
check_host(uri, $2, SSH_PORT)
|
267
|
-
else
|
268
|
-
raise
|
309
|
+
# The EC2 instance metadata address (link-local 169.254.169.254) is not permitted.
|
310
|
+
if address == '169.254.169.254'
|
311
|
+
raise RepositoryError, "Invalid URI #{uri}"
|
312
|
+
end
|
313
|
+
|
314
|
+
# Loopbacks are not permitted.
|
315
|
+
if address =~ /^127\.[0-9]+\.[0-9]+\.[0-9]+$/
|
316
|
+
raise RepositoryError, "Invalid URI #{uri}"
|
269
317
|
end
|
270
|
-
end
|
271
|
-
end
|
272
318
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
raise "Invalid URI #{uri}"
|
282
|
-
# Loopbacks are not permitted.
|
283
|
-
raise "Invalid URI #{uri}" if address =~ /^127\.[0-9]+\.[0-9]+\.[0-9]+$/
|
284
|
-
|
285
|
-
# Private networks are not permitted
|
286
|
-
raise "Invalid URI #{uri}" if address =~ /^10\.[0-9]+\.[0-9]+\.[0-9]+$/
|
287
|
-
raise "Invalid URI #{uri}" if address =~ /^172\.(1[6-9]|[23][0-9])\.[0-9]+\.[0-9]+$/
|
288
|
-
raise "Invalid URI #{uri}" if address =~ /^192\.168\.[0-9]+\.[0-9]+$/
|
319
|
+
# Private networks are not permitted
|
320
|
+
if address =~ /^10\.[0-9]+\.[0-9]+\.[0-9]+$/
|
321
|
+
raise RepositoryError, "Invalid URI #{uri}"
|
322
|
+
end
|
323
|
+
if address =~ /^172\.(1[6-9]|[23][0-9])\.[0-9]+\.[0-9]+$/
|
324
|
+
raise RepositoryError, "Invalid URI #{uri}"
|
325
|
+
end
|
326
|
+
if address =~ /^192\.168\.[0-9]+\.[0-9]+$/
|
327
|
+
raise RepositoryError, "Invalid URI #{uri}"
|
289
328
|
end
|
290
|
-
true
|
291
|
-
rescue SocketError
|
292
|
-
# means the host doesn't exist
|
293
|
-
raise "Invalid URI #{uri}: no hosts for #{host}:#{port}"
|
294
329
|
end
|
330
|
+
true
|
331
|
+
rescue SocketError
|
332
|
+
# means the host doesn't exist
|
333
|
+
raise RepositoryError, "Invalid URI #{uri}: no hosts for #{host}:#{port}"
|
295
334
|
end
|
296
|
-
|
297
335
|
end
|
336
|
+
|
298
337
|
end
|
299
338
|
end
|