right_scraper 3.2.6 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/right_scraper.rb +16 -34
- data/lib/right_scraper/builders.rb +32 -0
- data/lib/right_scraper/builders/base.rb +19 -20
- data/lib/right_scraper/builders/filesystem.rb +8 -6
- data/lib/right_scraper/builders/union.rb +4 -1
- data/lib/right_scraper/loggers.rb +31 -0
- data/lib/right_scraper/loggers/base.rb +113 -0
- data/lib/right_scraper/loggers/default.rb +98 -0
- data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
- data/lib/right_scraper/processes.rb +33 -0
- data/lib/right_scraper/processes/shell.rb +227 -0
- data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
- data/lib/right_scraper/processes/svn_client.rb +117 -0
- data/lib/right_scraper/processes/warden.rb +358 -0
- data/lib/right_scraper/registered_base.rb +154 -0
- data/lib/right_scraper/repositories.rb +33 -0
- data/lib/right_scraper/repositories/base.rb +271 -232
- data/lib/right_scraper/repositories/download.rb +8 -6
- data/lib/right_scraper/repositories/git.rb +8 -9
- data/lib/right_scraper/repositories/svn.rb +8 -8
- data/lib/right_scraper/resources.rb +32 -0
- data/lib/right_scraper/resources/base.rb +5 -1
- data/lib/right_scraper/resources/cookbook.rb +34 -27
- data/lib/right_scraper/resources/workflow.rb +27 -28
- data/lib/right_scraper/retrievers.rb +34 -0
- data/lib/right_scraper/retrievers/base.rb +80 -84
- data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
- data/lib/right_scraper/retrievers/download.rb +125 -117
- data/lib/right_scraper/retrievers/git.rb +377 -223
- data/lib/right_scraper/retrievers/svn.rb +102 -62
- data/lib/right_scraper/scanners.rb +37 -0
- data/lib/right_scraper/scanners/base.rb +77 -80
- data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
- data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
- data/lib/right_scraper/scanners/union.rb +61 -58
- data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
- data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
- data/lib/right_scraper/scrapers.rb +32 -0
- data/lib/right_scraper/scrapers/base.rb +217 -205
- data/lib/right_scraper/scrapers/cookbook.rb +42 -40
- data/lib/right_scraper/scrapers/workflow.rb +57 -58
- data/lib/right_scraper/version.rb +3 -0
- data/right_scraper.gemspec +12 -16
- metadata +57 -163
- data/Gemfile +0 -15
- data/Rakefile +0 -89
- data/lib/right_scraper/logger.rb +0 -107
- data/lib/right_scraper/loggers/noisy.rb +0 -85
- data/lib/right_scraper/repositories/mock.rb +0 -70
- data/lib/right_scraper/retrievers/checkout.rb +0 -79
- data/lib/right_scraper/scraper_logger.rb +0 -66
- data/lib/right_scraper/svn_client.rb +0 -164
- data/right_scraper.rconf +0 -13
- data/spec/builder_spec.rb +0 -50
- data/spec/cookbook_helper.rb +0 -73
- data/spec/cookbook_manifest_spec.rb +0 -93
- data/spec/cookbook_s3_upload_spec.rb +0 -159
- data/spec/download/download_retriever_spec.rb +0 -118
- data/spec/download/download_retriever_spec_helper.rb +0 -72
- data/spec/download/download_spec.rb +0 -128
- data/spec/download/multi_dir_spec.rb +0 -106
- data/spec/download/multi_dir_spec_helper.rb +0 -40
- data/spec/git/cookbook_spec.rb +0 -165
- data/spec/git/demokey +0 -27
- data/spec/git/demokey.pub +0 -1
- data/spec/git/password_key +0 -30
- data/spec/git/password_key.pub +0 -1
- data/spec/git/repository_spec.rb +0 -110
- data/spec/git/retriever_spec.rb +0 -553
- data/spec/git/retriever_spec_helper.rb +0 -112
- data/spec/git/scraper_spec.rb +0 -151
- data/spec/git/ssh_spec.rb +0 -174
- data/spec/git/url_spec.rb +0 -103
- data/spec/logger_spec.rb +0 -185
- data/spec/repository_spec.rb +0 -111
- data/spec/retriever_spec_helper.rb +0 -146
- data/spec/scanner_spec.rb +0 -61
- data/spec/scraper_helper.rb +0 -88
- data/spec/scraper_spec.rb +0 -147
- data/spec/spec_helper.rb +0 -185
- data/spec/svn/cookbook_spec.rb +0 -96
- data/spec/svn/multi_svn_spec.rb +0 -64
- data/spec/svn/multi_svn_spec_helper.rb +0 -40
- data/spec/svn/repository_spec.rb +0 -72
- data/spec/svn/retriever_spec.rb +0 -266
- data/spec/svn/scraper_spec.rb +0 -90
- data/spec/svn/svn_retriever_spec_helper.rb +0 -90
- data/spec/svn/url_spec.rb +0 -47
- data/spec/url_spec.rb +0 -164
@@ -0,0 +1,33 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2013 RightScale Inc
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
|
23
|
+
# ancestor
|
24
|
+
require 'right_scraper'
|
25
|
+
|
26
|
+
module RightScraper
|
27
|
+
module Repositories
|
28
|
+
autoload :Base, 'right_scraper/repositories/base'
|
29
|
+
autoload :Download, 'right_scraper/repositories/download'
|
30
|
+
autoload :Git, 'right_scraper/repositories/git'
|
31
|
+
autoload :Svn, 'right_scraper/repositories/svn'
|
32
|
+
end
|
33
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -20,280 +20,319 @@
|
|
20
20
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
|
+
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/repositories'
|
26
|
+
|
23
27
|
require 'uri'
|
24
28
|
require 'digest/sha1'
|
25
29
|
require 'set'
|
26
30
|
require 'socket'
|
27
31
|
|
28
|
-
module RightScraper
|
32
|
+
module RightScraper::Repositories
|
33
|
+
|
34
|
+
# Description of remote repository that needs to be scraped.
|
35
|
+
#
|
36
|
+
# Repository definitions inherit from this base class. A repository must
|
37
|
+
# register its #repo_type (see the self-registration example below) so that it can be used with
|
38
|
+
# Repositories::Base::from_hash, as follows:
|
39
|
+
#
|
40
|
+
# class Foo < ::RightScraper::Repositories::Base
|
41
|
+
# ...
|
42
|
+
#
|
43
|
+
# # self-register
|
44
|
+
# register_self
|
45
|
+
# register_url_schemas('foo')
|
46
|
+
# end
|
47
|
+
#
|
48
|
+
# Subclasses should override #repo_type, #retriever and #to_url; when
|
49
|
+
# sensible, #revision should also be overridden. The most important
|
50
|
+
# methods are #to_url, which will return a +URI+ that completely
|
51
|
+
# characterizes the repository, and #retriever which returns the
|
52
|
+
# appropriate RightScraper::Retrievers::Base to scan that repository.
|
53
|
+
class Base < ::RightScraper::RegisteredBase
|
54
|
+
|
55
|
+
# exceptions
|
56
|
+
class RepositoryError < ::StandardError; end
|
57
|
+
|
58
|
+
# @return [Module] module for registered repository types
|
59
|
+
def self.registration_module
|
60
|
+
::RightScraper::Repositories
|
61
|
+
end
|
29
62
|
|
30
|
-
|
63
|
+
# @return [Set] set of registered repo url schemas
|
64
|
+
def self.registered_url_schemas
|
65
|
+
unless schemas = registration_module.instance_variable_get(:@registered_url_schemas)
|
66
|
+
schemas = ::Set.new(['http', 'https', 'ftp'])
|
67
|
+
registration_module.instance_variable_set(:@registered_url_schemas, schemas)
|
68
|
+
end
|
69
|
+
schemas
|
70
|
+
end
|
31
71
|
|
32
|
-
#
|
72
|
+
# Registers any unknown URL schemas for validation.
|
33
73
|
#
|
34
|
-
#
|
35
|
-
# register its #repo_type in @@types so that they can be used with
|
36
|
-
# Repositories::Base::from_hash, as follows:
|
37
|
-
# class ARepository < Base
|
38
|
-
# ...
|
74
|
+
# @param [Array] args to register as URL schema(s)
|
39
75
|
#
|
40
|
-
#
|
41
|
-
|
42
|
-
|
76
|
+
# @return [TrueClass] always true
|
77
|
+
def self.register_url_schemas(*args)
|
78
|
+
# note that set += blah is evaluated by Ruby as set = set + blah
|
79
|
+
# for the Set class, which leaves the original set object unchanged and
|
80
|
+
# will return a new set object with the new data. only use the << operator
|
81
|
+
# to update an existing set object.
|
82
|
+
schemas = registered_url_schemas
|
83
|
+
Array(args).flatten.each { |schema| schemas << schema }
|
84
|
+
true
|
85
|
+
end
|
86
|
+
|
87
|
+
# Factory method for a new repository.
|
43
88
|
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
# === Return
|
58
|
-
# repo(RightScraper::Repositories::Base):: Resulting repository instance
|
59
|
-
def self.from_hash(opts)
|
60
|
-
repo_class = @@types[opts[:repo_type]]
|
61
|
-
raise "Can't understand how to make #{opts[:repo_type]} repos" if repo_class.nil?
|
62
|
-
repo = repo_class.new
|
63
|
-
unless ENV['DEVELOPMENT']
|
64
|
-
validate_uri opts[:url]
|
65
|
-
end
|
66
|
-
opts.each do |k, v|
|
67
|
-
next if k == :repo_type
|
68
|
-
if [:first_credential, :second_credential].include?(k) && is_useful?(v)
|
69
|
-
v = useful_part(v)
|
70
|
-
end
|
71
|
-
repo.__send__("#{k.to_s}=".to_sym, v)
|
89
|
+
# @param [Hash] repo_hash describing repository to create
|
90
|
+
#
|
91
|
+
# @return [RightScraper::Repositories::Base] repository created
|
92
|
+
def self.from_hash(repo_hash)
|
93
|
+
repo_type = repo_hash[:repo_type].to_s
|
94
|
+
raise ::ArgumentError, ':repo_type is required' if repo_type.empty?
|
95
|
+
repo_class = query_registered_type(repo_type)
|
96
|
+
repo = repo_class.new
|
97
|
+
validate_uri(repo_hash[:url]) unless ENV['DEVELOPMENT']
|
98
|
+
repo_hash.each do |k, v|
|
99
|
+
next if k == :repo_type
|
100
|
+
if [:first_credential, :second_credential].include?(k) && is_useful?(v)
|
101
|
+
v = useful_part(v)
|
72
102
|
end
|
73
|
-
repo
|
103
|
+
repo.__send__("#{k.to_s}=".to_sym, v)
|
74
104
|
end
|
105
|
+
repo
|
106
|
+
end
|
75
107
|
|
76
|
-
|
77
|
-
|
108
|
+
# (String) Human readable repository name used for progress reports
|
109
|
+
attr_accessor :display_name
|
78
110
|
|
79
|
-
|
80
|
-
|
111
|
+
# (Array of String) Subdirectories in the repository to search for resources
|
112
|
+
attr_accessor :resources_path
|
81
113
|
|
82
|
-
|
83
|
-
|
114
|
+
# (String) URL to repository (e.g. 'git://github.com/rightscale/right_scraper.git')
|
115
|
+
attr_accessor :url
|
84
116
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
117
|
+
# (String) Type of the repository. Currently one of 'git', 'svn'
|
118
|
+
# or 'download', implemented by the appropriate subclass. Needs
|
119
|
+
# to be overridden by subclasses.
|
120
|
+
def repo_type
|
121
|
+
raise NotImplementedError
|
122
|
+
end
|
91
123
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
124
|
+
# (RightScraper::Retrievers::Base class) Appropriate class for retrieving this sort of
|
125
|
+
# repository. Needs to be overridden appropriately by subclasses.
|
126
|
+
#
|
127
|
+
# === Options
|
128
|
+
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
|
129
|
+
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
|
130
|
+
# <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
|
131
|
+
# <tt>:logger</tt>:: Logger to use
|
132
|
+
#
|
133
|
+
# === Returns
|
134
|
+
# retriever(Retrievers::Base):: Corresponding retriever instance
|
135
|
+
def retriever(options)
|
136
|
+
raise NotImplementedError
|
137
|
+
end
|
106
138
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
139
|
+
# Return the revision this repository is currently looking at.
|
140
|
+
#
|
141
|
+
# === Returns
|
142
|
+
# String:: opaque revision type
|
143
|
+
def revision
|
144
|
+
nil
|
145
|
+
end
|
114
146
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
147
|
+
# Return a unique identifier for this repository ignoring the tags
|
148
|
+
# to check out.
|
149
|
+
#
|
150
|
+
# === Returns
|
151
|
+
# String:: opaque unique ID for this repository
|
152
|
+
def repository_hash
|
153
|
+
digest("#{::RightScraper::PROTOCOL_VERSION}\000#{repo_type}\000#{url}")
|
154
|
+
end
|
123
155
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
156
|
+
# Return a unique identifier for this revision in this repository.
|
157
|
+
#
|
158
|
+
# === Returns
|
159
|
+
# String:: opaque unique ID for this revision in this repository
|
160
|
+
def checkout_hash
|
161
|
+
repository_hash
|
162
|
+
end
|
131
163
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
164
|
+
# Unique representation for this repo, should resolve to the same string
|
165
|
+
# for repos that should be cloned in same directory
|
166
|
+
#
|
167
|
+
# === Returns
|
168
|
+
# res(String):: Unique representation for this repo
|
169
|
+
def to_s
|
170
|
+
res = "#{repo_type} #{url}"
|
171
|
+
end
|
140
172
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
173
|
+
# Convert this repository to a URL in the style of resource URLs.
|
174
|
+
#
|
175
|
+
# === Returns
|
176
|
+
# URI:: URL representing this repository
|
177
|
+
def to_url
|
178
|
+
URI.parse(url)
|
179
|
+
end
|
148
180
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
end
|
181
|
+
# Return true if this repository and +other+ represent the same
|
182
|
+
# repository including the same checkout tag.
|
183
|
+
#
|
184
|
+
# === Parameters
|
185
|
+
# other(Repositories::Base):: repository to compare with
|
186
|
+
#
|
187
|
+
# === Returns
|
188
|
+
# Boolean:: true iff this repository and +other+ are the same
|
189
|
+
def ==(other)
|
190
|
+
if other.is_a?(RightScraper::Repositories::Base)
|
191
|
+
checkout_hash == other.checkout_hash
|
192
|
+
else
|
193
|
+
false
|
163
194
|
end
|
195
|
+
end
|
164
196
|
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
end
|
197
|
+
# Return true if this repository and +other+ represent the same
|
198
|
+
# repository, excluding the checkout tag.
|
199
|
+
#
|
200
|
+
# === Parameters
|
201
|
+
# other(Repositories::Base):: repository to compare with
|
202
|
+
#
|
203
|
+
# === Returns
|
204
|
+
# Boolean:: true iff this repository and +other+ are the same
|
205
|
+
def equal_repo?(other)
|
206
|
+
if other.is_a?(RightScraper::Repositories::Base)
|
207
|
+
repository_hash == other.repository_hash
|
208
|
+
else
|
209
|
+
false
|
179
210
|
end
|
211
|
+
end
|
180
212
|
|
181
|
-
|
182
|
-
# ('git', 'svn' or 'download' currently) to the class that
|
183
|
-
# represents that repository.
|
184
|
-
@@types = {} unless class_variable_defined?(:@@types)
|
213
|
+
protected
|
185
214
|
|
186
|
-
|
187
|
-
|
215
|
+
# Return true iff this credential is useful. Currently "useful"
|
216
|
+
# means "nonempty and not all spaces".
|
217
|
+
def self.is_useful?(credential)
|
218
|
+
credential && !credential.strip.empty?
|
219
|
+
end
|
188
220
|
|
189
|
-
|
221
|
+
# Return the useful portion of this credential. Currently strips
|
222
|
+
# out leading and trailing whitespace.
|
223
|
+
def self.useful_part(credential)
|
224
|
+
credential.strip
|
225
|
+
end
|
190
226
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
227
|
+
# Compute a unique identifier for the given string. Currently uses SHA1.
|
228
|
+
#
|
229
|
+
# === Parameters
|
230
|
+
# string(String):: string to compute unique identifier for
|
231
|
+
#
|
232
|
+
# === Returns
|
233
|
+
# String:: unique identifier
|
234
|
+
def digest(string)
|
235
|
+
Digest::SHA1.hexdigest(string)
|
236
|
+
end
|
196
237
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
credential.strip
|
201
|
-
end
|
238
|
+
# Regexp matching everything not allowed in a URI and also ':',
|
239
|
+
# '@' and '/', to be used for encoding usernames and passwords.
|
240
|
+
USERPW = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}]|[:@/]", false, 'N').freeze
|
202
241
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
242
|
+
# Return a URI with the given username and password set.
|
243
|
+
#
|
244
|
+
# === Parameters
|
245
|
+
# uri(URI or String):: URI to add user identification to
|
246
|
+
#
|
247
|
+
# === Returns
|
248
|
+
# URI:: URI with username and password identification added
|
249
|
+
def add_users_to(uri, username=nil, password=nil)
|
250
|
+
begin
|
251
|
+
uri = URI.parse(uri) if uri.instance_of?(String)
|
252
|
+
if username
|
253
|
+
userinfo = URI.escape(username, USERPW)
|
254
|
+
userinfo += ":" + URI.escape(password, USERPW) unless password.nil?
|
255
|
+
uri.userinfo = userinfo
|
256
|
+
end
|
257
|
+
uri
|
258
|
+
rescue URI::InvalidURIError
|
259
|
+
if uri =~ PATTERN::GIT_URI
|
260
|
+
user, host, path = $1, $2, $3
|
261
|
+
userinfo = URI.escape(user, USERPW)
|
262
|
+
userinfo += ":" + URI.escape(username, USERPW) unless username.nil?
|
263
|
+
path = "/" + path unless path.start_with?('/')
|
264
|
+
URI::Generic::build({:scheme => "ssh",
|
265
|
+
:userinfo => userinfo,
|
266
|
+
:host => host,
|
267
|
+
:path => path
|
268
|
+
})
|
269
|
+
else
|
270
|
+
raise
|
271
|
+
end
|
212
272
|
end
|
273
|
+
end
|
213
274
|
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
if uri =~ PATTERN::GIT_URI
|
236
|
-
user, host, path = $1, $2, $3
|
237
|
-
userinfo = URI.escape(user, USERPW)
|
238
|
-
userinfo += ":" + URI.escape(username, USERPW) unless username.nil?
|
239
|
-
path = "/" + path unless path.start_with?('/')
|
240
|
-
URI::Generic::build({:scheme => "ssh",
|
241
|
-
:userinfo => userinfo,
|
242
|
-
:host => host,
|
243
|
-
:path => path
|
244
|
-
})
|
245
|
-
else
|
246
|
-
raise
|
247
|
-
end
|
275
|
+
module PATTERN
|
276
|
+
include URI::REGEXP::PATTERN
|
277
|
+
GIT_URI = Regexp.new("^((?:[#{UNRESERVED}]|#{ESCAPED})*)@(#{HOST}):(#{ABS_PATH}|#{REL_PATH})$")
|
278
|
+
end
|
279
|
+
|
280
|
+
SSH_PORT = 22
|
281
|
+
|
282
|
+
def self.validate_uri(uri)
|
283
|
+
begin
|
284
|
+
uri = URI.parse(uri) if uri.instance_of?(String)
|
285
|
+
unless registered_url_schemas.include?(uri.scheme)
|
286
|
+
raise RepositoryError,
|
287
|
+
"Invalid URI #{uri}: don't know how to interpret scheme #{uri.scheme}"
|
288
|
+
end
|
289
|
+
check_host(uri, uri.host, uri.port)
|
290
|
+
rescue URI::InvalidURIError
|
291
|
+
# could be a Git type URI.
|
292
|
+
if uri =~ PATTERN::GIT_URI
|
293
|
+
check_host(uri, $2, SSH_PORT)
|
294
|
+
else
|
295
|
+
raise
|
248
296
|
end
|
249
297
|
end
|
298
|
+
end
|
250
299
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
300
|
+
def self.check_host(uri, host, port)
|
301
|
+
begin
|
302
|
+
possibles = Socket.getaddrinfo(host, port, Socket::AF_INET, Socket::SOCK_STREAM, Socket::IPPROTO_TCP)
|
303
|
+
if possibles.nil? || possibles.empty?
|
304
|
+
raise RepositoryError, "Invalid URI #{uri}: no hosts for #{host}:#{port}"
|
305
|
+
end
|
306
|
+
possibles.each do |possible|
|
307
|
+
family, port, hostname, address, protocol_family, socket_type, protocol = possible
|
255
308
|
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
# could be a Git type URI.
|
265
|
-
if uri =~ PATTERN::GIT_URI
|
266
|
-
check_host(uri, $2, SSH_PORT)
|
267
|
-
else
|
268
|
-
raise
|
309
|
+
# The EC2 instance metadata address (link-local 169.254.169.254) is not permitted.
|
310
|
+
if address == '169.254.169.254'
|
311
|
+
raise RepositoryError, "Invalid URI #{uri}"
|
312
|
+
end
|
313
|
+
|
314
|
+
# Loopbacks are not permitted.
|
315
|
+
if address =~ /^127\.[0-9]+\.[0-9]+\.[0-9]+$/
|
316
|
+
raise RepositoryError, "Invalid URI #{uri}"
|
269
317
|
end
|
270
|
-
end
|
271
|
-
end
|
272
318
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
raise "Invalid URI #{uri}"
|
282
|
-
# Loopbacks are not permitted.
|
283
|
-
raise "Invalid URI #{uri}" if address =~ /^127\.[0-9]+\.[0-9]+\.[0-9]+$/
|
284
|
-
|
285
|
-
# Private networks are not permitted
|
286
|
-
raise "Invalid URI #{uri}" if address =~ /^10\.[0-9]+\.[0-9]+\.[0-9]+$/
|
287
|
-
raise "Invalid URI #{uri}" if address =~ /^172\.(1[6-9]|[23][0-9])\.[0-9]+\.[0-9]+$/
|
288
|
-
raise "Invalid URI #{uri}" if address =~ /^192\.168\.[0-9]+\.[0-9]+$/
|
319
|
+
# Private networks are not permitted
|
320
|
+
if address =~ /^10\.[0-9]+\.[0-9]+\.[0-9]+$/
|
321
|
+
raise RepositoryError, "Invalid URI #{uri}"
|
322
|
+
end
|
323
|
+
if address =~ /^172\.(1[6-9]|[23][0-9])\.[0-9]+\.[0-9]+$/
|
324
|
+
raise RepositoryError, "Invalid URI #{uri}"
|
325
|
+
end
|
326
|
+
if address =~ /^192\.168\.[0-9]+\.[0-9]+$/
|
327
|
+
raise RepositoryError, "Invalid URI #{uri}"
|
289
328
|
end
|
290
|
-
true
|
291
|
-
rescue SocketError
|
292
|
-
# means the host doesn't exist
|
293
|
-
raise "Invalid URI #{uri}: no hosts for #{host}:#{port}"
|
294
329
|
end
|
330
|
+
true
|
331
|
+
rescue SocketError
|
332
|
+
# means the host doesn't exist
|
333
|
+
raise RepositoryError, "Invalid URI #{uri}: no hosts for #{host}:#{port}"
|
295
334
|
end
|
296
|
-
|
297
335
|
end
|
336
|
+
|
298
337
|
end
|
299
338
|
end
|