right_scraper 3.2.6 → 5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -0,0 +1,33 @@
1
+ #
2
+ # Copyright (c) 2013 RightScale Inc
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+
23
+ # ancestor
24
+ require 'right_scraper'
25
+
26
+ module RightScraper
27
+ module Repositories
28
+ autoload :Base, 'right_scraper/repositories/base'
29
+ autoload :Download, 'right_scraper/repositories/download'
30
+ autoload :Git, 'right_scraper/repositories/git'
31
+ autoload :Svn, 'right_scraper/repositories/svn'
32
+ end
33
+ end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -20,280 +20,319 @@
20
20
  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
+
24
+ # ancestor
25
+ require 'right_scraper/repositories'
26
+
23
27
  require 'uri'
24
28
  require 'digest/sha1'
25
29
  require 'set'
26
30
  require 'socket'
27
31
 
28
- module RightScraper
32
+ module RightScraper::Repositories
33
+
34
+ # Description of remote repository that needs to be scraped.
35
+ #
36
+ # Repository definitions inherit from this base class. A repository must
37
+ # register itself (via register_self, shown below) so that it can be used with
38
+ # Repositories::Base::from_hash, as follows:
39
+ #
40
+ # class Foo < ::RightScraper::Repositories::Base
41
+ # ...
42
+ #
43
+ # # self-register
44
+ # register_self
45
+ # register_url_schemas('foo')
46
+ # end
47
+ #
48
+ # Subclasses should override #repo_type, #retriever and #to_url; when
49
+ # sensible, #revision should also be overridden. The most important
50
+ # methods are #to_url, which will return a +URI+ that completely
51
+ # characterizes the repository, and #retriever which returns the
52
+ # appropriate RightScraper::Retrievers::Base to scan that repository.
53
+ class Base < ::RightScraper::RegisteredBase
54
+
55
+ # exceptions
56
+ class RepositoryError < ::StandardError; end
57
+
58
+ # @return [Module] module for registered repository types
59
+ def self.registration_module
60
+ ::RightScraper::Repositories
61
+ end
29
62
 
30
- module Repositories
63
+ # @return [Set] set of registered repo url schemas
64
+ def self.registered_url_schemas
65
+ unless schemas = registration_module.instance_variable_get(:@registered_url_schemas)
66
+ schemas = ::Set.new(['http', 'https', 'ftp'])
67
+ registration_module.instance_variable_set(:@registered_url_schemas, schemas)
68
+ end
69
+ schemas
70
+ end
31
71
 
32
- # Description of remote repository that needs to be scraped.
72
+ # Registers any unknown URL schemas for validation.
33
73
  #
34
- # Repository definitions inherit from this base class. A repository must
35
- # register its #repo_type in @@types so that they can be used with
36
- # Repositories::Base::from_hash, as follows:
37
- # class ARepository < Base
38
- # ...
74
+ # @param [Array] args to register as URL schema(s)
39
75
  #
40
- # # Add this repository to the list of available types.
41
- # @@types[:arepository] = ARepository
42
- # end
76
+ # @return [TrueClass] always true
77
+ def self.register_url_schemas(*args)
78
+ # note that set += blah is evaluated by Ruby as set = set + blah, even
79
+ # for the Set class, which leaves the original set object unchanged and
80
+ # will return a new set object with the new data. only use the << operator
81
+ # to update an existing set object.
82
+ schemas = registered_url_schemas
83
+ Array(args).flatten.each { |schema| schemas << schema }
84
+ true
85
+ end
86
+
87
+ # Factory method for a new repository.
43
88
  #
44
- # Subclasses should override #repo_type, #retriever and #to_url; when
45
- # sensible, #revision should also be overridden. The most important
46
- # methods are #to_url, which will return a +URI+ that completely
47
- # characterizes the repository, and #retriever which returns the
48
- # appropriate RightScraper::Retrievers::Base to scan that repository.
49
- class Base
50
-
51
- # Initialize repository from given hash
52
- # Hash keys should correspond to attributes of this class
53
- #
54
- # === Parameters
55
- # opts(Hash):: Hash to be converted into a RightScraper::Repositories::Base instance
56
- #
57
- # === Return
58
- # repo(RightScraper::Repositories::Base):: Resulting repository instance
59
- def self.from_hash(opts)
60
- repo_class = @@types[opts[:repo_type]]
61
- raise "Can't understand how to make #{opts[:repo_type]} repos" if repo_class.nil?
62
- repo = repo_class.new
63
- unless ENV['DEVELOPMENT']
64
- validate_uri opts[:url]
65
- end
66
- opts.each do |k, v|
67
- next if k == :repo_type
68
- if [:first_credential, :second_credential].include?(k) && is_useful?(v)
69
- v = useful_part(v)
70
- end
71
- repo.__send__("#{k.to_s}=".to_sym, v)
89
+ # @param [Hash] repo_hash describing repository to create
90
+ #
91
+ # @return [RightScraper::Repositories::Base] repository created
92
+ def self.from_hash(repo_hash)
93
+ repo_type = repo_hash[:repo_type].to_s
94
+ raise ::ArgumentError, ':repo_type is required' if repo_type.empty?
95
+ repo_class = query_registered_type(repo_type)
96
+ repo = repo_class.new
97
+ validate_uri(repo_hash[:url]) unless ENV['DEVELOPMENT']
98
+ repo_hash.each do |k, v|
99
+ next if k == :repo_type
100
+ if [:first_credential, :second_credential].include?(k) && is_useful?(v)
101
+ v = useful_part(v)
72
102
  end
73
- repo
103
+ repo.__send__("#{k.to_s}=".to_sym, v)
74
104
  end
105
+ repo
106
+ end
75
107
 
76
- # (String) Human readable repository name used for progress reports
77
- attr_accessor :display_name
108
+ # (String) Human readable repository name used for progress reports
109
+ attr_accessor :display_name
78
110
 
79
- # (Array of String) Subdirectories in the repository to search for resources
80
- attr_accessor :resources_path
111
+ # (Array of String) Subdirectories in the repository to search for resources
112
+ attr_accessor :resources_path
81
113
 
82
- # (String) URL to repository (e.g 'git://github.com/rightscale/right_scraper.git')
83
- attr_accessor :url
114
+ # (String) URL to repository (e.g 'git://github.com/rightscale/right_scraper.git')
115
+ attr_accessor :url
84
116
 
85
- # (String) Type of the repository. Currently one of 'git', 'svn'
86
- # or 'download', implemented by the appropriate subclass. Needs
87
- # to be overridden by subclasses.
88
- def repo_type
89
- raise NotImplementedError
90
- end
117
+ # (String) Type of the repository. Currently one of 'git', 'svn'
118
+ # or 'download', implemented by the appropriate subclass. Needs
119
+ # to be overridden by subclasses.
120
+ def repo_type
121
+ raise NotImplementedError
122
+ end
91
123
 
92
- # (RightScraper::Retrievers::Base class) Appropriate class for retrieving this sort of
93
- # repository. Needs to be overridden appropriately by subclasses.
94
- #
95
- # === Options
96
- # <tt>:max_bytes</tt>:: Maximum number of bytes to read
97
- # <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
98
- # <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
99
- # <tt>:logger</tt>:: Logger to use
100
- #
101
- # === Returns
102
- # retriever(Retrievers::Base):: Corresponding retriever instance
103
- def retriever(options)
104
- raise NotImplementedError
105
- end
124
+ # (RightScraper::Retrievers::Base class) Appropriate class for retrieving this sort of
125
+ # repository. Needs to be overridden appropriately by subclasses.
126
+ #
127
+ # === Options
128
+ # <tt>:max_bytes</tt>:: Maximum number of bytes to read
129
+ # <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
130
+ # <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
131
+ # <tt>:logger</tt>:: Logger to use
132
+ #
133
+ # === Returns
134
+ # retriever(Retrievers::Base):: Corresponding retriever instance
135
+ def retriever(options)
136
+ raise NotImplementedError
137
+ end
106
138
 
107
- # Return the revision this repository is currently looking at.
108
- #
109
- # === Returns
110
- # String:: opaque revision type
111
- def revision
112
- nil
113
- end
139
+ # Return the revision this repository is currently looking at.
140
+ #
141
+ # === Returns
142
+ # String:: opaque revision type
143
+ def revision
144
+ nil
145
+ end
114
146
 
115
- # Return a unique identifier for this repository ignoring the tags
116
- # to check out.
117
- #
118
- # === Returns
119
- # String:: opaque unique ID for this repository
120
- def repository_hash
121
- digest("#{PROTOCOL_VERSION}\000#{repo_type}\000#{url}")
122
- end
147
+ # Return a unique identifier for this repository ignoring the tags
148
+ # to check out.
149
+ #
150
+ # === Returns
151
+ # String:: opaque unique ID for this repository
152
+ def repository_hash
153
+ digest("#{::RightScraper::PROTOCOL_VERSION}\000#{repo_type}\000#{url}")
154
+ end
123
155
 
124
- # Return a unique identifier for this revision in this repository.
125
- #
126
- # === Returns
127
- # String:: opaque unique ID for this revision in this repository
128
- def checkout_hash
129
- repository_hash
130
- end
156
+ # Return a unique identifier for this revision in this repository.
157
+ #
158
+ # === Returns
159
+ # String:: opaque unique ID for this revision in this repository
160
+ def checkout_hash
161
+ repository_hash
162
+ end
131
163
 
132
- # Unique representation for this repo, should resolve to the same string
133
- # for repos that should be cloned in same directory
134
- #
135
- # === Returns
136
- # res(String):: Unique representation for this repo
137
- def to_s
138
- res = "#{repo_type} #{url}"
139
- end
164
+ # Unique representation for this repo, should resolve to the same string
165
+ # for repos that should be cloned in same directory
166
+ #
167
+ # === Returns
168
+ # res(String):: Unique representation for this repo
169
+ def to_s
170
+ res = "#{repo_type} #{url}"
171
+ end
140
172
 
141
- # Convert this repository to a URL in the style of resource URLs.
142
- #
143
- # === Returns
144
- # URI:: URL representing this repository
145
- def to_url
146
- URI.parse(url)
147
- end
173
+ # Convert this repository to a URL in the style of resource URLs.
174
+ #
175
+ # === Returns
176
+ # URI:: URL representing this repository
177
+ def to_url
178
+ URI.parse(url)
179
+ end
148
180
 
149
- # Return true if this repository and +other+ represent the same
150
- # repository including the same checkout tag.
151
- #
152
- # === Parameters
153
- # other(Repositories::Base):: repository to compare with
154
- #
155
- # === Returns
156
- # Boolean:: true iff this repository and +other+ are the same
157
- def ==(other)
158
- if other.is_a?(RightScraper::Repositories::Base)
159
- checkout_hash == other.checkout_hash
160
- else
161
- false
162
- end
181
+ # Return true if this repository and +other+ represent the same
182
+ # repository including the same checkout tag.
183
+ #
184
+ # === Parameters
185
+ # other(Repositories::Base):: repository to compare with
186
+ #
187
+ # === Returns
188
+ # Boolean:: true iff this repository and +other+ are the same
189
+ def ==(other)
190
+ if other.is_a?(RightScraper::Repositories::Base)
191
+ checkout_hash == other.checkout_hash
192
+ else
193
+ false
163
194
  end
195
+ end
164
196
 
165
- # Return true if this repository and +other+ represent the same
166
- # repository, excluding the checkout tag.
167
- #
168
- # === Parameters
169
- # other(Repositories::Base):: repository to compare with
170
- #
171
- # === Returns
172
- # Boolean:: true iff this repository and +other+ are the same
173
- def equal_repo?(other)
174
- if other.is_a?(RightScraper::Repositories::Base)
175
- repository_hash == other.repository_hash
176
- else
177
- false
178
- end
197
+ # Return true if this repository and +other+ represent the same
198
+ # repository, excluding the checkout tag.
199
+ #
200
+ # === Parameters
201
+ # other(Repositories::Base):: repository to compare with
202
+ #
203
+ # === Returns
204
+ # Boolean:: true iff this repository and +other+ are the same
205
+ def equal_repo?(other)
206
+ if other.is_a?(RightScraper::Repositories::Base)
207
+ repository_hash == other.repository_hash
208
+ else
209
+ false
179
210
  end
211
+ end
180
212
 
181
- # (Hash) Lookup table from textual description of repository type
182
- # ('git', 'svn' or 'download' currently) to the class that
183
- # represents that repository.
184
- @@types = {} unless class_variable_defined?(:@@types)
213
+ protected
185
214
 
186
- # (Set) list of acceptable URI schemes. Initially just http, https and ftp.
187
- @@okay_schemes = Set.new(["http", "https", "ftp"])
215
+ # Return true iff this credential is useful. Currently "useful"
216
+ # means "nonempty and not all spaces".
217
+ def self.is_useful?(credential)
218
+ credential && !credential.strip.empty?
219
+ end
188
220
 
189
- protected
221
+ # Return the useful portion of this credential. Currently strips
222
+ # out leading and trailing whitespace.
223
+ def self.useful_part(credential)
224
+ credential.strip
225
+ end
190
226
 
191
- # Return true iff this credential is useful. Currently "useful"
192
- # means "nonempty and not all spaces".
193
- def self.is_useful?(credential)
194
- credential && !credential.strip.empty?
195
- end
227
+ # Compute a unique identifier for the given string. Currently uses SHA1.
228
+ #
229
+ # === Parameters
230
+ # string(String):: string to compute unique identifier for
231
+ #
232
+ # === Returns
233
+ # String:: unique identifier
234
+ def digest(string)
235
+ Digest::SHA1.hexdigest(string)
236
+ end
196
237
 
197
- # Return the useful portion of this credential. Currently strips
198
- # out any spaces.
199
- def self.useful_part(credential)
200
- credential.strip
201
- end
238
+ # Regexp matching everything not allowed in a URI and also ':',
239
+ # '@' and '/', to be used for encoding usernames and passwords.
240
+ USERPW = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}]|[:@/]", false, 'N').freeze
202
241
 
203
- # Compute a unique identifier for the given string. Currently uses SHA1.
204
- #
205
- # === Parameters
206
- # string(String):: string to compute unique identifier for
207
- #
208
- # === Returns
209
- # String:: unique identifier
210
- def digest(string)
211
- Digest::SHA1.hexdigest(string)
242
+ # Return a URI with the given username and password set.
243
+ #
244
+ # === Parameters
245
+ # uri(URI or String):: URI to add user identification to
246
+ #
247
+ # === Returns
248
+ # URI:: URI with username and password identification added
249
+ def add_users_to(uri, username=nil, password=nil)
250
+ begin
251
+ uri = URI.parse(uri) if uri.instance_of?(String)
252
+ if username
253
+ userinfo = URI.escape(username, USERPW)
254
+ userinfo += ":" + URI.escape(password, USERPW) unless password.nil?
255
+ uri.userinfo = userinfo
256
+ end
257
+ uri
258
+ rescue URI::InvalidURIError
259
+ if uri =~ PATTERN::GIT_URI
260
+ user, host, path = $1, $2, $3
261
+ userinfo = URI.escape(user, USERPW)
262
+ userinfo += ":" + URI.escape(username, USERPW) unless username.nil?
263
+ path = "/" + path unless path.start_with?('/')
264
+ URI::Generic::build({:scheme => "ssh",
265
+ :userinfo => userinfo,
266
+ :host => host,
267
+ :path => path
268
+ })
269
+ else
270
+ raise
271
+ end
212
272
  end
273
+ end
213
274
 
214
- # Regexp matching everything not allowed in a URI and also ':',
215
- # '@' and '/', to be used for encoding usernames and passwords.
216
- USERPW = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}]|[:@/]", false, 'N').freeze
217
-
218
- # Return a URI with the given username and password set.
219
- #
220
- # === Parameters
221
- # uri(URI or String):: URI to add user identification to
222
- #
223
- # === Returns
224
- # URI:: URI with username and password identification added
225
- def add_users_to(uri, username=nil, password=nil)
226
- begin
227
- uri = URI.parse(uri) if uri.instance_of?(String)
228
- if username
229
- userinfo = URI.escape(username, USERPW)
230
- userinfo += ":" + URI.escape(password, USERPW) unless password.nil?
231
- uri.userinfo = userinfo
232
- end
233
- uri
234
- rescue URI::InvalidURIError
235
- if uri =~ PATTERN::GIT_URI
236
- user, host, path = $1, $2, $3
237
- userinfo = URI.escape(user, USERPW)
238
- userinfo += ":" + URI.escape(username, USERPW) unless username.nil?
239
- path = "/" + path unless path.start_with?('/')
240
- URI::Generic::build({:scheme => "ssh",
241
- :userinfo => userinfo,
242
- :host => host,
243
- :path => path
244
- })
245
- else
246
- raise
247
- end
275
+ module PATTERN
276
+ include URI::REGEXP::PATTERN
277
+ GIT_URI = Regexp.new("^((?:[#{UNRESERVED}]|#{ESCAPED})*)@(#{HOST}):(#{ABS_PATH}|#{REL_PATH})$")
278
+ end
279
+
280
+ SSH_PORT = 22
281
+
282
+ def self.validate_uri(uri)
283
+ begin
284
+ uri = URI.parse(uri) if uri.instance_of?(String)
285
+ unless registered_url_schemas.include?(uri.scheme)
286
+ raise RepositoryError,
287
+ "Invalid URI #{uri}: don't know how to interpret scheme #{uri.scheme}"
288
+ end
289
+ check_host(uri, uri.host, uri.port)
290
+ rescue URI::InvalidURIError
291
+ # could be a Git type URI.
292
+ if uri =~ PATTERN::GIT_URI
293
+ check_host(uri, $2, SSH_PORT)
294
+ else
295
+ raise
248
296
  end
249
297
  end
298
+ end
250
299
 
251
- module PATTERN
252
- include URI::REGEXP::PATTERN
253
- GIT_URI = Regexp.new("^((?:[#{UNRESERVED}]|#{ESCAPED})*)@(#{HOST}):(#{ABS_PATH}|#{REL_PATH})$")
254
- end
300
+ def self.check_host(uri, host, port)
301
+ begin
302
+ possibles = Socket.getaddrinfo(host, port, Socket::AF_INET, Socket::SOCK_STREAM, Socket::IPPROTO_TCP)
303
+ if possibles.nil? || possibles.empty?
304
+ raise RepositoryError, "Invalid URI #{uri}: no hosts for #{host}:#{port}"
305
+ end
306
+ possibles.each do |possible|
307
+ family, port, hostname, address, protocol_family, socket_type, protocol = possible
255
308
 
256
- SSH_PORT = 22
257
-
258
- def self.validate_uri(uri)
259
- begin
260
- uri = URI.parse(uri) if uri.instance_of?(String)
261
- raise "Invalid URI #{uri}: don't know how to interpret scheme #{uri.scheme}" unless @@okay_schemes.include?(uri.scheme)
262
- check_host(uri, uri.host, uri.port)
263
- rescue URI::InvalidURIError
264
- # could be a Git type URI.
265
- if uri =~ PATTERN::GIT_URI
266
- check_host(uri, $2, SSH_PORT)
267
- else
268
- raise
309
+ # The EC2 instance metadata endpoint (link-local address) is not permitted.
310
+ if address == '169.254.169.254'
311
+ raise RepositoryError, "Invalid URI #{uri}"
312
+ end
313
+
314
+ # Loopbacks are not permitted.
315
+ if address =~ /^127\.[0-9]+\.[0-9]+\.[0-9]+$/
316
+ raise RepositoryError, "Invalid URI #{uri}"
269
317
  end
270
- end
271
- end
272
318
 
273
- def self.check_host(uri, host, port)
274
- begin
275
- possibles = Socket.getaddrinfo(host, port, Socket::AF_INET, Socket::SOCK_STREAM, Socket::IPPROTO_TCP)
276
- raise "Invalid URI #{uri}: no hosts for #{host}:#{port}" if possibles.nil? || possibles.empty?
277
- possibles.each do |possible|
278
- family, port, hostname, address, protocol_family, socket_type, protocol = possible
279
-
280
- # Our EC2 gateway is not permitted.
281
- raise "Invalid URI #{uri}" if address == "169.254.169.254"
282
- # Loopbacks are not permitted.
283
- raise "Invalid URI #{uri}" if address =~ /^127\.[0-9]+\.[0-9]+\.[0-9]+$/
284
-
285
- # Private networks are not permitted
286
- raise "Invalid URI #{uri}" if address =~ /^10\.[0-9]+\.[0-9]+\.[0-9]+$/
287
- raise "Invalid URI #{uri}" if address =~ /^172\.(1[6-9]|[23][0-9])\.[0-9]+\.[0-9]+$/
288
- raise "Invalid URI #{uri}" if address =~ /^192\.168\.[0-9]+\.[0-9]+$/
319
+ # Private networks are not permitted
320
+ if address =~ /^10\.[0-9]+\.[0-9]+\.[0-9]+$/
321
+ raise RepositoryError, "Invalid URI #{uri}"
322
+ end
323
+ if address =~ /^172\.(1[6-9]|[23][0-9])\.[0-9]+\.[0-9]+$/
324
+ raise RepositoryError, "Invalid URI #{uri}"
325
+ end
326
+ if address =~ /^192\.168\.[0-9]+\.[0-9]+$/
327
+ raise RepositoryError, "Invalid URI #{uri}"
289
328
  end
290
- true
291
- rescue SocketError
292
- # means the host doesn't exist
293
- raise "Invalid URI #{uri}: no hosts for #{host}:#{port}"
294
329
  end
330
+ true
331
+ rescue SocketError
332
+ # means the host doesn't exist
333
+ raise RepositoryError, "Invalid URI #{uri}: no hosts for #{host}:#{port}"
295
334
  end
296
-
297
335
  end
336
+
298
337
  end
299
338
  end