right_scraper 3.2.6 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -0,0 +1,33 @@
1
+ #
2
+ # Copyright (c) 2013 RightScale Inc
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+
23
+ # ancestor
24
+ require 'right_scraper'
25
+
26
+ module RightScraper
27
+ module Repositories
28
+ autoload :Base, 'right_scraper/repositories/base'
29
+ autoload :Download, 'right_scraper/repositories/download'
30
+ autoload :Git, 'right_scraper/repositories/git'
31
+ autoload :Svn, 'right_scraper/repositories/svn'
32
+ end
33
+ end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -20,280 +20,319 @@
20
20
  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
+
24
+ # ancestor
25
+ require 'right_scraper/repositories'
26
+
23
27
  require 'uri'
24
28
  require 'digest/sha1'
25
29
  require 'set'
26
30
  require 'socket'
27
31
 
28
- module RightScraper
32
+ module RightScraper::Repositories
33
+
34
+ # Description of remote repository that needs to be scraped.
35
+ #
36
+ # Repository definitions inherit from this base class. A repository must
37
+ # register its #repo_type in @@types so that they can be used with
38
+ # Repositories::Base::from_hash, as follows:
39
+ #
40
+ # class Foo < ::RightScraper::Repositories::Base
41
+ # ...
42
+ #
43
+ # # self-register
44
+ # register_self
45
+ # register_url_schemas('foo')
46
+ # end
47
+ #
48
+ # Subclasses should override #repo_type, #retriever and #to_url; when
49
+ # sensible, #revision should also be overridden. The most important
50
+ # methods are #to_url, which will return a +URI+ that completely
51
+ # characterizes the repository, and #retriever which returns the
52
+ # appropriate RightScraper::Retrievers::Base to scan that repository.
53
+ class Base < ::RightScraper::RegisteredBase
54
+
55
+ # exceptions
56
+ class RepositoryError < ::StandardError; end
57
+
58
+ # @return [Module] module for registered repository types
59
+ def self.registration_module
60
+ ::RightScraper::Repositories
61
+ end
29
62
 
30
- module Repositories
63
+ # @return [Set] set of registered repo url schemas
64
+ def self.registered_url_schemas
65
+ unless schemas = registration_module.instance_variable_get(:@registered_url_schemas)
66
+ schemas = ::Set.new(['http', 'https', 'ftp'])
67
+ registration_module.instance_variable_set(:@registered_url_schemas, schemas)
68
+ end
69
+ schemas
70
+ end
31
71
 
32
- # Description of remote repository that needs to be scraped.
72
+ # Registers any unknown URL schemas for validation.
33
73
  #
34
- # Repository definitions inherit from this base class. A repository must
35
- # register its #repo_type in @@types so that they can be used with
36
- # Repositories::Base::from_hash, as follows:
37
- # class ARepository < Base
38
- # ...
74
+ # @param [Array] args to register as URL schema(s)
39
75
  #
40
- # # Add this repository to the list of available types.
41
- # @@types[:arepository] = ARepository
42
- # end
76
+ # @return [TrueClass] always true
77
+ def self.register_url_schemas(*args)
78
+ # note that set += blah seems to be badly implemented as set = set + blah
79
+ # for the Set class, which leaves the original set object unchanged and
80
+ # will return a new set object with the new data. only use the << operator
81
+ # to update an existing set object.
82
+ schemas = registered_url_schemas
83
+ Array(args).flatten.each { |schema| schemas << schema }
84
+ true
85
+ end
86
+
87
+ # Factory method for a new repository.
43
88
  #
44
- # Subclasses should override #repo_type, #retriever and #to_url; when
45
- # sensible, #revision should also be overridden. The most important
46
- # methods are #to_url, which will return a +URI+ that completely
47
- # characterizes the repository, and #retriever which returns the
48
- # appropriate RightScraper::Retrievers::Base to scan that repository.
49
- class Base
50
-
51
- # Initialize repository from given hash
52
- # Hash keys should correspond to attributes of this class
53
- #
54
- # === Parameters
55
- # opts(Hash):: Hash to be converted into a RightScraper::Repositories::Base instance
56
- #
57
- # === Return
58
- # repo(RightScraper::Repositories::Base):: Resulting repository instance
59
- def self.from_hash(opts)
60
- repo_class = @@types[opts[:repo_type]]
61
- raise "Can't understand how to make #{opts[:repo_type]} repos" if repo_class.nil?
62
- repo = repo_class.new
63
- unless ENV['DEVELOPMENT']
64
- validate_uri opts[:url]
65
- end
66
- opts.each do |k, v|
67
- next if k == :repo_type
68
- if [:first_credential, :second_credential].include?(k) && is_useful?(v)
69
- v = useful_part(v)
70
- end
71
- repo.__send__("#{k.to_s}=".to_sym, v)
89
+ # @param [Hash] repo_hash describing repository to create
90
+ #
91
+ # @return [RightScraper::Repositories::Base] repository created
92
+ def self.from_hash(repo_hash)
93
+ repo_type = repo_hash[:repo_type].to_s
94
+ raise ::ArgumentError, ':repo_type is required' if repo_type.empty?
95
+ repo_class = query_registered_type(repo_type)
96
+ repo = repo_class.new
97
+ validate_uri(repo_hash[:url]) unless ENV['DEVELOPMENT']
98
+ repo_hash.each do |k, v|
99
+ next if k == :repo_type
100
+ if [:first_credential, :second_credential].include?(k) && is_useful?(v)
101
+ v = useful_part(v)
72
102
  end
73
- repo
103
+ repo.__send__("#{k.to_s}=".to_sym, v)
74
104
  end
105
+ repo
106
+ end
75
107
 
76
- # (String) Human readable repository name used for progress reports
77
- attr_accessor :display_name
108
+ # (String) Human readable repository name used for progress reports
109
+ attr_accessor :display_name
78
110
 
79
- # (Array of String) Subdirectories in the repository to search for resources
80
- attr_accessor :resources_path
111
+ # (Array of String) Subdirectories in the repository to search for resources
112
+ attr_accessor :resources_path
81
113
 
82
- # (String) URL to repository (e.g 'git://github.com/rightscale/right_scraper.git')
83
- attr_accessor :url
114
+ # (String) URL to repository (e.g 'git://github.com/rightscale/right_scraper.git')
115
+ attr_accessor :url
84
116
 
85
- # (String) Type of the repository. Currently one of 'git', 'svn'
86
- # or 'download', implemented by the appropriate subclass. Needs
87
- # to be overridden by subclasses.
88
- def repo_type
89
- raise NotImplementedError
90
- end
117
+ # (String) Type of the repository. Currently one of 'git', 'svn'
118
+ # or 'download', implemented by the appropriate subclass. Needs
119
+ # to be overridden by subclasses.
120
+ def repo_type
121
+ raise NotImplementedError
122
+ end
91
123
 
92
- # (RightScraper::Retrievers::Base class) Appropriate class for retrieving this sort of
93
- # repository. Needs to be overridden appropriately by subclasses.
94
- #
95
- # === Options
96
- # <tt>:max_bytes</tt>:: Maximum number of bytes to read
97
- # <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
98
- # <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
99
- # <tt>:logger</tt>:: Logger to use
100
- #
101
- # === Returns
102
- # retriever(Retrievers::Base):: Corresponding retriever instance
103
- def retriever(options)
104
- raise NotImplementedError
105
- end
124
+ # (RightScraper::Retrievers::Base class) Appropriate class for retrieving this sort of
125
+ # repository. Needs to be overridden appropriately by subclasses.
126
+ #
127
+ # === Options
128
+ # <tt>:max_bytes</tt>:: Maximum number of bytes to read
129
+ # <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
130
+ # <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
131
+ # <tt>:logger</tt>:: Logger to use
132
+ #
133
+ # === Returns
134
+ # retriever(Retrievers::Base):: Corresponding retriever instance
135
+ def retriever(options)
136
+ raise NotImplementedError
137
+ end
106
138
 
107
- # Return the revision this repository is currently looking at.
108
- #
109
- # === Returns
110
- # String:: opaque revision type
111
- def revision
112
- nil
113
- end
139
+ # Return the revision this repository is currently looking at.
140
+ #
141
+ # === Returns
142
+ # String:: opaque revision type
143
+ def revision
144
+ nil
145
+ end
114
146
 
115
- # Return a unique identifier for this repository ignoring the tags
116
- # to check out.
117
- #
118
- # === Returns
119
- # String:: opaque unique ID for this repository
120
- def repository_hash
121
- digest("#{PROTOCOL_VERSION}\000#{repo_type}\000#{url}")
122
- end
147
+ # Return a unique identifier for this repository ignoring the tags
148
+ # to check out.
149
+ #
150
+ # === Returns
151
+ # String:: opaque unique ID for this repository
152
+ def repository_hash
153
+ digest("#{::RightScraper::PROTOCOL_VERSION}\000#{repo_type}\000#{url}")
154
+ end
123
155
 
124
- # Return a unique identifier for this revision in this repository.
125
- #
126
- # === Returns
127
- # String:: opaque unique ID for this revision in this repository
128
- def checkout_hash
129
- repository_hash
130
- end
156
+ # Return a unique identifier for this revision in this repository.
157
+ #
158
+ # === Returns
159
+ # String:: opaque unique ID for this revision in this repository
160
+ def checkout_hash
161
+ repository_hash
162
+ end
131
163
 
132
- # Unique representation for this repo, should resolve to the same string
133
- # for repos that should be cloned in same directory
134
- #
135
- # === Returns
136
- # res(String):: Unique representation for this repo
137
- def to_s
138
- res = "#{repo_type} #{url}"
139
- end
164
+ # Unique representation for this repo, should resolve to the same string
165
+ # for repos that should be cloned in same directory
166
+ #
167
+ # === Returns
168
+ # res(String):: Unique representation for this repo
169
+ def to_s
170
+ res = "#{repo_type} #{url}"
171
+ end
140
172
 
141
- # Convert this repository to a URL in the style of resource URLs.
142
- #
143
- # === Returns
144
- # URI:: URL representing this repository
145
- def to_url
146
- URI.parse(url)
147
- end
173
+ # Convert this repository to a URL in the style of resource URLs.
174
+ #
175
+ # === Returns
176
+ # URI:: URL representing this repository
177
+ def to_url
178
+ URI.parse(url)
179
+ end
148
180
 
149
- # Return true if this repository and +other+ represent the same
150
- # repository including the same checkout tag.
151
- #
152
- # === Parameters
153
- # other(Repositories::Base):: repository to compare with
154
- #
155
- # === Returns
156
- # Boolean:: true iff this repository and +other+ are the same
157
- def ==(other)
158
- if other.is_a?(RightScraper::Repositories::Base)
159
- checkout_hash == other.checkout_hash
160
- else
161
- false
162
- end
181
+ # Return true if this repository and +other+ represent the same
182
+ # repository including the same checkout tag.
183
+ #
184
+ # === Parameters
185
+ # other(Repositories::Base):: repository to compare with
186
+ #
187
+ # === Returns
188
+ # Boolean:: true iff this repository and +other+ are the same
189
+ def ==(other)
190
+ if other.is_a?(RightScraper::Repositories::Base)
191
+ checkout_hash == other.checkout_hash
192
+ else
193
+ false
163
194
  end
195
+ end
164
196
 
165
- # Return true if this repository and +other+ represent the same
166
- # repository, excluding the checkout tag.
167
- #
168
- # === Parameters
169
- # other(Repositories::Base):: repository to compare with
170
- #
171
- # === Returns
172
- # Boolean:: true iff this repository and +other+ are the same
173
- def equal_repo?(other)
174
- if other.is_a?(RightScraper::Repositories::Base)
175
- repository_hash == other.repository_hash
176
- else
177
- false
178
- end
197
+ # Return true if this repository and +other+ represent the same
198
+ # repository, excluding the checkout tag.
199
+ #
200
+ # === Parameters
201
+ # other(Repositories::Base):: repository to compare with
202
+ #
203
+ # === Returns
204
+ # Boolean:: true iff this repository and +other+ are the same
205
+ def equal_repo?(other)
206
+ if other.is_a?(RightScraper::Repositories::Base)
207
+ repository_hash == other.repository_hash
208
+ else
209
+ false
179
210
  end
211
+ end
180
212
 
181
- # (Hash) Lookup table from textual description of repository type
182
- # ('git', 'svn' or 'download' currently) to the class that
183
- # represents that repository.
184
- @@types = {} unless class_variable_defined?(:@@types)
213
+ protected
185
214
 
186
- # (Set) list of acceptable URI schemes. Initially just http, https and ftp.
187
- @@okay_schemes = Set.new(["http", "https", "ftp"])
215
+ # Return true iff this credential is useful. Currently "useful"
216
+ # means "nonempty and not all spaces".
217
+ def self.is_useful?(credential)
218
+ credential && !credential.strip.empty?
219
+ end
188
220
 
189
- protected
221
+ # Return the useful portion of this credential. Currently strips
222
+ # out any spaces.
223
+ def self.useful_part(credential)
224
+ credential.strip
225
+ end
190
226
 
191
- # Return true iff this credential is useful. Currently "useful"
192
- # means "nonempty and not all spaces".
193
- def self.is_useful?(credential)
194
- credential && !credential.strip.empty?
195
- end
227
+ # Compute a unique identifier for the given string. Currently uses SHA1.
228
+ #
229
+ # === Parameters
230
+ # string(String):: string to compute unique identifier for
231
+ #
232
+ # === Returns
233
+ # String:: unique identifier
234
+ def digest(string)
235
+ Digest::SHA1.hexdigest(string)
236
+ end
196
237
 
197
- # Return the useful portion of this credential. Currently strips
198
- # out any spaces.
199
- def self.useful_part(credential)
200
- credential.strip
201
- end
238
+ # Regexp matching everything not allowed in a URI and also ':',
239
+ # '@' and '/', to be used for encoding usernames and passwords.
240
+ USERPW = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}]|[:@/]", false, 'N').freeze
202
241
 
203
- # Compute a unique identifier for the given string. Currently uses SHA1.
204
- #
205
- # === Parameters
206
- # string(String):: string to compute unique identifier for
207
- #
208
- # === Returns
209
- # String:: unique identifier
210
- def digest(string)
211
- Digest::SHA1.hexdigest(string)
242
+ # Return a URI with the given username and password set.
243
+ #
244
+ # === Parameters
245
+ # uri(URI or String):: URI to add user identification to
246
+ #
247
+ # === Returns
248
+ # URI:: URI with username and password identification added
249
+ def add_users_to(uri, username=nil, password=nil)
250
+ begin
251
+ uri = URI.parse(uri) if uri.instance_of?(String)
252
+ if username
253
+ userinfo = URI.escape(username, USERPW)
254
+ userinfo += ":" + URI.escape(password, USERPW) unless password.nil?
255
+ uri.userinfo = userinfo
256
+ end
257
+ uri
258
+ rescue URI::InvalidURIError
259
+ if uri =~ PATTERN::GIT_URI
260
+ user, host, path = $1, $2, $3
261
+ userinfo = URI.escape(user, USERPW)
262
+ userinfo += ":" + URI.escape(username, USERPW) unless username.nil?
263
+ path = "/" + path unless path.start_with?('/')
264
+ URI::Generic::build({:scheme => "ssh",
265
+ :userinfo => userinfo,
266
+ :host => host,
267
+ :path => path
268
+ })
269
+ else
270
+ raise
271
+ end
212
272
  end
273
+ end
213
274
 
214
- # Regexp matching everything not allowed in a URI and also ':',
215
- # '@' and '/', to be used for encoding usernames and passwords.
216
- USERPW = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}]|[:@/]", false, 'N').freeze
217
-
218
- # Return a URI with the given username and password set.
219
- #
220
- # === Parameters
221
- # uri(URI or String):: URI to add user identification to
222
- #
223
- # === Returns
224
- # URI:: URI with username and password identification added
225
- def add_users_to(uri, username=nil, password=nil)
226
- begin
227
- uri = URI.parse(uri) if uri.instance_of?(String)
228
- if username
229
- userinfo = URI.escape(username, USERPW)
230
- userinfo += ":" + URI.escape(password, USERPW) unless password.nil?
231
- uri.userinfo = userinfo
232
- end
233
- uri
234
- rescue URI::InvalidURIError
235
- if uri =~ PATTERN::GIT_URI
236
- user, host, path = $1, $2, $3
237
- userinfo = URI.escape(user, USERPW)
238
- userinfo += ":" + URI.escape(username, USERPW) unless username.nil?
239
- path = "/" + path unless path.start_with?('/')
240
- URI::Generic::build({:scheme => "ssh",
241
- :userinfo => userinfo,
242
- :host => host,
243
- :path => path
244
- })
245
- else
246
- raise
247
- end
275
+ module PATTERN
276
+ include URI::REGEXP::PATTERN
277
+ GIT_URI = Regexp.new("^((?:[#{UNRESERVED}]|#{ESCAPED})*)@(#{HOST}):(#{ABS_PATH}|#{REL_PATH})$")
278
+ end
279
+
280
+ SSH_PORT = 22
281
+
282
+ def self.validate_uri(uri)
283
+ begin
284
+ uri = URI.parse(uri) if uri.instance_of?(String)
285
+ unless registered_url_schemas.include?(uri.scheme)
286
+ raise RepositoryError,
287
+ "Invalid URI #{uri}: don't know how to interpret scheme #{uri.scheme}"
288
+ end
289
+ check_host(uri, uri.host, uri.port)
290
+ rescue URI::InvalidURIError
291
+ # could be a Git type URI.
292
+ if uri =~ PATTERN::GIT_URI
293
+ check_host(uri, $2, SSH_PORT)
294
+ else
295
+ raise
248
296
  end
249
297
  end
298
+ end
250
299
 
251
- module PATTERN
252
- include URI::REGEXP::PATTERN
253
- GIT_URI = Regexp.new("^((?:[#{UNRESERVED}]|#{ESCAPED})*)@(#{HOST}):(#{ABS_PATH}|#{REL_PATH})$")
254
- end
300
+ def self.check_host(uri, host, port)
301
+ begin
302
+ possibles = Socket.getaddrinfo(host, port, Socket::AF_INET, Socket::SOCK_STREAM, Socket::IPPROTO_TCP)
303
+ if possibles.nil? || possibles.empty?
304
+ raise RepositoryError, "Invalid URI #{uri}: no hosts for #{host}:#{port}"
305
+ end
306
+ possibles.each do |possible|
307
+ family, port, hostname, address, protocol_family, socket_type, protocol = possible
255
308
 
256
- SSH_PORT = 22
257
-
258
- def self.validate_uri(uri)
259
- begin
260
- uri = URI.parse(uri) if uri.instance_of?(String)
261
- raise "Invalid URI #{uri}: don't know how to interpret scheme #{uri.scheme}" unless @@okay_schemes.include?(uri.scheme)
262
- check_host(uri, uri.host, uri.port)
263
- rescue URI::InvalidURIError
264
- # could be a Git type URI.
265
- if uri =~ PATTERN::GIT_URI
266
- check_host(uri, $2, SSH_PORT)
267
- else
268
- raise
309
+ # Our EC2 gateway is not permitted.
310
+ if address == '169.254.169.254'
311
+ raise RepositoryError, "Invalid URI #{uri}"
312
+ end
313
+
314
+ # Loopbacks are not permitted.
315
+ if address =~ /^127\.[0-9]+\.[0-9]+\.[0-9]+$/
316
+ raise RepositoryError, "Invalid URI #{uri}"
269
317
  end
270
- end
271
- end
272
318
 
273
- def self.check_host(uri, host, port)
274
- begin
275
- possibles = Socket.getaddrinfo(host, port, Socket::AF_INET, Socket::SOCK_STREAM, Socket::IPPROTO_TCP)
276
- raise "Invalid URI #{uri}: no hosts for #{host}:#{port}" if possibles.nil? || possibles.empty?
277
- possibles.each do |possible|
278
- family, port, hostname, address, protocol_family, socket_type, protocol = possible
279
-
280
- # Our EC2 gateway is not permitted.
281
- raise "Invalid URI #{uri}" if address == "169.254.169.254"
282
- # Loopbacks are not permitted.
283
- raise "Invalid URI #{uri}" if address =~ /^127\.[0-9]+\.[0-9]+\.[0-9]+$/
284
-
285
- # Private networks are not permitted
286
- raise "Invalid URI #{uri}" if address =~ /^10\.[0-9]+\.[0-9]+\.[0-9]+$/
287
- raise "Invalid URI #{uri}" if address =~ /^172\.(1[6-9]|[23][0-9])\.[0-9]+\.[0-9]+$/
288
- raise "Invalid URI #{uri}" if address =~ /^192\.168\.[0-9]+\.[0-9]+$/
319
+ # Private networks are not permitted
320
+ if address =~ /^10\.[0-9]+\.[0-9]+\.[0-9]+$/
321
+ raise RepositoryError, "Invalid URI #{uri}"
322
+ end
323
+ if address =~ /^172\.(1[6-9]|[23][0-9])\.[0-9]+\.[0-9]+$/
324
+ raise RepositoryError, "Invalid URI #{uri}"
325
+ end
326
+ if address =~ /^192\.168\.[0-9]+\.[0-9]+$/
327
+ raise RepositoryError, "Invalid URI #{uri}"
289
328
  end
290
- true
291
- rescue SocketError
292
- # means the host doesn't exist
293
- raise "Invalid URI #{uri}: no hosts for #{host}:#{port}"
294
329
  end
330
+ true
331
+ rescue SocketError
332
+ # means the host doesn't exist
333
+ raise RepositoryError, "Invalid URI #{uri}: no hosts for #{host}:#{port}"
295
334
  end
296
-
297
335
  end
336
+
298
337
  end
299
338
  end