right_scraper 5.0.1 → 5.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -35,7 +35,7 @@ module RightScraper
35
35
  # Manage a dedicated SSH agent.
36
36
  class SSHAgent
37
37
 
38
- class SSHAgentError < Exception; end
38
+ class SSHAgentError < ::RightScraper::Error; end
39
39
 
40
40
  def initialize
41
41
  @display = ENV['DISPLAY']
@@ -96,6 +96,7 @@ module RightScraper::Repositories
96
96
  repo = repo_class.new
97
97
  validate_uri(repo_hash[:url]) unless ENV['DEVELOPMENT']
98
98
  repo_hash.each do |k, v|
99
+ k = k.to_sym
99
100
  next if k == :repo_type
100
101
  if [:first_credential, :second_credential].include?(k) && is_useful?(v)
101
102
  v = useful_part(v)
@@ -68,5 +68,23 @@ module RightScraper::Resources
68
68
  @resource_hash
69
69
  end
70
70
 
71
+ # marshal cookbook to hash
72
+ def to_hash
73
+ {
74
+ repository: repository,
75
+ metadata: metadata,
76
+ manifest: manifest,
77
+ pos: pos
78
+ }
79
+ end
80
+
81
+ # unmarshal cookbook from hash
82
+ def self.from_hash(h)
83
+ h = ::RightSupport::Data::Mash.new(h)
84
+ c = self.new(h[:repository], h[:pos], h[:repo_dir])
85
+ c.manifest = h[:manifest]
86
+ c
87
+ end
88
+
71
89
  end
72
90
  end
@@ -36,7 +36,7 @@ module RightScraper::Retrievers
36
36
  # somewhere. Uses command line curl and command line tar.
37
37
  class Download < ::RightScraper::Retrievers::Base
38
38
 
39
- class DownloadError < Exception; end
39
+ class DownloadError < ::RightScraper::Error; end
40
40
 
41
41
  @@available = false
42
42
 
@@ -26,8 +26,10 @@ require 'right_scraper'
26
26
  module RightScraper
27
27
  module Scanners
28
28
  autoload :Base, 'right_scraper/scanners/base'
29
+ autoload :CookbookFilenameScanner, 'right_scraper/scanners/cookbook_filename_scanner'
29
30
  autoload :CookbookManifest, 'right_scraper/scanners/cookbook_manifest'
30
31
  autoload :CookbookMetadata, 'right_scraper/scanners/cookbook_metadata'
32
+ autoload :CookbookMetadataReadOnly, 'right_scraper/scanners/cookbook_metadata_readonly'
31
33
  autoload :CookbookS3Upload, 'right_scraper/scanners/cookbook_s3_upload'
32
34
  autoload :Union, 'right_scraper/scanners/union'
33
35
  autoload :WorkflowManifest, 'right_scraper/scanners/workflow_manifest'
@@ -0,0 +1,83 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2016 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ require 'right_scraper'
25
+
26
+ module RightScraper
27
+ module Scanners
28
+ # Warns about file and directory names that contain non-printing or non-ASCII characters.
29
+ class CookbookFilenameScanner < ::RightScraper::Scanners::Base
30
+
31
+ # Initializer
32
+ #
33
+ # === Parameters
34
+ # @param [Hash] options
35
+ # @option options [Array] :warnings bucket
36
+ def initialize(options)
37
+ super
38
+ raise ArgumentError.new("options[:warnings] is required") unless @warnings = options[:warnings]
39
+ end
40
+
41
+ # Checks file names for any problematic characters.
42
+ #
43
+ # === Block ===
44
+ # @yield [] returns file data, not checked here
45
+ #
46
+ # === Parameters ===
47
+ # @param [String] relative_position for file from root of resource
48
+ def notice(relative_position)
49
+ if detect_non_printing_non_ascii(relative_position)
50
+ @warnings << "A file name contained non-printing or non-ASCII characters: #{relative_position.inspect}"
51
+ end
52
+ end
53
+
54
+ # Checks directory names for any problematic characters.
55
+ #
56
+ # === Parameters ===
57
+ # @param [String] relative_position for directory from root of resource
58
+ #
59
+ # === Returns ===
60
+ # @return [TrueClass|FalseClass] true if scanning should recurse directory
61
+ def notice_dir(relative_position)
62
+ if relative_position && detect_non_printing_non_ascii(relative_position)
63
+ @warnings << "A directory name contained non-printing or non-ASCII characters: #{relative_position.inspect}"
64
+ # ignore directory contents since directory itself is problematic.
65
+ false
66
+ else
67
+ true
68
+ end
69
+ end
70
+
71
+ private
72
+
73
+ # Determines if the given string contains non-printing or non-ASCII
74
+ # characters.
75
+ #
76
+ # === Returns ===
77
+ # @return [TrueClass|FalseClass] true if any character is non-printing or non-ASCII
78
+ def detect_non_printing_non_ascii(relative_position)
79
+ !!relative_position.bytes.find { |byte| byte < 0x20 || byte > 0x7E }
80
+ end
81
+ end
82
+ end
83
+ end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2016 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -24,6 +24,7 @@
24
24
  # ancestor
25
25
  require 'right_scraper/scanners'
26
26
 
27
+ require 'fileutils'
27
28
  require 'json'
28
29
  require 'right_popen'
29
30
  require 'right_popen/safe_output_buffer'
@@ -42,11 +43,25 @@ module RightScraper::Scanners
42
43
  JAILED_FILE_SIZE_CONSTRAINT = 128 * 1024 # 128 KB
43
44
  FREED_FILE_SIZE_CONSTRAINT = 64 * 1024 # 64 KB
44
45
 
45
- TARBALL_CREATE_TIMEOUT = 30 # ..to create the tarball
46
- TARBALL_ARCHIVE_NAME = 'cookbook.tar'
46
+ attr_reader :freed_dir
47
47
 
48
48
  # exceptions
49
- class MetadataError < Exception; end
49
+ class MetadataError < ::RightScraper::Error; end
50
+
51
+ def initialize(options)
52
+ super
53
+
54
+ # we will free the generated 'metadata.json' to a path relative to the
55
+ # repository directory. this allows for multiple passes over the
56
+ # generated file(s) using different child processes, some or all of
57
+ # which may execute in containers. the exact location of the freed file
58
+ # depends on the cookbook position; recall that multiple cookbooks can
59
+ # appear within a given repository.
60
+ @freed_dir = options[:freed_dir].to_s
61
+ if @freed_dir.empty? || !::File.directory?(@freed_dir)
62
+ raise ::ArgumentError, "Missing or invalid freed_dir: #{@freed_dir.inspect}"
63
+ end
64
+ end
50
65
 
51
66
  def tls
52
67
  Thread.current[self.class.to_s.to_sym] ||= {}
@@ -97,24 +112,14 @@ module RightScraper::Scanners
97
112
  @cookbook = nil
98
113
  end
99
114
 
100
- # All done scanning this repository, we can tear down the warden container we may or
101
- # may not have created while parsing the cookbooks for this repository.
115
+ # All done scanning this repository.
102
116
  #
103
117
  def finish
104
118
  begin
105
- FileUtils.remove_entry_secure(tls[:tmpdir]) if tls[:tmpdir]
119
+ ::FileUtils.remove_entry_secure(tls[:tmpdir]) if tls[:tmpdir]
106
120
  rescue ::Exception => e
107
121
  @logger.note_warning(e.message)
108
122
  end
109
-
110
- if warden = tls[:warden]
111
- begin
112
- warden.cleanup
113
- rescue ::Exception => e
114
- @logger.note_warning(e.message)
115
- end
116
- end
117
-
118
123
  ensure
119
124
  # Cleanup thread-local storage
120
125
  tls.clear
@@ -157,8 +162,7 @@ module RightScraper::Scanners
157
162
  private
158
163
 
159
164
  # Executes the 'metadata.rb' file from a cookbook. Because we don't want
160
- # to evaluate arbitrary Ruby code, we need to sandbox it first using
161
- # Warden.
165
+ # to evaluate arbitrary Ruby code, we need to sandbox it first.
162
166
  #
163
167
  # in order for knife metadata to succeed in the general case we need to
164
168
  # copy some (but not all) of the cookbook directory AND its ancestors (if
@@ -187,141 +191,113 @@ module RightScraper::Scanners
187
191
  # note we will use the same tmpdir path inside and outside the
188
192
  # container only because it is non-trivial to invoke mktmpdir inside
189
193
  # the container.
190
- tmpdir = create_tmpdir
194
+ tmpdir, created = create_tmpdir
191
195
 
192
- # arrest
193
- knife_metadata_script_path = ::File.join(tmpdir, KNIFE_METADATA_SCRIPT_NAME)
196
+ # path constants
197
+ src_knife_script_path = ::File.expand_path(
198
+ ::File.join(__FILE__, '../../../../scripts', KNIFE_METADATA_SCRIPT_NAME))
199
+ dst_knife_script_dir = tmpdir
200
+ dst_knife_script_path = ::File.join(dst_knife_script_dir, KNIFE_METADATA_SCRIPT_NAME)
194
201
  jailed_repo_dir = ::File.join(tmpdir, UNDEFINED_COOKBOOK_NAME)
195
202
  jailed_cookbook_dir = (@cookbook.pos == '.' && jailed_repo_dir) || ::File.join(jailed_repo_dir, @cookbook.pos)
196
203
  jailed_metadata_json_path = ::File.join(jailed_cookbook_dir, JSON_METADATA)
197
- freed_metadata_json_path = ::File.join(tmpdir, JSON_METADATA)
204
+ freed_metadata_dir = (@cookbook.pos == '.' && freed_dir) || ::File.join(freed_dir, @cookbook.pos)
205
+ freed_metadata_json_path = ::File.join(freed_metadata_dir, JSON_METADATA)
206
+
207
+ # in the multi-pass case we will run this scanner only on the first pass
208
+ # so the 'metadata.json' file should not exist. the read-only scanner,
209
+ # which is safe outside of containment, should be used subsequently.
210
+ # the entire 'freed' directory should have been removed upon the next
211
+ # successful retrieval so that this scanner will succeed.
212
+ if ::File.file?(freed_metadata_json_path)
213
+ raise MetadataError, "Refused to overwrite already-generated metadata file: #{freed_metadata_json_path}"
214
+ end
198
215
 
199
- # police brutality
216
+ # jail the repo using the legacy semantics for copying files in and out
217
+ # of jail.
200
218
  copy_out = { jailed_metadata_json_path => freed_metadata_json_path }
201
219
 
202
- begin
203
- # jail the repo
204
- unless warden = tls[:warden]
205
- # Get a list of the files in the repo we need
206
- create_knife_metadata_script(knife_metadata_script_path)
207
- copy_in = generate_copy_in
208
- copy_in << knife_metadata_script_path
209
-
210
- # Create the container, one for all in this repo
211
- warden = tls[:warden] = create_warden
212
-
213
- # tar up the required pieces of the repo and copy them into the container
214
- cookbook_tarball_path = ::File.join(tmpdir, TARBALL_ARCHIVE_NAME)
215
- # prosecute
216
- create_cookbook_tarball(cookbook_tarball_path, copy_in, jailed_repo_dir)
217
-
218
- # unarchive the tarball on the otherside (this is faster than single file copies)
219
- cmd = "tar -Pxf #{cookbook_tarball_path.inspect}"
220
- warden.run_command_in_jail(cmd, cookbook_tarball_path, nil)
221
- end
220
+ # copy files into the jail once per repository (i.e. not once per
221
+ # cookbook within the repository).
222
+ if created
223
+ copy_in = generate_copy_in(@cookbook.repo_dir, jailed_repo_dir)
224
+ copy_in[src_knife_script_path] = dst_knife_script_path
225
+
226
+ # note that at this point we previously used Warden as a container
227
+ # for the copied-in files but now we assume that the current process
228
+ # is already in a container (i.e. Docker) and so this copying is
229
+ # more about creating a writable directory for knife than about
230
+ # containment. the checked-out repo should be read-only to this
231
+ # contained process due to running with limited privileges.
232
+ do_copy_in(copy_in)
233
+ end
222
234
 
223
- # Generate the metadata
224
- cmd = "export LC_ALL='en_US.UTF-8'; ruby #{knife_metadata_script_path.inspect} #{jailed_cookbook_dir.inspect}"
225
- warden.run_command_in_jail(cmd, nil, copy_out)
235
+ # HACK: support ad-hoc testing in dev-mode by using the current version
236
+ # for rbenv shell.
237
+ if ::ENV['RBENV_VERSION'].to_s.empty?
238
+ ruby = 'ruby'
239
+ else
240
+ ruby = `which ruby`.chomp
241
+ end
226
242
 
227
- # constraining the generate file size is debatable, but our UI
228
- # attempts to load metadata JSON into memory far too often to be
229
- # blasé about generating multi-megabyte JSON files.
230
- unless ::File.file?(freed_metadata_json_path)
231
- raise MetadataError, 'Generated JSON file not found.'
232
- end
233
- freed_metadata_json_size = ::File.stat(freed_metadata_json_path).size
234
- if freed_metadata_json_size <= FREED_FILE_SIZE_CONSTRAINT
235
- # parole for good behavior
236
- return ::File.read(freed_metadata_json_path)
237
- else
238
- # life imprisonment.
239
- raise MetadataError,
240
- "Generated metadata size of" +
241
- " #{freed_metadata_json_size / 1024} KB" +
242
- " exceeded the allowed limit of" +
243
- " #{FREED_FILE_SIZE_CONSTRAINT / 1024} KB"
244
- end
245
- rescue ::RightScraper::Processes::Warden::LinkError => e
246
- raise MetadataError,
247
- "Failed to generate metadata from source: #{e.message}" +
248
- "\n#{e.link_result.stdout}" +
249
- "\n#{e.link_result.stderr}"
243
+ # execute knife as a child process. any constraints are assumed to be
244
+ # imposed on the current process by a container (timeout, memory, etc.)
245
+ shell = ::RightGit::Shell::Default
246
+ output = StringIO.new
247
+ exitstatus = shell.execute(
248
+ "#{ruby} #{dst_knife_script_path.inspect} #{jailed_cookbook_dir.inspect} 2>&1",
249
+ directory: dst_knife_script_dir,
250
+ outstream: output,
251
+ raise_on_failure: false,
252
+ set_env_vars: { LC_ALL: 'en_US.UTF-8' }, # character encoding for emitted JSON
253
+ clear_env_vars: %w{BUNDLE_BIN_PATH BUNDLE_GEMFILE})
254
+ if exitstatus != 0
255
+ output = output.string
256
+ raise MetadataError, "Failed to run chef knife: #{output[0, 1024]}"
250
257
  end
251
- end
252
- end
253
258
 
254
- def stdout_tarball(data)
255
- @stdout_buffer << data
256
- end
259
+ # free files from jail.
260
+ do_copy_out(copy_out)
257
261
 
258
- def stderr_tarball(data)
259
- @stderr_buffer.safe_buffer_data(data)
262
+ # load and return freed metadata.
263
+ return ::File.read(freed_metadata_json_path)
264
+ end
260
265
  end
261
266
 
262
- def timeout_tarball
263
- raise MetadataError,
264
- "Timed out waiting for tarball to build.\n" +
265
- "stdout: #{@stdout_buffer.join}\n" +
266
- "stderr: #{@stderr_buffer.display_text}"
267
+ # copies files into jail. we no longer start a new container so this is only
268
+ # a local file copying operation. we still need files to appear in a
269
+ # writable directory location because knife will write to the directory.
270
+ def do_copy_in(path_map)
271
+ path_map.each do |src_path, dst_path|
272
+ if src_path != dst_path
273
+ ::FileUtils.mkdir_p(::File.dirname(dst_path))
274
+ ::FileUtils.cp(src_path, dst_path)
275
+ end
276
+ end
277
+ true
267
278
  end
268
279
 
269
- # @param [String] dest_file
270
- # @param [Array] contents
271
- # @param [String] dest_path
272
- def create_cookbook_tarball(dest_file, contents, dest_path)
273
- @logger.operation(:tarball_generation) do
274
- tarball_cmd = [
275
- 'tar',
276
- "-Pcf #{dest_file}",
277
- "--transform='s,#{@cookbook.repo_dir},#{dest_path},'",
278
- '-T', '-'
279
- ]
280
-
281
- @stdout_buffer = []
282
- @stderr_buffer = ::RightScale::RightPopen::SafeOutputBuffer.new
283
- begin
284
- ::RightScale::RightPopen.popen3_sync(
285
- tarball_cmd.join(' '),
286
- :target => self,
287
- :timeout_handler => :timeout_tarball,
288
- :input => contents.join("\n"),
289
- :stderr_handler => :stderr_tarball,
290
- :stdout_handler => :stdout_tarball,
291
- :inherit_io => true, # avoid killing any rails connection
292
- :timeout_seconds => TARBALL_CREATE_TIMEOUT)
293
- rescue Exception => e
280
+ # copies files out of jail by mapping of jail to free path.
281
+ def do_copy_out(path_map)
282
+ path_map.each do |src_path, dst_path|
283
+ # constraining the generated 'metadata.json' size is debatable, but
284
+ # our UI attempts to load metadata JSON into memory far too often to
285
+ # be blasé about generating multi-megabyte JSON files.
286
+ unless ::File.file?(src_path)
287
+ raise MetadataError, "Expected generated file was not found: #{src_path}"
288
+ end
289
+ src_size = ::File.stat(src_path).size
290
+ if src_size <= FREED_FILE_SIZE_CONSTRAINT
291
+ ::FileUtils.mkdir_p(::File.dirname(dst_path))
292
+ ::FileUtils.cp(src_path, dst_path)
293
+ else
294
294
  raise MetadataError,
295
- "Failed to generate cookbook tarball from source files: #{e.message}\n" +
296
- "stdout: #{@stdout_buffer.join}\n" +
297
- "stderr: #{@stderr_buffer.display_text}"
295
+ "Generated file size of" +
296
+ " #{src_size / 1024} KB" +
297
+ " exceeded the allowed limit of" +
298
+ " #{FREED_FILE_SIZE_CONSTRAINT / 1024} KB"
298
299
  end
299
300
  end
300
- end
301
-
302
- # generates a script that runs Chef's knife tool. it presumes the jail
303
- # contains a ruby interpreter that has chef installed as a gem.
304
- #
305
- # we want to avoid using the knife command line only because it requires a
306
- # '$HOME/.chef/knife.rb' configuration file even though we won't use that
307
- # configuration file in any way. :@
308
- #
309
- # the simplest solution is to execute the knife tool within a ruby script
310
- # because it has no pre-configuration requirement and it does not require
311
- # the knife binstub to be on the PATH.
312
- def create_knife_metadata_script(script_path)
313
- script = <<EOS
314
- require 'rubygems'
315
- require 'chef'
316
- require 'chef/knife/cookbook_metadata'
317
-
318
- jailed_cookbook_dir = ARGV.pop
319
- knife_metadata = ::Chef::Knife::CookbookMetadata.new
320
- knife_metadata.name_args = [::File.basename(jailed_cookbook_dir)]
321
- knife_metadata.config[:cookbook_path] = ::File.dirname(jailed_cookbook_dir)
322
- knife_metadata.run
323
- EOS
324
- ::File.open(script_path, 'w') { |f| f.puts script }
325
301
  true
326
302
  end
327
303
 
@@ -340,12 +316,20 @@ EOS
340
316
  # again, the user can work around these constraints by generating his own
341
317
  # metadata and checking it into the repository.
342
318
  #
343
- # @return [Array] list of files to copy into jail
344
- def generate_copy_in()
319
+ # @return [Hash] path_map as map of source to destination file paths
320
+ def generate_copy_in(src_base_path, dst_base_path)
321
+ src_base_path = ::File.expand_path(src_base_path)
322
+ dst_base_path = ::File.expand_path(dst_base_path)
345
323
  copy_in = []
346
- start_path = @cookbook.repo_dir
347
- recursive_generate_copy_in(copy_in, start_path)
348
- copy_in
324
+ recursive_generate_copy_in(copy_in, src_base_path)
325
+
326
+ src_base_path += '/'
327
+ src_base_path_len = src_base_path.length
328
+ dst_base_path += '/'
329
+ copy_in.inject({}) do |h, src_path|
330
+ h[src_path] = ::File.join(dst_base_path, src_path[src_base_path_len..-1])
331
+ h
332
+ end
349
333
  end
350
334
 
351
335
  # recursive part of generate_copy_in
@@ -399,16 +383,18 @@ EOS
399
383
  path[(@cookbook.repo_dir.length + 1)..-1]
400
384
  end
401
385
 
402
- # factory method for an object capable of running command in jail
403
- # (convenient for testing).
404
- def create_warden
405
- ::RightScraper::Processes::Warden.new
406
- end
407
-
408
386
  # factory method for tmpdir (convenient for testing).
409
387
  def create_tmpdir
410
- tls[:tmpdir] ||= ::Dir.mktmpdir
388
+ td = tls[:tmpdir]
389
+ if td.nil?
390
+ td = ::Dir.mktmpdir
391
+ tls[:tmpdir] = td
392
+ created = true
393
+ else
394
+ created = false
395
+ end
396
+ return [td, created]
411
397
  end
412
398
 
413
- end
414
- end
399
+ end # CookbookMetadata
400
+ end # RightScraper::Scanners