right_scraper 5.0.1 → 5.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,7 +35,7 @@ module RightScraper
35
35
  # Manage a dedicated SSH agent.
36
36
  class SSHAgent
37
37
 
38
- class SSHAgentError < Exception; end
38
+ class SSHAgentError < ::RightScraper::Error; end
39
39
 
40
40
  def initialize
41
41
  @display = ENV['DISPLAY']
@@ -96,6 +96,7 @@ module RightScraper::Repositories
96
96
  repo = repo_class.new
97
97
  validate_uri(repo_hash[:url]) unless ENV['DEVELOPMENT']
98
98
  repo_hash.each do |k, v|
99
+ k = k.to_sym
99
100
  next if k == :repo_type
100
101
  if [:first_credential, :second_credential].include?(k) && is_useful?(v)
101
102
  v = useful_part(v)
@@ -68,5 +68,23 @@ module RightScraper::Resources
68
68
  @resource_hash
69
69
  end
70
70
 
71
# Marshals this cookbook into a plain Hash keyed by symbol.
#
# === Returns ===
# @return [Hash] the current values of +repository+, +metadata+,
#   +manifest+ and +pos+ under the keys of the same names
def to_hash
  {
    :repository => repository,
    :metadata   => metadata,
    :manifest   => manifest,
    :pos        => pos
  }
end
80
+
81
# Unmarshals a cookbook from a hash.
#
# The input is wrapped in a Mash before lookup, then a new instance is
# built from the :repository, :pos and :repo_dir entries and its manifest
# is assigned from the :manifest entry.
#
# NOTE(review): to_hash emits :metadata, which is not restored here, and
# this method reads :repo_dir, which to_hash does not emit -- confirm that
# callers add :repo_dir themselves and that metadata is reloaded elsewhere.
#
# === Parameters ===
# @param [Hash] h marshaled cookbook fields
#
# === Returns ===
# @return [Object] new instance of the receiving class
def self.from_hash(h)
  fields = ::RightSupport::Data::Mash.new(h)
  new(fields[:repository], fields[:pos], fields[:repo_dir]).tap do |cookbook|
    cookbook.manifest = fields[:manifest]
  end
end
88
+
71
89
  end
72
90
  end
@@ -36,7 +36,7 @@ module RightScraper::Retrievers
36
36
  # somewhere. Uses command line curl and command line tar.
37
37
  class Download < ::RightScraper::Retrievers::Base
38
38
 
39
- class DownloadError < Exception; end
39
+ class DownloadError < ::RightScraper::Error; end
40
40
 
41
41
  @@available = false
42
42
 
@@ -26,8 +26,10 @@ require 'right_scraper'
26
26
  module RightScraper
27
27
  module Scanners
28
28
  autoload :Base, 'right_scraper/scanners/base'
29
+ autoload :CookbookFilenameScanner, 'right_scraper/scanners/cookbook_filename_scanner'
29
30
  autoload :CookbookManifest, 'right_scraper/scanners/cookbook_manifest'
30
31
  autoload :CookbookMetadata, 'right_scraper/scanners/cookbook_metadata'
32
+ autoload :CookbookMetadataReadOnly, 'right_scraper/scanners/cookbook_metadata_readonly'
31
33
  autoload :CookbookS3Upload, 'right_scraper/scanners/cookbook_s3_upload'
32
34
  autoload :Union, 'right_scraper/scanners/union'
33
35
  autoload :WorkflowManifest, 'right_scraper/scanners/workflow_manifest'
@@ -0,0 +1,83 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2016 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ require 'right_scraper'
25
+
26
module RightScraper
  module Scanners
    # Scanner that appends a warning to a caller-supplied bucket for every
    # file or directory name containing a byte outside the printable ASCII
    # range (0x20..0x7E). It does not inspect file contents.
    class CookbookFilenameScanner < ::RightScraper::Scanners::Base

      # Initializer
      #
      # === Parameters
      # @param [Hash] options
      # @option options [Array] :warnings bucket that receives warning strings
      #
      # === Raises
      # @raise [ArgumentError] when options[:warnings] is missing
      def initialize(options)
        super
        @warnings = options[:warnings]
        raise ArgumentError, "options[:warnings] is required" unless @warnings
      end

      # Checks file names for any problematic characters.
      #
      # === Block ===
      # @yield [] returns file data, not checked here
      #
      # === Parameters ===
      # @param [String] relative_position for file from root of resource
      def notice(relative_position)
        # nil positions are ignored, mirroring the guard in notice_dir.
        if relative_position && detect_non_printing_non_ascii(relative_position)
          @warnings << "A file name contained non-printing or non-ASCII characters: #{relative_position.inspect}"
        end
      end

      # Checks directory names for any problematic characters.
      #
      # === Parameters ===
      # @param [String] relative_position for directory from root of resource
      #
      # === Returns ===
      # @return [TrueClass|FalseClass] true if scanning should recurse directory
      def notice_dir(relative_position)
        if relative_position && detect_non_printing_non_ascii(relative_position)
          @warnings << "A directory name contained non-printing or non-ASCII characters: #{relative_position.inspect}"
          # ignore directory contents since directory itself is problematic.
          false
        else
          true
        end
      end

      private

      # Determines if the given string contains non-printing or non-ASCII
      # characters. The check is byte-wise, so every byte of a multi-byte
      # character is outside the accepted range.
      #
      # === Parameters ===
      # @param [String] relative_position to inspect
      #
      # === Returns ===
      # @return [TrueClass|FalseClass] true if any byte is below 0x20 or above 0x7E
      def detect_non_printing_non_ascii(relative_position)
        relative_position.bytes.any? { |byte| byte < 0x20 || byte > 0x7E }
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2016 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -24,6 +24,7 @@
24
24
  # ancestor
25
25
  require 'right_scraper/scanners'
26
26
 
27
+ require 'fileutils'
27
28
  require 'json'
28
29
  require 'right_popen'
29
30
  require 'right_popen/safe_output_buffer'
@@ -42,11 +43,25 @@ module RightScraper::Scanners
42
43
  JAILED_FILE_SIZE_CONSTRAINT = 128 * 1024 # 128 KB
43
44
  FREED_FILE_SIZE_CONSTRAINT = 64 * 1024 # 64 KB
44
45
 
45
- TARBALL_CREATE_TIMEOUT = 30 # ..to create the tarball
46
- TARBALL_ARCHIVE_NAME = 'cookbook.tar'
46
+ attr_reader :freed_dir
47
47
 
48
48
  # exceptions
49
- class MetadataError < Exception; end
49
+ class MetadataError < ::RightScraper::Error; end
50
+
51
# Initializer
#
# === Parameters
# @param [Hash] options forwarded unchanged to the superclass
# @option options [String] :freed_dir existing directory that receives
#   generated metadata files
#
# === Raises
# @raise [ArgumentError] when :freed_dir is blank or is not a directory
def initialize(options)
  super

  # the generated 'metadata.json' is freed to a path relative to this
  # directory rather than to a throw-away tmpdir, so that later passes
  # (possibly in other child processes or containers) can read the same
  # file(s). the exact location below this directory depends on the
  # cookbook position, since one repository can hold several cookbooks.
  freed = options[:freed_dir].to_s
  @freed_dir = freed
  return if !freed.empty? && ::File.directory?(freed)

  raise ::ArgumentError, "Missing or invalid freed_dir: #{freed.inspect}"
end
50
65
 
51
66
  def tls
52
67
  Thread.current[self.class.to_s.to_sym] ||= {}
@@ -97,24 +112,14 @@ module RightScraper::Scanners
97
112
  @cookbook = nil
98
113
  end
99
114
 
100
- # All done scanning this repository, we can tear down the warden container we may or
101
- # may not have created while parsing the cookbooks for this repository.
115
+ # All done scanning this repository.
102
116
  #
103
117
  def finish
104
118
  begin
105
- FileUtils.remove_entry_secure(tls[:tmpdir]) if tls[:tmpdir]
119
+ ::FileUtils.remove_entry_secure(tls[:tmpdir]) if tls[:tmpdir]
106
120
  rescue ::Exception => e
107
121
  @logger.note_warning(e.message)
108
122
  end
109
-
110
- if warden = tls[:warden]
111
- begin
112
- warden.cleanup
113
- rescue ::Exception => e
114
- @logger.note_warning(e.message)
115
- end
116
- end
117
-
118
123
  ensure
119
124
  # Cleanup thread-local storage
120
125
  tls.clear
@@ -157,8 +162,7 @@ module RightScraper::Scanners
157
162
  private
158
163
 
159
164
  # Executes the 'metadata.rb' file from a cookbook. Because we don't want
160
- # to evaluate arbitrary Ruby code, we need to sandbox it first using
161
- # Warden.
165
+ # to evaluate arbitrary Ruby code, we need to sandbox it first.
162
166
  #
163
167
  # in order for knife metadata to succeed in the general case we need to
164
168
  # copy some (but not all) of the cookbook directory AND its ancestors (if
@@ -187,141 +191,113 @@ module RightScraper::Scanners
187
191
  # note we will use the same tmpdir path inside and outside the
188
192
  # container only because it is non-trivial to invoke mktmpdir inside
189
193
  # the container.
190
- tmpdir = create_tmpdir
194
+ tmpdir, created = create_tmpdir
191
195
 
192
- # arrest
193
- knife_metadata_script_path = ::File.join(tmpdir, KNIFE_METADATA_SCRIPT_NAME)
196
+ # path constants
197
+ src_knife_script_path = ::File.expand_path(
198
+ ::File.join(__FILE__, '../../../../scripts', KNIFE_METADATA_SCRIPT_NAME))
199
+ dst_knife_script_dir = tmpdir
200
+ dst_knife_script_path = ::File.join(dst_knife_script_dir, KNIFE_METADATA_SCRIPT_NAME)
194
201
  jailed_repo_dir = ::File.join(tmpdir, UNDEFINED_COOKBOOK_NAME)
195
202
  jailed_cookbook_dir = (@cookbook.pos == '.' && jailed_repo_dir) || ::File.join(jailed_repo_dir, @cookbook.pos)
196
203
  jailed_metadata_json_path = ::File.join(jailed_cookbook_dir, JSON_METADATA)
197
- freed_metadata_json_path = ::File.join(tmpdir, JSON_METADATA)
204
+ freed_metadata_dir = (@cookbook.pos == '.' && freed_dir) || ::File.join(freed_dir, @cookbook.pos)
205
+ freed_metadata_json_path = ::File.join(freed_metadata_dir, JSON_METADATA)
206
+
207
+ # in the multi-pass case we will run this scanner only on the first pass
208
+ # so the 'metadata.json' file should not exist. the read-only scanner,
209
+ # which is safe outside of containment, should be used subsequently.
210
+ # the entire 'freed' directory should have been removed upon the next
211
+ # successful retrieval so that this scanner will succeed.
212
+ if ::File.file?(freed_metadata_json_path)
213
+ raise MetadataError, "Refused to overwrite already-generated metadata file: #{freed_metadata_json_path}"
214
+ end
198
215
 
199
- # police brutality
216
+ # jail the repo using the legacy semantics for copying files in and out
217
+ # of jail.
200
218
  copy_out = { jailed_metadata_json_path => freed_metadata_json_path }
201
219
 
202
- begin
203
- # jail the repo
204
- unless warden = tls[:warden]
205
- # Get a list of the files in the repo we need
206
- create_knife_metadata_script(knife_metadata_script_path)
207
- copy_in = generate_copy_in
208
- copy_in << knife_metadata_script_path
209
-
210
- # Create the container, one for all in this repo
211
- warden = tls[:warden] = create_warden
212
-
213
- # tar up the required pieces of the repo and copy them into the container
214
- cookbook_tarball_path = ::File.join(tmpdir, TARBALL_ARCHIVE_NAME)
215
- # prosecute
216
- create_cookbook_tarball(cookbook_tarball_path, copy_in, jailed_repo_dir)
217
-
218
- # unarchive the tarball on the otherside (this is faster than single file copies)
219
- cmd = "tar -Pxf #{cookbook_tarball_path.inspect}"
220
- warden.run_command_in_jail(cmd, cookbook_tarball_path, nil)
221
- end
220
+ # copy files into the jail once per repository (i.e. not once per
221
+ # cookbook within the repository).
222
+ if created
223
+ copy_in = generate_copy_in(@cookbook.repo_dir, jailed_repo_dir)
224
+ copy_in[src_knife_script_path] = dst_knife_script_path
225
+
226
+ # note that at this point we previously used Warden as a container
227
+ # for the copied-in files but now we assume that the current process
228
+ # is already in a container (i.e. Docker) and so this copying is
229
+ # more about creating a writable directory for knife than about
230
+ # containment. the checked-out repo should be read-only to this
231
+ # contained process due to running with limited privileges.
232
+ do_copy_in(copy_in)
233
+ end
222
234
 
223
- # Generate the metadata
224
- cmd = "export LC_ALL='en_US.UTF-8'; ruby #{knife_metadata_script_path.inspect} #{jailed_cookbook_dir.inspect}"
225
- warden.run_command_in_jail(cmd, nil, copy_out)
235
+ # HACK: support ad-hoc testing in dev-mode by using the current version
236
+ # for rbenv shell.
237
+ if ::ENV['RBENV_VERSION'].to_s.empty?
238
+ ruby = 'ruby'
239
+ else
240
+ ruby = `which ruby`.chomp
241
+ end
226
242
 
227
- # constraining the generate file size is debatable, but our UI
228
- # attempts to load metadata JSON into memory far too often to be
229
- # blasé about generating multi-megabyte JSON files.
230
- unless ::File.file?(freed_metadata_json_path)
231
- raise MetadataError, 'Generated JSON file not found.'
232
- end
233
- freed_metadata_json_size = ::File.stat(freed_metadata_json_path).size
234
- if freed_metadata_json_size <= FREED_FILE_SIZE_CONSTRAINT
235
- # parole for good behavior
236
- return ::File.read(freed_metadata_json_path)
237
- else
238
- # life imprisonment.
239
- raise MetadataError,
240
- "Generated metadata size of" +
241
- " #{freed_metadata_json_size / 1024} KB" +
242
- " exceeded the allowed limit of" +
243
- " #{FREED_FILE_SIZE_CONSTRAINT / 1024} KB"
244
- end
245
- rescue ::RightScraper::Processes::Warden::LinkError => e
246
- raise MetadataError,
247
- "Failed to generate metadata from source: #{e.message}" +
248
- "\n#{e.link_result.stdout}" +
249
- "\n#{e.link_result.stderr}"
243
+ # execute knife as a child process. any constraints are assumed to be
244
+ # imposed on the current process by a container (timeout, memory, etc.)
245
+ shell = ::RightGit::Shell::Default
246
+ output = StringIO.new
247
+ exitstatus = shell.execute(
248
+ "#{ruby} #{dst_knife_script_path.inspect} #{jailed_cookbook_dir.inspect} 2>&1",
249
+ directory: dst_knife_script_dir,
250
+ outstream: output,
251
+ raise_on_failure: false,
252
+ set_env_vars: { LC_ALL: 'en_US.UTF-8' }, # character encoding for emitted JSON
253
+ clear_env_vars: %w{BUNDLE_BIN_PATH BUNDLE_GEMFILE})
254
+ if exitstatus != 0
255
+ output = output.string
256
+ raise MetadataError, "Failed to run chef knife: #{output[0, 1024]}"
250
257
  end
251
- end
252
- end
253
258
 
254
- def stdout_tarball(data)
255
- @stdout_buffer << data
256
- end
259
+ # free files from jail.
260
+ do_copy_out(copy_out)
257
261
 
258
- def stderr_tarball(data)
259
- @stderr_buffer.safe_buffer_data(data)
262
+ # load and return freed metadata.
263
+ return ::File.read(freed_metadata_json_path)
264
+ end
260
265
  end
261
266
 
262
- def timeout_tarball
263
- raise MetadataError,
264
- "Timed out waiting for tarball to build.\n" +
265
- "stdout: #{@stdout_buffer.join}\n" +
266
- "stderr: #{@stderr_buffer.display_text}"
267
# Copies files into jail. No new container is started, so this is only a
# local file copy; the files still have to land in a writable directory
# because knife writes next to them.
#
# Entries whose source and destination resolve to the same path are
# skipped. The comparison uses expanded paths so that two spellings of one
# location (e.g. 'a/../x' and 'x') do not reach FileUtils.cp, which raises
# on a same-file copy.
#
# === Parameters ===
# @param [Hash] path_map as map of source to destination file paths
#
# === Returns ===
# @return [TrueClass] always true
def do_copy_in(path_map)
  path_map.each do |src_path, dst_path|
    next if ::File.expand_path(src_path) == ::File.expand_path(dst_path)

    # destination directories are created on demand.
    ::FileUtils.mkdir_p(::File.dirname(dst_path))
    ::FileUtils.cp(src_path, dst_path)
  end
  true
end
268
279
 
269
- # @param [String] dest_file
270
- # @param [Array] contents
271
- # @param [String] dest_path
272
- def create_cookbook_tarball(dest_file, contents, dest_path)
273
- @logger.operation(:tarball_generation) do
274
- tarball_cmd = [
275
- 'tar',
276
- "-Pcf #{dest_file}",
277
- "--transform='s,#{@cookbook.repo_dir},#{dest_path},'",
278
- '-T', '-'
279
- ]
280
-
281
- @stdout_buffer = []
282
- @stderr_buffer = ::RightScale::RightPopen::SafeOutputBuffer.new
283
- begin
284
- ::RightScale::RightPopen.popen3_sync(
285
- tarball_cmd.join(' '),
286
- :target => self,
287
- :timeout_handler => :timeout_tarball,
288
- :input => contents.join("\n"),
289
- :stderr_handler => :stderr_tarball,
290
- :stdout_handler => :stdout_tarball,
291
- :inherit_io => true, # avoid killing any rails connection
292
- :timeout_seconds => TARBALL_CREATE_TIMEOUT)
293
- rescue Exception => e
280
+ # copies files out of jail by mapping of jail to free path.
281
+ def do_copy_out(path_map)
282
+ path_map.each do |src_path, dst_path|
283
+ # constraining the generated 'metadata.json' size is debatable, but
284
+ # our UI attempts to load metadata JSON into memory far too often to
285
+ # be blasé about generating multi-megabyte JSON files.
286
+ unless ::File.file?(src_path)
287
+ raise MetadataError, "Expected generated file was not found: #{src_path}"
288
+ end
289
+ src_size = ::File.stat(src_path).size
290
+ if src_size <= FREED_FILE_SIZE_CONSTRAINT
291
+ ::FileUtils.mkdir_p(::File.dirname(dst_path))
292
+ ::FileUtils.cp(src_path, dst_path)
293
+ else
294
294
  raise MetadataError,
295
- "Failed to generate cookbook tarball from source files: #{e.message}\n" +
296
- "stdout: #{@stdout_buffer.join}\n" +
297
- "stderr: #{@stderr_buffer.display_text}"
295
+ "Generated file size of" +
296
+ " #{src_size / 1024} KB" +
297
+ " exceeded the allowed limit of" +
298
+ " #{FREED_FILE_SIZE_CONSTRAINT / 1024} KB"
298
299
  end
299
300
  end
300
- end
301
-
302
- # generates a script that runs Chef's knife tool. it presumes the jail
303
- # contains a ruby interpreter that has chef installed as a gem.
304
- #
305
- # we want to avoid using the knife command line only because it requires a
306
- # '$HOME/.chef/knife.rb' configuration file even though we won't use that
307
- # configuration file in any way. :@
308
- #
309
- # the simplest solution is to execute the knife tool within a ruby script
310
- # because it has no pre-configuration requirement and it does not require
311
- # the knife binstub to be on the PATH.
312
- def create_knife_metadata_script(script_path)
313
- script = <<EOS
314
- require 'rubygems'
315
- require 'chef'
316
- require 'chef/knife/cookbook_metadata'
317
-
318
- jailed_cookbook_dir = ARGV.pop
319
- knife_metadata = ::Chef::Knife::CookbookMetadata.new
320
- knife_metadata.name_args = [::File.basename(jailed_cookbook_dir)]
321
- knife_metadata.config[:cookbook_path] = ::File.dirname(jailed_cookbook_dir)
322
- knife_metadata.run
323
- EOS
324
- ::File.open(script_path, 'w') { |f| f.puts script }
325
301
  true
326
302
  end
327
303
 
@@ -340,12 +316,20 @@ EOS
340
316
  # again, the user can work around these constraints by generating his own
341
317
  # metadata and checking it into the repository.
342
318
  #
343
- # @return [Array] list of files to copy into jail
344
- def generate_copy_in()
319
+ # @return [Hash] path_map as map of source to destination file paths
320
+ def generate_copy_in(src_base_path, dst_base_path)
321
+ src_base_path = ::File.expand_path(src_base_path)
322
+ dst_base_path = ::File.expand_path(dst_base_path)
345
323
  copy_in = []
346
- start_path = @cookbook.repo_dir
347
- recursive_generate_copy_in(copy_in, start_path)
348
- copy_in
324
+ recursive_generate_copy_in(copy_in, src_base_path)
325
+
326
+ src_base_path += '/'
327
+ src_base_path_len = src_base_path.length
328
+ dst_base_path += '/'
329
+ copy_in.inject({}) do |h, src_path|
330
+ h[src_path] = ::File.join(dst_base_path, src_path[src_base_path_len..-1])
331
+ h
332
+ end
349
333
  end
350
334
 
351
335
  # recursive part of generate_copy_in
@@ -399,16 +383,18 @@ EOS
399
383
  path[(@cookbook.repo_dir.length + 1)..-1]
400
384
  end
401
385
 
402
- # factory method for an object capable of running command in jail
403
- # (convenient for testing).
404
- def create_warden
405
- ::RightScraper::Processes::Warden.new
406
- end
407
-
408
386
  # factory method for tmpdir (convenient for testing).
409
387
  def create_tmpdir
410
- tls[:tmpdir] ||= ::Dir.mktmpdir
388
+ td = tls[:tmpdir]
389
+ if td.nil?
390
+ td = ::Dir.mktmpdir
391
+ tls[:tmpdir] = td
392
+ created = true
393
+ else
394
+ created = false
395
+ end
396
+ return [td, created]
411
397
  end
412
398
 
413
- end
414
- end
399
+ end # CookbookMetadata
400
+ end # RightScraper::Scanners