right_scraper 3.2.6 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/right_scraper.rb +16 -34
- data/lib/right_scraper/builders.rb +32 -0
- data/lib/right_scraper/builders/base.rb +19 -20
- data/lib/right_scraper/builders/filesystem.rb +8 -6
- data/lib/right_scraper/builders/union.rb +4 -1
- data/lib/right_scraper/loggers.rb +31 -0
- data/lib/right_scraper/loggers/base.rb +113 -0
- data/lib/right_scraper/loggers/default.rb +98 -0
- data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
- data/lib/right_scraper/processes.rb +33 -0
- data/lib/right_scraper/processes/shell.rb +227 -0
- data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
- data/lib/right_scraper/processes/svn_client.rb +117 -0
- data/lib/right_scraper/processes/warden.rb +358 -0
- data/lib/right_scraper/registered_base.rb +154 -0
- data/lib/right_scraper/repositories.rb +33 -0
- data/lib/right_scraper/repositories/base.rb +271 -232
- data/lib/right_scraper/repositories/download.rb +8 -6
- data/lib/right_scraper/repositories/git.rb +8 -9
- data/lib/right_scraper/repositories/svn.rb +8 -8
- data/lib/right_scraper/resources.rb +32 -0
- data/lib/right_scraper/resources/base.rb +5 -1
- data/lib/right_scraper/resources/cookbook.rb +34 -27
- data/lib/right_scraper/resources/workflow.rb +27 -28
- data/lib/right_scraper/retrievers.rb +34 -0
- data/lib/right_scraper/retrievers/base.rb +80 -84
- data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
- data/lib/right_scraper/retrievers/download.rb +125 -117
- data/lib/right_scraper/retrievers/git.rb +377 -223
- data/lib/right_scraper/retrievers/svn.rb +102 -62
- data/lib/right_scraper/scanners.rb +37 -0
- data/lib/right_scraper/scanners/base.rb +77 -80
- data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
- data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
- data/lib/right_scraper/scanners/union.rb +61 -58
- data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
- data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
- data/lib/right_scraper/scrapers.rb +32 -0
- data/lib/right_scraper/scrapers/base.rb +217 -205
- data/lib/right_scraper/scrapers/cookbook.rb +42 -40
- data/lib/right_scraper/scrapers/workflow.rb +57 -58
- data/lib/right_scraper/version.rb +3 -0
- data/right_scraper.gemspec +12 -16
- metadata +57 -163
- data/Gemfile +0 -15
- data/Rakefile +0 -89
- data/lib/right_scraper/logger.rb +0 -107
- data/lib/right_scraper/loggers/noisy.rb +0 -85
- data/lib/right_scraper/repositories/mock.rb +0 -70
- data/lib/right_scraper/retrievers/checkout.rb +0 -79
- data/lib/right_scraper/scraper_logger.rb +0 -66
- data/lib/right_scraper/svn_client.rb +0 -164
- data/right_scraper.rconf +0 -13
- data/spec/builder_spec.rb +0 -50
- data/spec/cookbook_helper.rb +0 -73
- data/spec/cookbook_manifest_spec.rb +0 -93
- data/spec/cookbook_s3_upload_spec.rb +0 -159
- data/spec/download/download_retriever_spec.rb +0 -118
- data/spec/download/download_retriever_spec_helper.rb +0 -72
- data/spec/download/download_spec.rb +0 -128
- data/spec/download/multi_dir_spec.rb +0 -106
- data/spec/download/multi_dir_spec_helper.rb +0 -40
- data/spec/git/cookbook_spec.rb +0 -165
- data/spec/git/demokey +0 -27
- data/spec/git/demokey.pub +0 -1
- data/spec/git/password_key +0 -30
- data/spec/git/password_key.pub +0 -1
- data/spec/git/repository_spec.rb +0 -110
- data/spec/git/retriever_spec.rb +0 -553
- data/spec/git/retriever_spec_helper.rb +0 -112
- data/spec/git/scraper_spec.rb +0 -151
- data/spec/git/ssh_spec.rb +0 -174
- data/spec/git/url_spec.rb +0 -103
- data/spec/logger_spec.rb +0 -185
- data/spec/repository_spec.rb +0 -111
- data/spec/retriever_spec_helper.rb +0 -146
- data/spec/scanner_spec.rb +0 -61
- data/spec/scraper_helper.rb +0 -88
- data/spec/scraper_spec.rb +0 -147
- data/spec/spec_helper.rb +0 -185
- data/spec/svn/cookbook_spec.rb +0 -96
- data/spec/svn/multi_svn_spec.rb +0 -64
- data/spec/svn/multi_svn_spec_helper.rb +0 -40
- data/spec/svn/repository_spec.rb +0 -72
- data/spec/svn/retriever_spec.rb +0 -266
- data/spec/svn/scraper_spec.rb +0 -90
- data/spec/svn/svn_retriever_spec_helper.rb +0 -90
- data/spec/svn/url_spec.rb +0 -47
- data/spec/url_spec.rb +0 -164
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,49 +21,394 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/scanners'
|
26
|
+
|
24
27
|
require 'json'
|
28
|
+
require 'right_popen'
|
29
|
+
require 'right_popen/safe_output_buffer'
|
30
|
+
require 'tmpdir'
|
31
|
+
|
32
|
+
module RightScraper::Scanners
|
33
|
+
|
34
|
+
# Load cookbook metadata from a filesystem.
|
35
|
+
class CookbookMetadata < ::RightScraper::Scanners::Base
|
36
|
+
JSON_METADATA = 'metadata.json'
|
37
|
+
RUBY_METADATA = 'metadata.rb'
|
38
|
+
|
39
|
+
UNDEFINED_COOKBOOK_NAME = 'undefined'
|
40
|
+
KNIFE_METADATA_SCRIPT_NAME = 'knife_metadata.rb'
|
41
|
+
|
42
|
+
JAILED_FILE_SIZE_CONSTRAINT = 128 * 1024 # 128 KB
|
43
|
+
FREED_FILE_SIZE_CONSTRAINT = 64 * 1024 # 64 KB
|
44
|
+
|
45
|
+
TARBALL_CREATE_TIMEOUT = 30 # ..to create the tarball
|
46
|
+
TARBALL_ARCHIVE_NAME = 'cookbook.tar'
|
47
|
+
|
48
|
+
# exceptions
|
49
|
+
class MetadataError < Exception; end
|
50
|
+
|
51
|
+
def tls
|
52
|
+
Thread.current[self.class.to_s.to_sym] ||= {}
|
53
|
+
end
|
54
|
+
|
55
|
+
def begin(resource)
|
56
|
+
@read_blk = nil
|
57
|
+
@cookbook = resource
|
58
|
+
true
|
59
|
+
end
|
60
|
+
|
61
|
+
# Complete a scan for the given resource.
|
62
|
+
#
|
63
|
+
# === Parameters ===
|
64
|
+
# resource(RightScraper::Resources::Base):: resource to scan
|
65
|
+
def end(resource)
|
66
|
+
@logger.operation(:metadata_parsing) do
|
67
|
+
if @read_blk
|
68
|
+
metadata = ::JSON.parse(@read_blk.call)
|
69
|
+
resource.metadata = metadata
|
70
|
+
|
71
|
+
# check for undefined cookbook name.
|
72
|
+
#
|
73
|
+
# note that many specs in right_scraper use badly formed metadata
|
74
|
+
# that is not even a hash so, to avoid having to fix all of them
|
75
|
+
# (and also in case the user's metadata.json is not a hash) check
|
76
|
+
# for the has_key? method.
|
77
|
+
#
|
78
|
+
# if real metadata is not a hash then that should cause failure
|
79
|
+
# at a higher level. if the cookbook name is actually defined as
|
80
|
+
# being 'undefined' then the user gets a warning anyway.
|
81
|
+
if (metadata.respond_to?(:has_key?) &&
|
82
|
+
metadata['name'] == UNDEFINED_COOKBOOK_NAME)
|
83
|
+
message =
|
84
|
+
'Cookbook name appears to be undefined and has been' +
|
85
|
+
' supplied automatically.'
|
86
|
+
@logger.note_warning(message)
|
87
|
+
end
|
88
|
+
else
|
89
|
+
# should not be scanning at all unless one of the metadata files was
|
90
|
+
# detected before starting scan.
|
91
|
+
fail 'Unexpected missing metadata'
|
92
|
+
end
|
93
|
+
end
|
94
|
+
true
|
95
|
+
ensure
|
96
|
+
@read_blk = nil
|
97
|
+
@cookbook = nil
|
98
|
+
end
|
99
|
+
|
100
|
+
# All done scanning this repository, we can tear down the warden container we may or
|
101
|
+
# may not have created while parsing the cookbooks for this repository.
|
102
|
+
#
|
103
|
+
def finish
|
104
|
+
begin
|
105
|
+
FileUtils.remove_entry_secure(tls[:tmpdir]) if tls[:tmpdir]
|
106
|
+
rescue ::Exception => e
|
107
|
+
@logger.note_warning(e.message)
|
108
|
+
end
|
25
109
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
# === Parameters
|
33
|
-
# cookbook(RightScraper::Resources::Cookbook):: cookbook to scan
|
34
|
-
def begin(cookbook)
|
35
|
-
@cookbook = cookbook
|
110
|
+
if warden = tls[:warden]
|
111
|
+
begin
|
112
|
+
warden.cleanup
|
113
|
+
rescue ::Exception => e
|
114
|
+
@logger.note_warning(e.message)
|
115
|
+
end
|
36
116
|
end
|
37
117
|
|
38
|
-
|
39
|
-
#
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
118
|
+
ensure
|
119
|
+
# Cleanup thread-local storage
|
120
|
+
tls.clear
|
121
|
+
end
|
122
|
+
|
123
|
+
# Notice a file during scanning.
|
124
|
+
#
|
125
|
+
# === Block
|
126
|
+
# Return the data for this file. We use a block because it may
|
127
|
+
# not always be necessary to read the data.
|
128
|
+
#
|
129
|
+
# === Parameters
|
130
|
+
# relative_position(String):: relative pathname for the file from root of cookbook
|
131
|
+
def notice(relative_position, &blk)
|
132
|
+
case relative_position
|
133
|
+
when JSON_METADATA
|
134
|
+
# preferred over RUBY_METADATA.
|
135
|
+
@read_blk = blk
|
136
|
+
when RUBY_METADATA
|
137
|
+
# defer to any JSON_METADATA, which we hope refers to the same info.
|
138
|
+
@read_blk ||= self.method(:generate_metadata_json)
|
139
|
+
end
|
140
|
+
true
|
141
|
+
end
|
142
|
+
|
143
|
+
# Notice a directory during scanning. Since metadata.{json,rb} is by
|
144
|
+
# definition only in the root directory we don't need to recurse,
|
145
|
+
# but we do need to go into the first directory (identified by
|
146
|
+
# +relative_position+ being +nil+).
|
147
|
+
#
|
148
|
+
# === Parameters
|
149
|
+
# relative_position(String):: relative pathname for the directory from root of cookbook
|
150
|
+
#
|
151
|
+
# === Returns
|
152
|
+
# Boolean:: should the scanning recurse into the directory
|
153
|
+
def notice_dir(relative_position)
|
154
|
+
relative_position == nil
|
155
|
+
end
|
156
|
+
|
157
|
+
private
|
158
|
+
|
159
|
+
# Executes the 'metadata.rb' file from a cookbook. Because we don't want
|
160
|
+
# to evaluate arbitrary Ruby code, we need to sandbox it first using
|
161
|
+
# Warden.
|
162
|
+
#
|
163
|
+
# in order for knife metadata to succeed in the general case we need to
|
164
|
+
# copy some (but not all) of the cookbook directory AND its ancestors (if
|
165
|
+
# any) into the container. we will try and restrict copying to what might
|
166
|
+
# plausibly be referenced by 'metadata.rb' but this could be anything like
|
167
|
+
# a LICENSE, README, etc. the best heuristic seems to be to copy any file
|
168
|
+
# whose size is small (at most 128 KB) because 'metadata.rb' should not be
|
169
|
+
# executing binaries and should only consume text files of a reasonable
|
170
|
+
# size. if these restrictions cause a problem then the user is free to
|
171
|
+
# pre-knife his own 'metadata.json' file and check it into the repo.
|
172
|
+
#
|
173
|
+
# note the selection of the jailed cookbook dir is specific to the
|
174
|
+
# behavior of knife metadata. the cookbook name is defined when the
|
175
|
+
# 'metadata.rb' declares the name attribute, but the name attribute is
|
176
|
+
# optional. when no name attribute is declared, the metadata automagically
|
177
|
+
# uses the parent directory name. this works okay so long as the parent
|
178
|
+
# directory name is actually the cookbook name. in the case of a repo with
|
179
|
+
# 'metadata.rb' at the root (i.e. no checked-in parent directory) then the
|
180
|
+
# cookbook name is undefined. in this case, we want the cookbook name to
|
181
|
+
# be 'undefined' to remind the user to declare the name explicitly.
|
182
|
+
#
|
183
|
+
# === Returns
|
184
|
+
# @return [String] metadata JSON text
|
185
|
+
def generate_metadata_json
|
186
|
+
@logger.operation(:metadata_generation) do
|
187
|
+
# note we will use the same tmpdir path inside and outside the
|
188
|
+
# container only because it is non-trivial to invoke mktmpdir inside
|
189
|
+
# the container.
|
190
|
+
tmpdir = create_tmpdir
|
191
|
+
|
192
|
+
# arrest: work out the paths for the knife script, the jailed cookbook and the metadata JSON
|
193
|
+
knife_metadata_script_path = ::File.join(tmpdir, KNIFE_METADATA_SCRIPT_NAME)
|
194
|
+
jailed_repo_dir = ::File.join(tmpdir, UNDEFINED_COOKBOOK_NAME)
|
195
|
+
jailed_cookbook_dir = (@cookbook.pos == '.' && jailed_repo_dir) || ::File.join(jailed_repo_dir, @cookbook.pos)
|
196
|
+
jailed_metadata_json_path = ::File.join(jailed_cookbook_dir, JSON_METADATA)
|
197
|
+
freed_metadata_json_path = ::File.join(tmpdir, JSON_METADATA)
|
198
|
+
|
199
|
+
# map of files to copy out of the jail (jailed path => freed path)
|
200
|
+
copy_out = { jailed_metadata_json_path => freed_metadata_json_path }
|
201
|
+
|
202
|
+
begin
|
203
|
+
# jail the repo
|
204
|
+
unless warden = tls[:warden]
|
205
|
+
# Get a list of the files in the repo we need
|
206
|
+
create_knife_metadata_script(knife_metadata_script_path)
|
207
|
+
copy_in = generate_copy_in
|
208
|
+
copy_in << knife_metadata_script_path
|
209
|
+
|
210
|
+
# Create the container, one for all in this repo
|
211
|
+
warden = tls[:warden] = create_warden
|
212
|
+
|
213
|
+
# tar up the required pieces of the repo and copy them into the container
|
214
|
+
cookbook_tarball_path = ::File.join(tmpdir, TARBALL_ARCHIVE_NAME)
|
215
|
+
# prosecute: archive the selected files, re-rooted under the jailed repo dir
|
216
|
+
create_cookbook_tarball(cookbook_tarball_path, copy_in, jailed_repo_dir)
|
217
|
+
|
218
|
+
# unarchive the tarball on the other side (this is faster than single file copies)
|
219
|
+
cmd = "tar -Pxf #{cookbook_tarball_path.inspect}"
|
220
|
+
warden.run_command_in_jail(cmd, cookbook_tarball_path, nil)
|
50
221
|
end
|
222
|
+
|
223
|
+
# Generate the metadata
|
224
|
+
cmd = "export LC_ALL='en_US.UTF-8'; ruby #{knife_metadata_script_path.inspect} #{jailed_cookbook_dir.inspect}"
|
225
|
+
warden.run_command_in_jail(cmd, nil, copy_out)
|
226
|
+
|
227
|
+
# constraining the generated file size is debatable, but our UI
|
228
|
+
# attempts to load metadata JSON into memory far too often to be
|
229
|
+
# blasé about generating multi-megabyte JSON files.
|
230
|
+
unless ::File.file?(freed_metadata_json_path)
|
231
|
+
raise MetadataError, 'Generated JSON file not found.'
|
232
|
+
end
|
233
|
+
freed_metadata_json_size = ::File.stat(freed_metadata_json_path).size
|
234
|
+
if freed_metadata_json_size <= FREED_FILE_SIZE_CONSTRAINT
|
235
|
+
# parole for good behavior: size is within the limit, so return the JSON text
|
236
|
+
return ::File.read(freed_metadata_json_path)
|
237
|
+
else
|
238
|
+
# life imprisonment: generated JSON exceeds the size limit.
|
239
|
+
raise MetadataError,
|
240
|
+
"Generated metadata size of" +
|
241
|
+
" #{freed_metadata_json_size / 1024} KB" +
|
242
|
+
" exceeded the allowed limit of" +
|
243
|
+
" #{FREED_FILE_SIZE_CONSTRAINT / 1024} KB"
|
244
|
+
end
|
245
|
+
rescue ::RightScraper::Processes::Warden::LinkError => e
|
246
|
+
raise MetadataError,
|
247
|
+
"Failed to generate metadata from source: #{e.message}" +
|
248
|
+
"\n#{e.link_result.stdout}" +
|
249
|
+
"\n#{e.link_result.stderr}"
|
51
250
|
end
|
52
251
|
end
|
252
|
+
end
|
253
|
+
|
254
|
+
def stdout_tarball(data)
|
255
|
+
@stdout_buffer << data
|
256
|
+
end
|
257
|
+
|
258
|
+
def stderr_tarball(data)
|
259
|
+
@stderr_buffer.safe_buffer_data(data)
|
260
|
+
end
|
261
|
+
|
262
|
+
def timeout_tarball
|
263
|
+
raise MetadataError,
|
264
|
+
"Timed out waiting for tarball to build.\n" +
|
265
|
+
"stdout: #{@stdout_buffer.join}\n" +
|
266
|
+
"stderr: #{@stderr_buffer.display_text}"
|
267
|
+
end
|
53
268
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
269
|
+
# @param [String] dest_file
|
270
|
+
# @param [Array] contents
|
271
|
+
# @param [String] dest_path
|
272
|
+
def create_cookbook_tarball(dest_file, contents, dest_path)
|
273
|
+
@logger.operation(:tarball_generation) do
|
274
|
+
tarball_cmd = [
|
275
|
+
'tar',
|
276
|
+
"-Pcf #{dest_file}",
|
277
|
+
"--transform='s,#{@cookbook.repo_dir},#{dest_path},'",
|
278
|
+
'-T', '-'
|
279
|
+
]
|
280
|
+
|
281
|
+
@stdout_buffer = []
|
282
|
+
@stderr_buffer = ::RightScale::RightPopen::SafeOutputBuffer.new
|
283
|
+
begin
|
284
|
+
::RightScale::RightPopen.popen3_sync(
|
285
|
+
tarball_cmd.join(' '),
|
286
|
+
:target => self,
|
287
|
+
:timeout_handler => :timeout_tarball,
|
288
|
+
:input => contents.join("\n"),
|
289
|
+
:stderr_handler => :stderr_tarball,
|
290
|
+
:stdout_handler => :stdout_tarball,
|
291
|
+
:inherit_io => true, # avoid killing any rails connection
|
292
|
+
:timeout_seconds => TARBALL_CREATE_TIMEOUT)
|
293
|
+
rescue Exception => e
|
294
|
+
raise MetadataError,
|
295
|
+
"Failed to generate cookbook tarball from source files: #{e.message}\n" +
|
296
|
+
"stdout: #{@stdout_buffer.join}\n" +
|
297
|
+
"stderr: #{@stderr_buffer.display_text}"
|
298
|
+
end
|
66
299
|
end
|
67
300
|
end
|
301
|
+
|
302
|
+
# generates a script that runs Chef's knife tool. it presumes the jail
|
303
|
+
# contains a ruby interpreter that has chef installed as a gem.
|
304
|
+
#
|
305
|
+
# we want to avoid using the knife command line only because it requires a
|
306
|
+
# '$HOME/.chef/knife.rb' configuration file even though we won't use that
|
307
|
+
# configuration file in any way. :@
|
308
|
+
#
|
309
|
+
# the simplest solution is to execute the knife tool within a ruby script
|
310
|
+
# because it has no pre-configuration requirement and it does not require
|
311
|
+
# the knife binstub to be on the PATH.
|
312
|
+
def create_knife_metadata_script(script_path)
|
313
|
+
script = <<EOS
|
314
|
+
require 'rubygems'
|
315
|
+
require 'chef'
|
316
|
+
require 'chef/knife/cookbook_metadata'
|
317
|
+
|
318
|
+
jailed_cookbook_dir = ARGV.pop
|
319
|
+
knife_metadata = ::Chef::Knife::CookbookMetadata.new
|
320
|
+
knife_metadata.name_args = [::File.basename(jailed_cookbook_dir)]
|
321
|
+
knife_metadata.config[:cookbook_path] = ::File.dirname(jailed_cookbook_dir)
|
322
|
+
knife_metadata.run
|
323
|
+
EOS
|
324
|
+
::File.open(script_path, 'w') { |f| f.puts script }
|
325
|
+
true
|
326
|
+
end
|
327
|
+
|
328
|
+
# need to enumerate files relative to the cookbook directory because we
|
329
|
+
# have no idea what the metadata script will attempt to consume from the
|
330
|
+
# files available in its repository. it may even attempt to manipulate
|
331
|
+
# files in the system or go out to the network, which may or may not be
|
332
|
+
# allowed by the conditions of the jail.
|
333
|
+
#
|
334
|
+
# some cookbooks (for Windows, especially) will have large binaries
|
335
|
+
# included in the repository. we don't want to spend time copying these
|
336
|
+
# files into jail so limit the files that metadata can reference by size.
|
337
|
+
# presumably the jail would also be limiting disk space so it is important
|
338
|
+
# to avoid this source of failure.
|
339
|
+
#
|
340
|
+
# again, the user can work around these constraints by generating his own
|
341
|
+
# metadata and checking it into the repository.
|
342
|
+
#
|
343
|
+
# @return [Array] list of files to copy into jail
|
344
|
+
def generate_copy_in()
|
345
|
+
copy_in = []
|
346
|
+
start_path = @cookbook.repo_dir
|
347
|
+
recursive_generate_copy_in(copy_in, start_path)
|
348
|
+
copy_in
|
349
|
+
end
|
350
|
+
|
351
|
+
# recursive part of generate_copy_in
|
352
|
+
def recursive_generate_copy_in(copy_in, current_path)
|
353
|
+
limited_files_of(current_path) { |file| copy_in << file }
|
354
|
+
directories_of(current_path) do |dir|
|
355
|
+
recursive_generate_copy_in(copy_in, ::File.join(dir))
|
356
|
+
end
|
357
|
+
true
|
358
|
+
end
|
359
|
+
|
360
|
+
# yields files in parent meeting size criteria.
|
361
|
+
def limited_files_of(parent)
|
362
|
+
::Dir["#{parent}/*"].each do |item|
|
363
|
+
if ::File.file?(item)
|
364
|
+
if ::File.stat(item).size <= JAILED_FILE_SIZE_CONSTRAINT
|
365
|
+
yield item
|
366
|
+
else
|
367
|
+
if ::File.basename(item) == RUBY_METADATA
|
368
|
+
raise MetadataError,
|
369
|
+
'Metadata source file' +
|
370
|
+
" #{relative_to_repo_dir(item).inspect}" +
|
371
|
+
' in repository exceeded size constraint of' +
|
372
|
+
" #{JAILED_FILE_SIZE_CONSTRAINT / 1024} KB"
|
373
|
+
else
|
374
|
+
message = 'Ignored a repository file during metadata' +
|
375
|
+
' generation due to exceeding size constraint of' +
|
376
|
+
" #{JAILED_FILE_SIZE_CONSTRAINT / 1024} KB:" +
|
377
|
+
" #{relative_to_repo_dir(item).inspect}"
|
378
|
+
@logger.info(message)
|
379
|
+
end
|
380
|
+
end
|
381
|
+
end
|
382
|
+
end
|
383
|
+
end
|
384
|
+
|
385
|
+
# yields directories of parent.
|
386
|
+
def directories_of(parent)
|
387
|
+
::Dir["#{parent}/*"].each do |item|
|
388
|
+
case item
|
389
|
+
when '.', '..'
|
390
|
+
# do nothing
|
391
|
+
else
|
392
|
+
yield item if ::File.directory?(item)
|
393
|
+
end
|
394
|
+
end
|
395
|
+
end
|
396
|
+
|
397
|
+
# converts the given absolute path to be relative to repo_dir.
|
398
|
+
def relative_to_repo_dir(path)
|
399
|
+
path[(@cookbook.repo_dir.length + 1)..-1]
|
400
|
+
end
|
401
|
+
|
402
|
+
# factory method for an object capable of running command in jail
|
403
|
+
# (convenient for testing).
|
404
|
+
def create_warden
|
405
|
+
::RightScraper::Processes::Warden.new
|
406
|
+
end
|
407
|
+
|
408
|
+
# factory method for tmpdir (convenient for testing).
|
409
|
+
def create_tmpdir
|
410
|
+
tls[:tmpdir] ||= ::Dir.mktmpdir
|
411
|
+
end
|
412
|
+
|
68
413
|
end
|
69
414
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -20,65 +20,68 @@
|
|
20
20
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
|
+
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/scanners'
|
26
|
+
|
23
27
|
require 'right_aws'
|
24
28
|
require 'json'
|
25
29
|
require 'digest/md5'
|
26
30
|
|
27
|
-
module RightScraper
|
28
|
-
module Scanners
|
29
|
-
# Upload scanned files to an S3 bucket.
|
30
|
-
class CookbookS3Upload < Base
|
31
|
-
# Create a new S3Upload. In addition to the options recognized
|
32
|
-
# by Scanner, this class recognizes <tt>:s3_key</tt>,
|
33
|
-
# <tt>:s3_secret</tt>, and <tt>:s3_bucket</tt> and requires all
|
34
|
-
# of those.
|
35
|
-
#
|
36
|
-
# === Options
|
37
|
-
# <tt>:s3_key</tt>:: Required. S3 access key.
|
38
|
-
# <tt>:s3_secret</tt>:: Required. S3 secret key.
|
39
|
-
# <tt>:s3_bucket</tt>:: Required. Bucket to upload cookbooks to.
|
40
|
-
#
|
41
|
-
# === Parameters
|
42
|
-
# options(Hash):: scanner options
|
43
|
-
def initialize(options={})
|
44
|
-
super
|
45
|
-
s3_key = options.fetch(:s3_key)
|
46
|
-
s3_secret = options.fetch(:s3_secret)
|
47
|
-
s3 = RightAws::S3.new(aws_access_key_id=s3_key,
|
48
|
-
aws_secret_access_key=s3_secret,
|
49
|
-
:logger => Logger.new)
|
50
|
-
@bucket = s3.bucket(options.fetch(:s3_bucket))
|
51
|
-
raise "Need an actual, existing S3 bucket!" if @bucket.nil?
|
52
|
-
end
|
31
|
+
module RightScraper::Scanners
|
53
32
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
33
|
+
# Upload scanned files to an S3 bucket.
|
34
|
+
class CookbookS3Upload < ::RightScraper::Scanners::Base
|
35
|
+
# Create a new S3Upload. In addition to the options recognized
|
36
|
+
# by Scanner, this class recognizes <tt>:s3_key</tt>,
|
37
|
+
# <tt>:s3_secret</tt>, and <tt>:s3_bucket</tt> and requires all
|
38
|
+
# of those.
|
39
|
+
#
|
40
|
+
# === Options
|
41
|
+
# <tt>:s3_key</tt>:: Required. S3 access key.
|
42
|
+
# <tt>:s3_secret</tt>:: Required. S3 secret key.
|
43
|
+
# <tt>:s3_bucket</tt>:: Required. Bucket to upload cookbooks to.
|
44
|
+
#
|
45
|
+
# === Parameters
|
46
|
+
# options(Hash):: scanner options
|
47
|
+
def initialize(options={})
|
48
|
+
super
|
49
|
+
s3_key = options.fetch(:s3_key)
|
50
|
+
s3_secret = options.fetch(:s3_secret)
|
51
|
+
s3 = RightAws::S3.new(aws_access_key_id=s3_key,
|
52
|
+
aws_secret_access_key=s3_secret,
|
53
|
+
:logger => @logger)
|
54
|
+
@bucket = s3.bucket(options.fetch(:s3_bucket))
|
55
|
+
raise "Need an actual, existing S3 bucket!" if @bucket.nil?
|
56
|
+
end
|
57
|
+
|
58
|
+
# Upon ending a scan for a cookbook, upload the cookbook
|
59
|
+
# contents to S3.
|
60
|
+
#
|
61
|
+
# === Parameters
|
62
|
+
# cookbook(RightScraper::Cookbook):: cookbook to scan
|
63
|
+
def end(cookbook)
|
64
|
+
path = File.join('Cooks', cookbook.resource_hash)
|
65
|
+
unless @bucket.key(path).exists?
|
66
|
+
contents = cookbook.manifest_json
|
67
|
+
@bucket.put(path, contents)
|
65
68
|
end
|
69
|
+
end
|
66
70
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
end
|
71
|
+
# Upload a file during scanning.
|
72
|
+
#
|
73
|
+
# === Block
|
74
|
+
# Return the data for this file. We use a block because it may
|
75
|
+
# not always be necessary to read the data.
|
76
|
+
#
|
77
|
+
# === Parameters
|
78
|
+
# relative_position(String):: relative pathname for file from root of cookbook
|
79
|
+
def notice(relative_position)
|
80
|
+
contents = yield
|
81
|
+
name = Digest::MD5.hexdigest(contents)
|
82
|
+
path = File.join('Files', name)
|
83
|
+
unless @bucket.key(path).exists?
|
84
|
+
@bucket.put(path, contents)
|
82
85
|
end
|
83
86
|
end
|
84
87
|
end
|