ruby-manta 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/example.rb ADDED
@@ -0,0 +1,78 @@
1
require 'ruby-manta'

# Uploads every regular file in a local directory to a scratch directory in
# Manta, runs a word-count job over the uploaded objects, prints the totals,
# then removes the uploaded objects and the scratch directory again.
#
# You'll need to provide these four environment variables to run this
# example. E.g.:
# USER=john KEY=~/.ssh/john HOST=https://us-east.manta.joyent.com DIR=. ruby example.rb
host, user, priv_key, upload_dir = %w[HOST USER KEY DIR].map do |name|
  # Fail early with a readable message instead of a TypeError further down.
  ENV[name] || abort("Missing environment variable #{name}; see the comment at the top of this file.")
end

# Read in private key, create a MantaClient instance. MantaClient is
# thread-safe and provides persistent connections with pooling, so you'll
# only ever need a single instance of this in a program.
# expand_path covers the case where the shell did not expand a leading '~'.
priv_key_data = File.read(File.expand_path(priv_key))

# NOTE: :disable_ssl_verification switches off TLS certificate checking. It
# keeps the example usable against endpoints with self-signed certificates;
# remove the option when talking to a production endpoint.
client = MantaClient.new(host, user, priv_key_data,
                         :disable_ssl_verification => true)

# Create a directory in Manta solely for this example run.
dir_path = '/' + user + '/stor/ruby-manta-example'
client.put_directory(dir_path)

# Upload files in a local directory to the Manta directory.
file_paths = Dir[upload_dir + '/*'].select { |p| File.file? p }
file_paths.each do |file_path|
  file_name = File.basename(file_path)
  # Be careful about binary files and file encodings in Ruby 1.9. If you don't
  # use ASCII-8BIT (forced by 'rb' below), expect timeouts while PUTing an
  # object.
  file_data = File.open(file_path, 'rb') { |f| f.read }
  client.put_object(dir_path + '/' + file_name, file_data)
end

# This example job runs the wc UNIX command on every object for the
# map phase, then uses awk during reduce to sum up the three numbers each wc
# returned.
job_details = {
  :name => 'total word count',
  :phases => [ {
    :exec => 'wc'
  }, {
    :type => 'reduce',
    :exec => "awk '{ l += $1; w += $2; c += $3 } END { print l, w, c }'"
  } ]
}

# Create the job, then add the objects the job should operate on.
job_path, _ = client.create_job(job_details)

entries, _ = client.list_directory(dir_path)
obj_paths  = entries.select { |e| e['type'] == 'object' }.
                     map { |e| dir_path + '/' + e['name'] }

client.add_job_keys(job_path, obj_paths)

# Tell Manta we're done adding objects to the job. Manta doesn't need this
# to start running a job -- you can see map results without it, for
# example -- but reduce phases in particular depend on all mapping
# finishing.
client.end_job_input(job_path)

# Poll once a second until Manta reports the job as done.
loop do
  sleep 1
  job, _ = client.get_job(job_path)
  break if job['state'] == 'done'
end

# We expect exactly one result (the output of the single reduce phase).
# Fetch it and display it; if the job produced nothing, say so instead of
# passing nil to get_object.
results, _ = client.get_job_output(job_path)
if results.empty?
  warn 'The job produced no output; inspect client.get_job_errors(job_path).'
else
  data, _ = client.get_object(results.first)
  puts data
end

# Clean up; remove objects and directory.
obj_paths.each do |obj_path|
  client.delete_object(obj_path)
end

client.delete_directory(dir_path)
data/lib/ruby-manta.rb ADDED
@@ -0,0 +1,925 @@
1
+ # Copyright (c) 2012, Joyent, Inc. All rights reserved.
2
+ #
3
+ # ruby-manta is a simple low-abstraction layer which communicates with Joyent's
4
+ # Manta service.
5
+ #
6
+ # Manta is an HTTP-accessible object store supporting UNIX-based map-reduce
7
+ # jobs. Through ruby-manta a programmer can save/overwrite/delete objects
8
+ # stored on a Manta service, or run map-reduce jobs over those objects.
9
+ #
10
+ # ruby-manta should be thread-safe, and supports pooling of keep-alive
11
+ # connections to the same server (through HTTPClient). It only relies on the
12
+ # standard library and two pure Ruby libraries, so it should work anywhere.
13
+ #
14
+ # For more information about Manta and general ruby-manta usage, please see
15
+ # README.md.
16
+
17
+
18
+
19
+ require 'openssl'
20
+ require 'net/ssh'
21
+ require 'httpclient'
22
+ require 'base64'
23
+ require 'digest'
24
+ require 'time'
25
+ require 'json'
26
+ require 'cgi'
27
+
28
+ require File.expand_path('../version', __FILE__)
29
+
30
+
31
+
32
+ class MantaClient
33
# Defaults used when the corresponding option is not passed to #initialize.
# Timeouts are in seconds.
DEFAULT_ATTEMPTS        = 3
DEFAULT_CONNECT_TIMEOUT = 5
DEFAULT_SEND_TIMEOUT    = 60
DEFAULT_RECEIVE_TIMEOUT = 60

# Upper bound (and default) for the :limit option of #list_directory.
MAX_LIMIT = 1000

# Value sent in the User-Agent header of every request.
HTTP_AGENT = format('ruby-manta/%s (%s; %s) ruby/%s-p%s',
                    LIB_VERSION, RUBY_PLATFORM, OpenSSL::OPENSSL_VERSION,
                    RUBY_VERSION, RUBY_PATCHLEVEL)

# Template for the Authorization header; filled in with user, key
# fingerprint, algorithm name and base64 signature by #gen_signature.
HTTP_SIGNATURE = 'Signature keyId="/%s/keys/%s",algorithm="%s",signature="%s"'
40
# Paths accepted for objects and directories (/<user>/stor, /<user>/public or
# /<user>/reports, optionally followed by more path) and for jobs
# (/<user>/jobs/<job>, optionally followed by /live...).
#
# \A and \z anchor the whole string. The line anchors ^ and $ would let a
# value with embedded newlines pass as long as any single line looked valid.
OBJ_PATH_REGEX = Regexp.new('\A/.+/(?:stor|public|reports)(?:/|\z)')
JOB_PATH_REGEX = Regexp.new('\A/.+?/jobs/.+?(?:/live|\z)')
42
+
43
# match one or more protocol and hostnames, with optional port numbers.
# E.g. "http://example.com https://example.com:8443"
#
# Anchored with \A and \z (whole string) rather than ^ and $ (any line), so a
# value cannot smuggle extra lines into a header by having one valid line.
CORS_ORIGIN_REGEX = Regexp.new('\A\w+://[^\s\:]+(?:\:\d+)?' +
                               '(?:\s\w+://[^\s\:]+(?:\:\d+)?)*\z')

# A comma-and-space separated list of header names, e.g. "X-Foo, Accept".
CORS_HEADERS_REGEX = Regexp.new('\A[\w-]+(?:, [\w-]+)*\z')

# HTTP methods that may appear in Access-Control-Allow-Methods.
CORS_METHODS = [ 'GET', 'POST', 'PUT', 'DELETE', 'OPTIONS' ].freeze
49
+
50
# Names of the exception classes created under MantaClient. The first group
# mirrors error codes returned by the Manta service; the last line lists
# errors that are specific to this class. Frozen so the list cannot be
# altered after the exception classes have been generated from it.
ERROR_CLASSES = %w[
  AuthorizationFailed AuthSchemeNotAllowed
  BadRequest Checksum ConcurrentRequest
  ContentLength ContentMD5Mismatch
  DirectoryDoesNotExist DirectoryExists
  DirectoryNotEmpty DirectoryOperation
  EntityExists Internal InvalidArgument
  InvalidAuthToken InvalidCredentials
  InvalidDurabilityLevel InvalidJob InvalidKeyId
  InvalidLink InvalidSignature InvalidJobState
  JobNotFound JobState KeyDoesNotExist
  LinkNotFound LinkNotObject LinkRequired
  NotAcceptable NotEnoughSpace ParentNotDirectory
  PreconditionFailed PreSignedRequest
  RequestEntityTooLarge ResourceNotFound
  RootDirectory SecureTransportRequired
  ServiceUnavailable SourceObjectNotFound
  SSLRequired TaskInit UploadTimeout
  UserDoesNotExist UserTaskError
  CorruptResult UnknownError UnsupportedKey
].freeze
71
+
72
+
73
+
74
+ # Initialize a MantaClient instance.
75
+ #
76
+ # priv_key_data is data read directly from an SSH private key (i.e. PEM
77
+ # format). The method can also accept several optional args: :connect_timeout,
78
+ # :send_timeout, :receive_timeout, :disable_ssl_verification and :attempts.
79
+ # The timeouts are in seconds, and :attempts determines the default number of
80
+ # attempts each method will make upon receiving recoverable errors.
81
+ #
82
+ # Will throw an exception if given a key whose format it doesn't understand.
83
# host is the http(s) URL of the Manta service; trailing slashes are dropped
# because request paths (which start with '/') are appended directly to it.
# Raises ArgumentError for a malformed host, an empty user or a non-positive
# :attempts, and UnsupportedKey when priv_key_data is neither an RSA nor a
# DSA private key.
def initialize(host, user, priv_key_data, opts = {})
  # Anchor the whole string and forbid whitespace, so that only a single
  # well-formed http(s) URL is accepted.
  unless host.is_a?(String) && host =~ /\Ahttps?:\/\/[^\/\s][^\s]*\z/
    raise ArgumentError, 'host must be an http:// or https:// URL'
  end
  raise ArgumentError unless user.is_a?(String) && user.size > 0

  @host = host.sub(/\/+\z/, '')
  @user = user

  @attempts = opts[:attempts] || DEFAULT_ATTEMPTS
  raise ArgumentError unless @attempts > 0

  case priv_key_data
  when /BEGIN RSA/
    @digest      = OpenSSL::Digest::SHA1.new
    @digest_name = 'rsa-sha1'
    algorithm    = OpenSSL::PKey::RSA
  when /BEGIN DSA/
    # NOTE(review): OpenSSL::Digest::DSS1 does not exist in newer Ruby
    # OpenSSL bindings; SHA1 is presumably the equivalent digest for DSA
    # signatures there -- confirm against the Manta service.
    @digest      = if OpenSSL::Digest.const_defined?(:DSS1)
                     OpenSSL::Digest::DSS1.new
                   else
                     OpenSSL::Digest::SHA1.new
                   end
    @digest_name = 'dsa-sha1'
    algorithm    = OpenSSL::PKey::DSA
  else
    # The error classes are generated from ERROR_CLASSES, which names this
    # one 'UnsupportedKey' (there is no UnsupportedKeyError).
    raise UnsupportedKey, 'only RSA and DSA private keys are supported'
  end

  @priv_key    = algorithm.new(priv_key_data)
  # MD5 of the key blob, formatted as colon-separated hex pairs.
  @fingerprint = OpenSSL::Digest::MD5.hexdigest(@priv_key.to_blob).
                                      scan(/../).join(':')

  @client = HTTPClient.new
  @client.connect_timeout = opts[:connect_timeout] || DEFAULT_CONNECT_TIMEOUT
  @client.send_timeout    = opts[:send_timeout   ] || DEFAULT_SEND_TIMEOUT
  @client.receive_timeout = opts[:receive_timeout] || DEFAULT_RECEIVE_TIMEOUT
  @client.ssl_config.verify_mode = nil if opts[:disable_ssl_verification]

  @job_base = '/' + user + '/jobs'
end
117
+
118
+
119
+
120
+ # Uploads object data to Manta to the given path, along with a computed MD5
121
+ # hash.
122
+ #
123
+ # The path must start with /<user>/stor or /<user>/public. Data can be any
124
+ # sequence of octets. The HTTP Content-Type stored on Manta can be set
125
+ # with an optional :content_type argument; the default is
126
+ # application/octet-stream. The number of distributed replicates of an object
127
+ # stored in Manta can be set with an optional :durability_level; the default
128
+ # is 2.
129
+ #
130
+ # Returns true along with received HTTP headers.
131
+ #
132
+ # If there was an unrecoverable error, throws an exception. On connection or
133
+ # corruption errors, more attempts will be made; the number of attempts can
134
+ # be altered by passing in :attempts.
135
# Raises ArgumentError if :durability_level is not a positive number or
# :content_type is not a String. The opts hash passed in is not modified.
def put_object(obj_path, data, opts = {})
  url = obj_url(obj_path)

  # gen_headers derives Content-MD5 from :data. Give it a merged copy so the
  # caller's hash is left untouched and does not end up holding the payload.
  headers = gen_headers(opts.merge(:data => data))
  headers.concat(gen_cors_headers(opts))

  durability_level = opts[:durability_level]
  if durability_level
    raise ArgumentError unless durability_level.is_a?(Numeric) &&
                               durability_level > 0
    # Header values are text; send the number as a string.
    headers.push([ 'Durability-Level', durability_level.to_s ])
  end

  content_type = opts[:content_type]
  if content_type
    raise ArgumentError unless content_type.is_a? String
    headers.push([ 'Content-Type', content_type ])
  end

  attempt(opts[:attempts]) do
    result = @client.put(url, data, headers)
    raise unless result.is_a? HTTP::Message

    return true, result.headers if [204, 304].include? result.status
    raise_error(result)
  end
end
164
+
165
+
166
+
167
+ # Get an object from Manta at a given path, and checks it's uncorrupted.
168
+ #
169
+ # The path must start with /<user>/stor or /<user>/public and point at an
170
+ # actual object, as well as output objects for jobs. :head => true can
171
+ # optionally be passed in to do a HEAD instead of a GET.
172
+ #
173
+ # Returns the retrieved data along with received HTTP headers.
174
+ #
175
+ # If there was an unrecoverable error, throws an exception. On connection or
176
+ # corruption errors, more attempts will be made; the number of attempts can
177
+ # be altered by passing in :attempts.
178
# On a 200 GET the body's MD5 is compared with the Content-MD5 header and
# CorruptResult is raised on mismatch. A 200 HEAD yields true instead of
# data; a 304 yields nil. Any other status goes through raise_error.
def get_object(obj_path, opts = {})
  target   = obj_url(obj_path)
  req_hdrs = gen_headers(opts)
  verb     = opts[:head] ? :head : :get

  attempt(opts[:attempts]) do
    response = @client.send(verb, target, nil, req_hdrs)
    raise unless response.is_a? HTTP::Message

    case response.status
    when 200
      return true, response.headers if verb == :head

      expected_md5 = response.headers['Content-MD5']
      actual_md5   = Digest::MD5.base64digest(response.body)
      raise CorruptResult unless expected_md5 == actual_md5

      return response.body, response.headers
    when 304
      return nil, response.headers
    end

    raise_error(response)
  end
end
202
+
203
+
204
+
205
+ # Deletes an object off Manta at a given path.
206
+ #
207
+ # The path must start with /<user>/stor or /<user>/public and point at an
208
+ # actual object.
209
+ #
210
+ # Returns true along with received HTTP headers.
211
+ #
212
+ # If there was an unrecoverable error, throws an exception. On connection or
213
+ # corruption errors, more attempts will be made; the number of attempts can
214
+ # be altered by passing in :attempts.
215
# Issues a DELETE for the object; only a 204 counts as success, every other
# status is turned into an exception by raise_error.
def delete_object(obj_path, opts = {})
  target   = obj_url(obj_path)
  req_hdrs = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.delete(target, nil, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 204

    return true, response.headers
  end
end
227
+
228
+
229
+
230
+ # Creates a directory on Manta at a given path.
231
+ #
232
+ # The path must start with /<user>/stor or /<user>/public.
233
+ #
234
+ # Returns true along with received HTTP headers.
235
+ #
236
+ # If there was an unrecoverable error, throws an exception. On connection or
237
+ # corruption errors, more attempts will be made; the number of attempts can
238
+ # be altered by passing in :attempts.
239
# Sends a body-less PUT whose Content-Type marks the target as a directory.
# CORS options in opts are translated to headers by gen_cors_headers. Only a
# 204 counts as success.
def put_directory(dir_path, opts = {})
  target   = obj_url(dir_path)
  req_hdrs = gen_headers(opts) +
             [[ 'Content-Type', 'application/json; type=directory' ]] +
             gen_cors_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.put(target, nil, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 204

    return true, response.headers
  end
end
255
+
256
+
257
+
258
+ # Gets a lexicographically sorted directory listing on Manta at a given path.
259
+ #
260
+ # The path must start with /<user>/stor or /<user>/public and point at an
261
+ # actual directory. :limit optionally changes the maximum number of entries;
262
+ # the default is 1000. If given :marker, an object name in the directory,
263
+ # returned directory entries will begin from that point. :head => true can
264
+ # optionally be passed in to do a HEAD instead of a GET.
265
+ #
266
+ # Returns an array of hash objects, each object representing a directory
267
+ # entry. Also returns the received HTTP headers.
268
+ #
269
+ # If there was an unrecoverable error, throws an exception. On connection or
270
+ # corruption errors, more attempts will be made; the number of attempts can
271
+ # be altered by passing in :attempts.
272
# Raises ArgumentError unless :limit is within 1..MAX_LIMIT and :marker (if
# given) is a String. The body is parsed as one JSON document per line; the
# number of lines is checked against the Result-Set-Size header and the
# limit, and CorruptResult is raised when they disagree.
def list_directory(dir_path, opts = {})
  target   = obj_url(dir_path)
  req_hdrs = gen_headers(opts)

  max_entries = opts[:limit] || MAX_LIMIT
  raise ArgumentError unless 0 < max_entries && max_entries <= MAX_LIMIT
  query = { :limit => max_entries }

  start_after = opts[:marker]
  if start_after
    raise ArgumentError unless start_after.is_a? String
    query[:marker] = start_after
  end

  verb = opts[:head] ? :head : :get

  attempt(opts[:attempts]) do
    response = @client.send(verb, target, query, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 200

    raise unless response.headers['Content-Type'] ==
                 'application/x-json-stream; type=directory'

    return true, response.headers if verb == :head

    lines    = response.body.split("\n")
    announced = response.headers['Result-Set-Size'].to_i
    received  = lines.size

    # Never more than the limit, and either everything the server announced
    # or exactly one full page of it.
    if received > max_entries ||
       (received != announced && received != max_entries)
      raise CorruptResult
    end

    return lines.map { |line| JSON.parse(line) }, response.headers
  end
end
314
+
315
+
316
+
317
+ # Removes a directory from Manta at a given path.
318
+ #
319
+ # The path must start with /<user>/stor or /<user>/public and point at an
320
+ # actual directory.
321
+ #
322
+ # Returns true along with received HTTP headers.
323
+ #
324
+ # If there was an unrecoverable error, throws an exception. On connection or
325
+ # corruption errors, more attempts will be made; the number of attempts can
326
+ # be altered by passing in :attempts.
327
# Issues a DELETE for the directory; only a 204 counts as success, every
# other status is turned into an exception by raise_error.
def delete_directory(dir_path, opts = {})
  target   = obj_url(dir_path)
  req_hdrs = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.delete(target, nil, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 204

    return true, response.headers
  end
end
339
+
340
+
341
+
342
+ # Creates a snaplink from one object in Manta at a given path to a different
343
+ # path.
344
+ #
345
+ # Both paths should start with /<user>/stor or /<user>/public.
346
+ #
347
+ # Returns true along with received HTTP headers.
348
+ #
349
+ # If there was an unrecoverable error, throws an exception. On connection or
350
+ # corruption errors, more attempts will be made; the number of attempts can
351
+ # be altered by passing in :attempts.
352
# Sends a body-less PUT to link_path whose Content-Type marks it as a link
# and whose Location header carries the URL built from orig_path. Only a 204
# counts as success.
def put_snaplink(orig_path, link_path, opts = {})
  req_hdrs = gen_headers(opts)
  req_hdrs << [ 'Content-Type', 'application/json; type=link' ]
  req_hdrs << [ 'Location',     obj_url(orig_path)            ]

  attempt(opts[:attempts]) do
    response = @client.put(obj_url(link_path), nil, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 204

    return true, response.headers
  end
end
365
+
366
+
367
+
368
+ # Creates a job in Manta.
369
+ #
370
+ # The job must be a hash, containing at minimum a :phases key. See README.md
371
+ # or the Manta docs to see the format and options for setting up a job on
372
+ # Manta; this method effectively just converts the job hash to JSON and sends
373
+ # to the Manta service.
374
+ #
375
+ # Returns the path for the new job, along with received HTTP headers.
376
+ #
377
+ # If there was an unrecoverable error, throws an exception. On connection or
378
+ # corruption errors, more attempts will be made; the number of attempts can
379
+ # be altered by passing in :attempts.
380
# Raises ArgumentError when the job hash has neither a :phases nor a
# 'phases' entry. The job is POSTed as JSON; on a 201 the Location header of
# the response is returned as the job path.
def create_job(job, opts = {})
  raise ArgumentError unless job[:phases] || job['phases']

  req_hdrs = gen_headers(opts) <<
             [ 'Content-Type', 'application/json; type=job' ]
  payload  = job.to_json

  attempt(opts[:attempts]) do
    response = @client.post(job_url(), payload, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 201

    new_job_path = response.headers['Location']
    raise unless new_job_path

    return new_job_path, response.headers
  end
end
401
+
402
+
403
+
404
+ # Gets various information about a job in Manta at a given path.
405
+ #
406
+ # The path must start with /<user>/jobs/<job UUID> and point at an actual job.
407
+ # :head => true can optionally be passed in to do a HEAD instead of a GET.
408
+ #
409
+ # Returns a hash with job information, along with received HTTP headers.
410
+ #
411
+ # If there was an unrecoverable error, throws an exception. On connection or
412
+ # corruption errors, more attempts will be made; the number of attempts can
413
+ # be altered by passing in :attempts.
414
# Requests <job_path>/live/status. A 200 must carry Content-Type
# application/json; the parsed body (or true for HEAD) is returned.
def get_job(job_path, opts = {})
  status_url = job_url(job_path, '/live/status')
  req_hdrs   = gen_headers(opts)
  verb       = opts[:head] ? :head : :get

  attempt(opts[:attempts]) do
    response = @client.send(verb, status_url, nil, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 200

    raise unless response.headers['Content-Type'] == 'application/json'
    return true, response.headers if verb == :head

    return JSON.parse(response.body), response.headers
  end
end
435
+
436
+
437
+
438
+ # Gets errors that occurred during the execution of a job in Manta at a given
439
+ # path.
440
+ #
441
+ # The path must start with /<user>/jobs/<job UUID> and point at an actual job.
442
+ # :head => true can optionally be passed in to do a HEAD instead of a GET.
443
+ #
444
+ # Returns an array of hashes, each hash containing information about an
445
+ # error; this information is best-effort by Manta, so it may not be complete.
446
+ # Also returns received HTTP headers.
447
+ #
448
+ # If there was an unrecoverable error, throws an exception. On connection or
449
+ # corruption errors, more attempts will be made; the number of attempts can
450
+ # be altered by passing in :attempts.
451
# Requests <job_path>/live/err. A 200 must carry the job-error JSON stream
# Content-Type; each line of the body is parsed as one JSON document.
def get_job_errors(job_path, opts = {})
  errors_url = job_url(job_path, '/live/err')
  req_hdrs   = gen_headers(opts)
  verb       = opts[:head] ? :head : :get

  attempt(opts[:attempts]) do
    response = @client.send(verb, errors_url, nil, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 200

    raise unless response.headers['Content-Type'] ==
                 'application/x-json-stream; type=job-error'

    return true, response.headers if verb == :head

    parsed = response.body.split("\n").map { |line| JSON.parse(line) }
    return parsed, response.headers
  end
end
475
+
476
+
477
+
478
+ # Cancels a running job in Manta at a given path.
479
+ #
480
+ # The path must start with /<user>/jobs/<job UUID> and point at an actual job.
481
+ #
482
+ # Returns true, along with received HTTP headers.
483
+ #
484
+ # If there was an unrecoverable error, throws an exception. On connection or
485
+ # corruption errors, more attempts will be made; the number of attempts can
486
+ # be altered by passing in :attempts.
487
# POSTs to <job_path>/live/cancel; only a 202 counts as success.
def cancel_job(job_path, opts = {})
  cancel_url = job_url(job_path, '/live/cancel')
  req_hdrs   = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.post(cancel_url, nil, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 202

    return true, response.headers
  end
end
499
+
500
+
501
+
502
+ # Adds objects for a running job in Manta to process.
503
+ #
504
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
505
+ # running job. The obj_paths must be an array of paths, starting with
506
+ # /<user>/stor or /<user>/public, pointing at actual objects.
507
+ #
508
+ # Returns true, along with received HTTP headers.
509
+ #
510
+ # If there was an unrecoverable error, throws an exception. On connection or
511
+ # corruption errors, more attempts will be made; the number of attempts can
512
+ # be altered by passing in :attempts.
513
# POSTs the object paths, one per line as text/plain, to <job_path>/live/in;
# only a 204 counts as success.
def add_job_keys(job_path, obj_paths, opts = {})
  input_url = job_url(job_path, '/live/in')
  req_hdrs  = gen_headers(opts) << [ 'Content-Type', 'text/plain' ]
  payload   = obj_paths.join("\n")

  attempt(opts[:attempts]) do
    response = @client.post(input_url, payload, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 204

    return true, response.headers
  end
end
528
+
529
+
530
+
531
+ # Inform Manta that no more objects will be added for processing by a job,
532
+ # and that the job should finish all phases and terminate.
533
+ #
534
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
535
+ # running job.
536
+ #
537
+ # Returns true, along with received HTTP headers.
538
+ #
539
+ # If there was an unrecoverable error, throws an exception. On connection or
540
+ # corruption errors, more attempts will be made; the number of attempts can
541
+ # be altered by passing in :attempts.
542
# POSTs to <job_path>/live/in/end; only a 202 counts as success.
def end_job_input(job_path, opts = {})
  end_url  = job_url(job_path, '/live/in/end')
  req_hdrs = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.post(end_url, nil, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 202

    return true, response.headers
  end
end
554
+
555
+
556
+
557
+ # Get a list of objects that have been given to a Manta job for processing.
558
+ #
559
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
560
+ # running job.
561
+ #
562
+ # Returns an array of object paths, along with received HTTP headers.
563
+ #
564
+ # If there was an unrecoverable error, throws an exception. On connection or
565
+ # corruption errors, more attempts will be made; the number of attempts can
566
+ # be altered by passing in :attempts.
567
# Thin wrapper: reads the job's :in stream via get_job_state_streams.
def get_job_input(job_path, opts = {})
  paths, headers = get_job_state_streams(:in, job_path, opts)
  return paths, headers
end
570
+
571
+
572
+
573
+ # Get a list of objects that contain the intermediate results of a running
574
+ # Manta job.
575
+ #
576
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
577
+ # running job.
578
+ #
579
+ # Returns an array of object paths, along with received HTTP headers.
580
+ #
581
+ # If there was an unrecoverable error, throws an exception. On connection or
582
+ # corruption errors, more attempts will be made; the number of attempts can
583
+ # be altered by passing in :attempts.
584
# Thin wrapper: reads the job's :out stream via get_job_state_streams.
def get_job_output(job_path, opts = {})
  paths, headers = get_job_state_streams(:out, job_path, opts)
  return paths, headers
end
587
+
588
+
589
+
590
+ # Get a list of objects that had failures during processing in a Manta job.
591
+ #
592
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
593
+ # running job.
594
+ #
595
+ # Returns an array of object paths, along with received HTTP headers.
596
+ #
597
+ # If there was an unrecoverable error, throws an exception. On connection or
598
+ # corruption errors, more attempts will be made; the number of attempts can
599
+ # be altered by passing in :attempts.
600
# Thin wrapper: reads the job's :fail stream via get_job_state_streams.
def get_job_failures(job_path, opts = {})
  paths, headers = get_job_state_streams(:fail, job_path, opts)
  return paths, headers
end
603
+
604
+
605
+
606
+ # Get lists of Manta jobs.
607
+ #
608
+ # The state indicates which kind of jobs to return. :running is for jobs
609
+ # that are currently processing, :done and :all should be obvious. Be careful
610
+ # of the latter two if you've run a lot of jobs -- the list could be quite
611
+ # long.
612
+ #
613
+ # Returns an array of hashes, each hash containing some information about a
614
+ # job. Also returns received HTTP headers.
615
+ #
616
+ # If there was an unrecoverable error, throws an exception. On connection or
617
+ # corruption errors, more attempts will be made; the number of attempts can
618
+ # be altered by passing in :attempts.
619
# Raises ArgumentError unless state is :all, :running or :done. For :all the
# state query value is sent as nil. An empty 200 body yields an empty array;
# otherwise the body must be a job JSON stream, one document per line.
def list_jobs(state, opts = {})
  raise ArgumentError unless [:all, :running, :done].include? state

  state_filter = (state == :all) ? nil : state
  req_hdrs     = gen_headers(opts)

  attempt(opts[:attempts]) do
    # Always a GET: HEAD is left out until added to Manta service.
    response = @client.get(job_url(), { :state => state_filter }, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 200

    return [], response.headers if response.body.size == 0

    raise unless response.headers['Content-Type'] ==
                 'application/x-json-stream; type=job'

    jobs = response.body.split("\n").map { |line| JSON.parse(line) }
    return jobs, response.headers
  end
end
647
+
648
+
649
+
650
+ # Generates a signed URL which can be used by unauthenticated users to
651
+ # make a request to Manta at the given path. This is typically used to GET
652
+ # an object.
653
+ #
654
+ # expires is a Time object or integer representing time after epoch; this
655
+ # determines how long the signed URL will be valid for. The method is the HTTP
656
+ # method (:get, :put, :post, :delete) the signed URL is allowed to be used
657
+ # for. The path must start with /<user>/stor. Lastly, the optional args is an
658
+ # array containing pairs of query args that will be appended at the end of
659
+ # the URL.
660
+ #
661
+ # The returned URL is signed, and can be used either over HTTP or HTTPS until
662
+ # it reaches the expiry date.
663
# Raises ArgumentError for an unsupported method or a path that does not
# match OBJ_PATH_REGEX. The args array passed in is not modified, so the same
# array can be reused for several calls. The returned string starts with the
# host name (no scheme).
def gen_signed_url(expires, method, path, args=[])
  raise ArgumentError unless [:get, :put, :post, :delete].include? method
  raise ArgumentError unless path =~ OBJ_PATH_REGEX

  key_id = '/%s/keys/%s' % [@user, @fingerprint]

  # Build a new array rather than pushing onto the caller's one; pushing
  # would leave expires/algorithm/keyId behind in it and duplicate them on
  # the next call.
  query_args = args + [[ 'expires',   expires.to_i ],
                       [ 'algorithm', @digest_name ],
                       [ 'keyId',     key_id       ]]

  # The query string is part of the signed text, so its order must be
  # deterministic: sort the pairs, then percent-encode keys and values.
  encoded_args = query_args.sort.map do |key, val|
    CGI.escape(key.to_s) + '=' + CGI.escape(val.to_s)
  end.join('&')

  verb = method.to_s.upcase
  host = @host.split('/').last   # host name without the scheme

  plaintext = "#{verb}\n#{host}\n#{path}\n#{encoded_args}"
  signature = @priv_key.sign(@digest, plaintext)
  encoded_signature = CGI.escape(Base64.strict_encode64(signature))

  host + path + '?' + encoded_args + '&signature=' + encoded_signature
end
687
+
688
+
689
+
690
+ # Create some Manta error classes
691
# Common ancestor of every exception class generated below, so callers can
# rescue MantaClient::MantaClientError to catch any of them.
class MantaClientError < StandardError; end

# One subclass per name in ERROR_CLASSES, registered as a MantaClient
# constant (e.g. MantaClient::ResourceNotFound).
ERROR_CLASSES.each do |error_name|
  MantaClient.const_set(error_name, Class.new(MantaClientError))
end
695
+
696
+
697
+
698
+ # ---------------------------------------------------------------------------
699
+ protected
700
+
701
+
702
+
703
+ # Fetch lists of objects that have a given status.
704
+ #
705
+ # type takes one of three values (:in, :out, :fail), path must start with
706
+ # /<user>/jobs/<job UUID> and point at an actual job.
707
+ #
708
+ # Returns an array of object paths, along with received HTTP headers.
709
+ #
710
+ # If there was an unrecoverable error, throws an exception. On connection or
711
+ # corruption errors, more attempts will be made; the number of attempts can
712
+ # be altered by passing in :attempts.
713
# Raises ArgumentError for an unknown type. GETs <path>/live/<type>; a 200
# must be text/plain and its body is split into one path per line.
def get_job_state_streams(type, path, opts)
  raise ArgumentError unless [:in, :out, :fail].include? type

  stream_url = job_url(path, '/live/' + type.to_s)
  req_hdrs   = gen_headers(opts)

  attempt(opts[:attempts]) do
    # Always a GET: HEAD is left out until added to Manta service.
    response = @client.get(stream_url, nil, req_hdrs)
    raise unless response.is_a? HTTP::Message
    raise_error(response) unless response.status == 200

    raise unless response.headers['Content-Type'] == 'text/plain'

    return response.body.split("\n"), response.headers
  end
end
735
+
736
+
737
+
738
+ # Returns a full URL for a given path to an object.
739
# Prefixes an object/directory path with the service host. Raises
# ArgumentError when the path does not match OBJ_PATH_REGEX.
def obj_url(path)
  return @host + path if path =~ OBJ_PATH_REGEX

  raise ArgumentError
end
744
+
745
+
746
+
747
+ # Returns a full URL for a given path to a job.
748
# With no arguments, returns the URL of the user's job collection. Otherwise
# the first argument must be a job path (checked against JOB_PATH_REGEX,
# ArgumentError if not) and any further arguments are appended to it as path
# segments.
def job_url(*args)
  return @host + @job_base if args.empty?

  raise ArgumentError unless args.first =~ JOB_PATH_REGEX

  # Callers pass suffixes that already start with '/' (e.g. '/live/status').
  # File.join puts exactly one '/' at each joint, where a plain join('/')
  # would produce '//' in the URL.
  @host + File.join(*args.map(&:to_s))
end
758
+
759
+
760
+
761
+ # Executes a block. If there is a connection- or corruption-related exception
762
+ # the block will be reexecuted up to the `tries' argument. It will sleep
763
+ # for an exponentially-increasing number of seconds between retries.
764
# tries is the maximum number of times the block is run; nil means the
# client-wide default (@attempts). Raises ArgumentError if tries is given but
# not positive. Only connection-refused, HTTP timeout and CorruptResult
# errors are retried; the pause before retry number k is 2**k seconds
# (2, 4, 8, ...). When the last allowed attempt fails, its exception is
# re-raised. Returns whatever the block returns.
def attempt(tries)
  if tries
    raise ArgumentError unless tries > 0
  else
    tries = @attempts
  end

  attempts_made = 0

  begin
    attempts_made += 1
    yield
  rescue Errno::ECONNREFUSED, HTTPClient::TimeoutError, CorruptResult
    # >= rather than == so a non-integer tries still terminates.
    raise if attempts_made >= tries

    sleep 2 ** attempts_made
    retry
  end
end
784
+
785
+
786
+
787
+ # Creates headers to be given to the HTTP client and sent to the Manta
788
+ # service. The most important is the Authorization header, without which
789
+ # none of this class would work.
790
# Returns an array of [name, value] pairs. Date, Authorization (a signature
# over the Date header), User-Agent and Accept-Version are always present.
# Optional entries in opts add further headers:
#   :if_modified_since, :if_unmodified_since  Time, or anything Time.parse
#                                             understands via to_s
#   :if_match, :if_none_match                 etag String
#   :origin                                   'null' or a CORS origin list
#   :data                                     payload; adds its Content-MD5
# Raises ArgumentError for a non-String etag or a malformed origin.
def gen_headers(opts)
  now = Time.now.httpdate
  sig = gen_signature('date: ' + now)

  headers = [[ 'Date',           now        ],
             [ 'Authorization',  sig        ],
             [ 'User-Agent',     HTTP_AGENT ],
             [ 'Accept-Version', '~1.0'     ]]

  # headers for conditional requests (dates)
  [[ :if_modified_since,   'If-Modified-Since'   ],
   [ :if_unmodified_since, 'If-Unmodified-Since' ]].each do |arg, conditional|
    date = opts[arg]
    next unless date

    date = Time.parse(date.to_s) unless date.kind_of? Time
    # Send the HTTP-date text form, like the Date header above, rather than
    # the Time object itself.
    headers.push([ conditional, date.httpdate ])
  end

  # headers for conditional requests (etags)
  [[ :if_match,      'If-Match'      ],
   [ :if_none_match, 'If-None-Match' ]].each do |arg, conditional|
    etag = opts[arg]
    next unless etag

    raise ArgumentError unless etag.kind_of? String
    headers.push([ conditional, etag ])
  end

  origin = opts[:origin]
  if origin
    raise ArgumentError unless origin == 'null' || origin =~ CORS_ORIGIN_REGEX
    headers.push([ 'Origin', origin ])
  end

  # add md5 hash when sending data
  data = opts[:data]
  headers.push([ 'Content-MD5', Digest::MD5.base64digest(data) ]) if data

  headers
end
835
+
836
+
837
+
838
+ # Do some sanity checks and create CORS-related headers
839
+ #
840
+ # For more details, see http://www.w3.org/TR/cors/ and
841
+ # https://developer.mozilla.org/en-US/docs/HTTP/Access_control_CORS#Access-Control-Expose-Headers
842
# Returns an array of [name, value] pairs for whichever of these options are
# present in opts (in this order):
#   :access_control_allow_credentials  true/false (or the strings); a false
#                                      or nil value is skipped, so no header
#                                      is produced for it
#   :access_control_allow_headers      "Name, Name" list; lowercased, sorted
#   :access_control_allow_methods      "GET, PUT" list of CORS_METHODS
#   :access_control_allow_origin       '*', 'null' or an origin list
#   :access_control_expose_headers     "Name, Name" list; lowercased, sorted
#   :access_control_max_age            non-negative Integer
# Raises ArgumentError for any value of the wrong type or format.
def gen_cors_headers(opts)
  headers = []

  # Validates a "Name, Name" header list and returns it lowercased and
  # sorted. The String check comes first so that non-String values raise
  # ArgumentError like every other option here.
  normalize_names = lambda do |value|
    raise ArgumentError unless value.kind_of?(String) &&
                               value =~ CORS_HEADERS_REGEX
    value.split(', ').map(&:downcase).sort.join(', ')
  end

  allow_credentials = opts[:access_control_allow_credentials]
  if allow_credentials
    allow_credentials = allow_credentials.to_s
    raise ArgumentError unless allow_credentials == 'true' ||
                               allow_credentials == 'false'
    headers.push([ 'Access-Control-Allow-Credentials', allow_credentials ])
  end

  allow_headers = opts[:access_control_allow_headers]
  if allow_headers
    headers.push([ 'Access-Control-Allow-Headers',
                   normalize_names.call(allow_headers) ])
  end

  allow_methods = opts[:access_control_allow_methods]
  if allow_methods
    raise ArgumentError unless allow_methods.kind_of? String

    unknown_methods = allow_methods.split(', ').reject do |str|
      CORS_METHODS.include? str
    end
    raise ArgumentError unless unknown_methods.empty?

    headers.push([ 'Access-Control-Allow-Methods', allow_methods ])
  end

  allow_origin = opts[:access_control_allow_origin]
  if allow_origin
    raise ArgumentError unless allow_origin.kind_of? String
    raise ArgumentError unless allow_origin == '*' ||
                               allow_origin == 'null' ||
                               allow_origin =~ CORS_ORIGIN_REGEX
    headers.push([ 'Access-Control-Allow-Origin', allow_origin ])
  end

  expose_headers = opts[:access_control_expose_headers]
  if expose_headers
    headers.push([ 'Access-Control-Expose-Headers',
                   normalize_names.call(expose_headers) ])
  end

  max_age = opts[:access_control_max_age]
  if max_age
    raise ArgumentError unless max_age.kind_of?(Integer) && max_age >= 0
    headers.push([ 'Access-Control-Max-Age', max_age.to_s ])
  end

  headers
end
896
+
897
+
898
+
899
+ # Given a chunk of data, creates an HTTP signature which the Manta service
900
+ # understands and uses for authentication.
901
# Signs data with the client's private key and digest, base64-encodes the
# result and fills it into the HTTP_SIGNATURE template together with the
# user, key fingerprint and algorithm name. Raises ArgumentError when data is
# nil or false.
def gen_signature(data)
  raise ArgumentError unless data

  encoded = Base64.strict_encode64(@priv_key.sign(@digest, data))

  HTTP_SIGNATURE % [@user, @fingerprint, @digest_name, encoded]
end
909
+
910
+
911
+
912
+ # Raises an appropriate exception given the HTTP response. If a 40* is
913
+ # returned, attempts to look up an appropriate error class and raise,
914
+ # otherwise raises an UnknownError.
915
# Always raises. When the body is a JSON object whose 'code' is one of
# ERROR_CLASSES, the matching MantaClient exception is raised with the
# body's 'message'. Anything else (unparseable body, unexpected JSON shape,
# unknown code) raises UnknownError with "<status>: <body>". A result that is
# not an HTTP::Message raises a RuntimeError.
def raise_error(result)
  raise unless result.is_a? HTTP::Message

  err = begin
    JSON.parse(result.body)
  rescue TypeError, JSON::ParserError
    nil
  end

  # The code comes from the server. Only look up names this class declared,
  # so a response cannot make the client raise an unrelated exception class
  # (e.g. SystemExit) that const_get would otherwise resolve.
  if err.is_a?(Hash) && ERROR_CLASSES.include?(err['code'])
    raise MantaClient.const_get(err['code']), err['message']
  end

  # Interpolation copes with a nil body, unlike string concatenation.
  raise UnknownError, "#{result.status}: #{result.body}"
end
924
+ end
925
+