ruby-manta 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/example.rb ADDED
@@ -0,0 +1,78 @@
1
require 'ruby-manta'

# You'll need to provide these four environment variables to run this
# example. E.g.:
# USER=john KEY=~/.ssh/john HOST=https://us-east.manta.joyent.com DIR=. ruby example.rb
#
# ENV.fetch is used so that a missing variable fails right away with a
# KeyError naming it, instead of a confusing error further down.
host       = ENV.fetch('HOST')
user       = ENV.fetch('USER')
priv_key   = ENV.fetch('KEY')
upload_dir = ENV.fetch('DIR')

# Read in private key, create a MantaClient instance. MantaClient is
# thread-safe and provides persistent connections with pooling, so you'll
# only ever need a single instance of this in a program.
#
# NOTE: SSL verification is switched off here only to keep the example easy
# to run; leave it enabled in real programs.
priv_key_data = File.read(priv_key)
client = MantaClient.new(host, user, priv_key_data,
                         :disable_ssl_verification => true)

# Create a directory in Manta solely for this example run.
dir_path = '/' + user + '/stor/ruby-manta-example'
client.put_directory(dir_path)

# Upload files in a local directory to the Manta directory.
file_paths = Dir[upload_dir + '/*'].select { |p| File.file? p }
file_paths.each do |file_path|
  file_name = File.basename(file_path)
  # Be careful about binary files and file encodings in Ruby 1.9. If you don't
  # use ASCII-8BIT (forced by 'rb' below), expect timeouts while PUTing an
  # object.
  file_data = File.open(file_path, 'rb') { |f| f.read }
  client.put_object(dir_path + '/' + file_name, file_data)
end

# This example job runs the wc UNIX command on every object for the
# map phase, then uses awk during reduce to sum up the three numbers each wc
# returned.
job_details = {
  :name   => 'total word count',
  :phases => [ {
    :exec => 'wc'
  }, {
    :type => 'reduce',
    :exec => "awk '{ l += $1; w += $2; c += $3 } END { print l, w, c }'"
  } ]
}

# Create the job, then add the objects the job should operate on.
job_path, _ = client.create_job(job_details)

entries, _ = client.list_directory(dir_path)
obj_paths  = entries.select { |e| e['type'] == 'object' }.
                     map    { |e| dir_path + '/' + e['name'] }

client.add_job_keys(job_path, obj_paths)

# Tell Manta we're done adding objects to the job. Manta doesn't need this
# to start running a job -- you can see map results without it, for
# example -- but reduce phases in particular depend on all mapping
# finishing.
client.end_job_input(job_path)

# Poll until Manta finishes the job.
loop do
  sleep 1
  job, _ = client.get_job(job_path)
  break if job['state'] == 'done'
end

# We know in this case there will be only one result. Fetch it and
# display it.
results, _ = client.get_job_output(job_path)
data, _    = client.get_object(results[0])
puts data

# Clean up; remove objects and directory.
obj_paths.each do |obj_path|
  client.delete_object(obj_path)
end

client.delete_directory(dir_path)
data/lib/ruby-manta.rb ADDED
@@ -0,0 +1,925 @@
1
+ # Copyright (c) 2012, Joyent, Inc. All rights reserved.
2
+ #
3
+ # ruby-manta is a simple low-abstraction layer which communicates with Joyent's
4
+ # Manta service.
5
+ #
6
+ # Manta is an HTTP-accessible object store supporting UNIX-based map-reduce
7
+ # jobs. Through ruby-manta a programmer can save/overwrite/delete objects
8
+ # stored on a Manta service, or run map-reduce jobs over those objects.
9
+ #
10
+ # ruby-manta should be thread-safe, and supports pooling of keep-alive
11
+ # connections to the same server (through HTTPClient). It only relies on the
12
+ # standard library and two pure Ruby libraries, so it should work anywhere.
13
+ #
14
+ # For more information about Manta and general ruby-manta usage, please see
15
+ # README.md.
16
+
17
+
18
+
19
+ require 'openssl'
20
+ require 'net/ssh'
21
+ require 'httpclient'
22
+ require 'base64'
23
+ require 'digest'
24
+ require 'time'
25
+ require 'json'
26
+ require 'cgi'
27
+
28
+ require File.expand_path('../version', __FILE__)
29
+
30
+
31
+
32
+ class MantaClient
33
DEFAULT_ATTEMPTS        = 3
DEFAULT_CONNECT_TIMEOUT = 5
DEFAULT_SEND_TIMEOUT    = 60
DEFAULT_RECEIVE_TIMEOUT = 60
MAX_LIMIT               = 1000
HTTP_AGENT     = "ruby-manta/#{LIB_VERSION} (#{RUBY_PLATFORM}; #{OpenSSL::OPENSSL_VERSION}) ruby/#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL}"
HTTP_SIGNATURE = 'Signature keyId="/%s/keys/%s",algorithm="%s",signature="%s"'

# Path validators. They are anchored with \A / \z rather than ^ / $: in Ruby
# ^ and $ match at every line boundary, so a multi-line string could pass
# validation as long as any one of its lines looked like a valid path.
OBJ_PATH_REGEX = Regexp.new('\A/.+/(?:stor|public|reports)(?:/|\z)')
JOB_PATH_REGEX = Regexp.new('\A/.+?/jobs/.+?(?:/live|\z)')

# match one or more protocol and hostnames, with optional port numbers.
# E.g. "http://example.com https://example.com:8443"
# (Whole-string anchors keep embedded newlines out of header values.)
CORS_ORIGIN_REGEX  = Regexp.new('\A\w+://[^\s\:]+(?:\:\d+)?' +
                                '(?:\s\w+://[^\s\:]+(?:\:\d+)?)*\z')
CORS_HEADERS_REGEX = Regexp.new('\A[\w-]+(?:, [\w-]+)*\z')
CORS_METHODS       = [ 'GET', 'POST', 'PUT', 'DELETE', 'OPTIONS' ]

# Names of the error classes that are generated under MantaClient further
# down in this class (one subclass of MantaClientError per name).
ERROR_CLASSES = [ 'AuthorizationFailed', 'AuthSchemeNotAllowed',
                  'BadRequest', 'Checksum', 'ConcurrentRequest',
                  'ContentLength', 'ContentMD5Mismatch',
                  'DirectoryDoesNotExist', 'DirectoryExists',
                  'DirectoryNotEmpty', 'DirectoryOperation',
                  'EntityExists', 'Internal', 'InvalidArgument',
                  'InvalidAuthToken', 'InvalidCredentials',
                  'InvalidDurabilityLevel', 'InvalidJob', 'InvalidKeyId',
                  'InvalidLink', 'InvalidSignature', 'InvalidJobState',
                  'JobNotFound', 'JobState', 'KeyDoesNotExist',
                  'LinkNotFound', 'LinkNotObject', 'LinkRequired',
                  'NotAcceptable', 'NotEnoughSpace', 'ParentNotDirectory',
                  'PreconditionFailed', 'PreSignedRequest',
                  'RequestEntityTooLarge', 'ResourceNotFound',
                  'RootDirectory', 'SecureTransportRequired',
                  'ServiceUnavailable', 'SourceObjectNotFound',
                  'SSLRequired', 'TaskInit', 'UploadTimeout',
                  'UserDoesNotExist', 'UserTaskError',
                  # and errors that are specific to this class:
                  'CorruptResult', 'UnknownError',
                  'UnsupportedKey' ]
71
+
72
+
73
+
74
+ # Initialize a MantaClient instance.
75
+ #
76
+ # priv_key_data is data read directly from an SSH private key (i.e. RFC 4716
77
+ # format). The method can also accept several optional args: :connect_timeout,
78
+ # :send_timeout, :receive_timeout, :disable_ssl_verification and :attempts.
79
+ # The timeouts are in seconds, and :attempts determines the default number of
80
+ # attempts each method will make upon receiving recoverable errors.
81
+ #
82
+ # Will throw an exception if given a key whose format it doesn't understand.
83
def initialize(host, user, priv_key_data, opts = {})
  raise ArgumentError unless host =~ /^https{0,1}:\/\/.*[^\/]/
  raise ArgumentError unless user.is_a?(String) && user.size > 0

  @host = host
  @user = user

  @attempts = opts[:attempts] || DEFAULT_ATTEMPTS
  raise ArgumentError unless @attempts > 0

  # Choose the signing digest and the key class from the PEM header.
  if priv_key_data =~ /BEGIN RSA/
    @digest      = OpenSSL::Digest::SHA1.new
    @digest_name = 'rsa-sha1'
    algorithm    = OpenSSL::PKey::RSA
  elsif priv_key_data =~ /BEGIN DSA/
    @digest      = OpenSSL::Digest::DSS1.new
    @digest_name = 'dsa-sha1'
    algorithm    = OpenSSL::PKey::DSA
  else
    # The classes generated from ERROR_CLASSES carry no "Error" suffix, so
    # the constant to raise is UnsupportedKey; the previously referenced
    # UnsupportedKeyError is not defined and surfaced as a NameError.
    raise UnsupportedKey
  end

  # The key fingerprint is the MD5 of the key blob, as colon-separated hex
  # byte pairs.
  @priv_key    = algorithm.new(priv_key_data)
  @fingerprint = OpenSSL::Digest::MD5.hexdigest(@priv_key.to_blob).
                                      scan(/../).join(':')

  @client = HTTPClient.new
  @client.connect_timeout = opts[:connect_timeout] || DEFAULT_CONNECT_TIMEOUT
  @client.send_timeout    = opts[:send_timeout   ] || DEFAULT_SEND_TIMEOUT
  @client.receive_timeout = opts[:receive_timeout] || DEFAULT_RECEIVE_TIMEOUT
  @client.ssl_config.verify_mode = nil if opts[:disable_ssl_verification]

  @job_base = '/' + user + '/jobs'
end
117
+
118
+
119
+
120
+ # Uploads object data to Manta to the given path, along with a computed MD5
121
+ # hash.
122
+ #
123
+ # The path must start with /<user>/stor or /<user>/public. Data can be any
124
+ # sequence of octets. The HTTP Content-Type stored on Manta can be set
125
+ # with an optional :content_type argument; the default is
126
+ # application/octet-stream. The number of distributed replicates of an object
127
+ # stored in Manta can be set with an optional :durability_level; the default
128
+ # is 2.
129
+ #
130
+ # Returns true along with received HTTP headers.
131
+ #
132
+ # If there was an unrecoverable error, throws an exception. On connection or
133
+ # corruption errors, more attempts will be made; the number of attempts can
134
+ # be altered by passing in :attempts.
135
def put_object(obj_path, data, opts = {})
  url = obj_url(obj_path)

  # gen_headers reads the payload from opts[:data] to compute Content-MD5.
  # Merge into a copy so the hash passed in by the caller is left untouched.
  opts    = opts.merge(:data => data)
  headers = gen_headers(opts)

  cors_headers = gen_cors_headers(opts)
  headers = headers.concat(cors_headers)

  durability_level = opts[:durability_level]
  if durability_level
    raise ArgumentError unless durability_level > 0
    headers.push([ 'Durability-Level', durability_level ])
  end

  content_type = opts[:content_type]
  if content_type
    raise ArgumentError unless content_type.is_a? String
    headers.push([ 'Content-Type', content_type ])
  end

  attempt(opts[:attempts]) do
    result = @client.put(url, data, headers)
    raise unless result.is_a? HTTP::Message

    # 204 and 304 are both treated as success.
    return true, result.headers if [204, 304].include? result.status
    raise_error(result)
  end
end
164
+
165
+
166
+
167
+ # Get an object from Manta at a given path, and checks it's uncorrupted.
168
+ #
169
+ # The path must start with /<user>/stor or /<user>/public and point at an
170
+ # actual object, as well as output objects for jobs. :head => true can
171
+ # optionally be passed in to do a HEAD instead of a GET.
172
+ #
173
+ # Returns the retrieved data along with received HTTP headers.
174
+ #
175
+ # If there was an unrecoverable error, throws an exception. On connection or
176
+ # corruption errors, more attempts will be made; the number of attempts can
177
+ # be altered by passing in :attempts.
178
def get_object(obj_path, opts = {})
  url     = obj_url(obj_path)
  headers = gen_headers(opts)
  verb    = opts[:head] ? :head : :get

  attempt(opts[:attempts]) do
    response = @client.send(verb, url, nil, headers)
    raise unless response.is_a? HTTP::Message

    case response.status
    when 200
      # A HEAD request has no body to verify; report success with headers.
      return [true, response.headers] if verb == :head

      # Compare the MD5 sent in the Content-MD5 header with one computed
      # over the received body; a mismatch is raised as CorruptResult.
      expected = response.headers['Content-MD5']
      actual   = Digest::MD5.base64digest(response.body)
      raise CorruptResult unless expected == actual

      return [response.body, response.headers]
    when 304
      return [nil, response.headers]
    end

    raise_error(response)
  end
end
202
+
203
+
204
+
205
+ # Deletes an object off Manta at a given path.
206
+ #
207
+ # The path must start with /<user>/stor or /<user>/public and point at an
208
+ # actual object.
209
+ #
210
+ # Returns true along with received HTTP headers.
211
+ #
212
+ # If there was an unrecoverable error, throws an exception. On connection or
213
+ # corruption errors, more attempts will be made; the number of attempts can
214
+ # be altered by passing in :attempts.
215
def delete_object(obj_path, opts = {})
  target  = obj_url(obj_path)
  headers = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.delete(target, nil, headers)
    raise unless response.is_a? HTTP::Message

    # Only a 204 counts as a successful delete.
    if response.status == 204
      return [true, response.headers]
    else
      raise_error(response)
    end
  end
end
227
+
228
+
229
+
230
+ # Creates a directory on Manta at a given path.
231
+ #
232
+ # The path must start with /<user>/stor or /<user>/public.
233
+ #
234
+ # Returns true along with received HTTP headers.
235
+ #
236
+ # If there was an unrecoverable error, throws an exception. On connection or
237
+ # corruption errors, more attempts will be made; the number of attempts can
238
+ # be altered by passing in :attempts.
239
def put_directory(dir_path, opts = {})
  target = obj_url(dir_path)

  # The directory content type goes right after the base headers and
  # before any CORS headers.
  headers = gen_headers(opts) << [ 'Content-Type', 'application/json; type=directory' ]
  headers.concat(gen_cors_headers(opts))

  attempt(opts[:attempts]) do
    response = @client.put(target, nil, headers)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return [true, response.headers]
    else
      raise_error(response)
    end
  end
end
255
+
256
+
257
+
258
+ # Gets a lexicographically sorted directory listing on Manta at a given path.
259
+ #
260
+ # The path must start with /<user>/stor or /<user>/public and point at an
261
+ # actual directory. :limit optionally changes the maximum number of entries;
262
+ # the default is 1000. If given :marker, an object name in the directory,
263
+ # returned directory entries will begin from that point. :head => true can
264
+ # optionally be passed in to do a HEAD instead of a GET.
265
+ #
266
+ # Returns an array of hash objects, each object representing a directory
267
+ # entry. Also returns the received HTTP headers.
268
+ #
269
+ # If there was an unrecoverable error, throws an exception. On connection or
270
+ # corruption errors, more attempts will be made; the number of attempts can
271
+ # be altered by passing in :attempts.
272
def list_directory(dir_path, opts = {})
  url     = obj_url(dir_path)
  headers = gen_headers(opts)

  limit = opts[:limit] || MAX_LIMIT
  raise ArgumentError unless 0 < limit && limit <= MAX_LIMIT
  query = { :limit => limit }

  if (marker = opts[:marker])
    raise ArgumentError unless marker.is_a? String
    query[:marker] = marker
  end

  verb = opts[:head] ? :head : :get

  attempt(opts[:attempts]) do
    response = @client.send(verb, url, query, headers)
    raise unless response.is_a? HTTP::Message

    raise_error(response) unless response.status == 200

    raise unless response.headers['Content-Type'] ==
                 'application/x-json-stream; type=directory'

    return [true, response.headers] if verb == :head

    # The body holds one JSON document per line.
    lines      = response.body.split("\n")
    advertised = response.headers['Result-Set-Size'].to_i
    received   = lines.size

    # The number of entries must equal either the advertised result-set
    # size or the requested limit, and may never exceed the limit.
    plausible = (received == advertised || received == limit) &&
                received <= limit
    raise CorruptResult unless plausible

    entries = lines.map { |line| JSON.parse(line) }
    return [entries, response.headers]
  end
end
314
+
315
+
316
+
317
+ # Removes a directory from Manta at a given path.
318
+ #
319
+ # The path must start with /<user>/stor or /<user>/public and point at an
320
+ # actual directory.
321
+ #
322
+ # Returns true along with received HTTP headers.
323
+ #
324
+ # If there was an unrecoverable error, throws an exception. On connection or
325
+ # corruption errors, more attempts will be made; the number of attempts can
326
+ # be altered by passing in :attempts.
327
def delete_directory(dir_path, opts = {})
  target  = obj_url(dir_path)
  headers = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.delete(target, nil, headers)
    raise unless response.is_a? HTTP::Message

    # Only a 204 counts as a successful removal.
    if response.status == 204
      return [true, response.headers]
    else
      raise_error(response)
    end
  end
end
339
+
340
+
341
+
342
+ # Creates a snaplink from one object in Manta at a given path to a different
343
+ # path.
344
+ #
345
+ # Both paths should start with /<user>/stor or /<user>/public.
346
+ #
347
+ # Returns true along with received HTTP headers.
348
+ #
349
+ # If there was an unrecoverable error, throws an exception. On connection or
350
+ # corruption errors, more attempts will be made; the number of attempts can
351
+ # be altered by passing in :attempts.
352
def put_snaplink(orig_path, link_path, opts = {})
  headers = gen_headers(opts)
  # The link content type marks the request as a snaplink; Location carries
  # the URL of the object being linked to.
  headers << [ 'Content-Type', 'application/json; type=link' ]
  headers << [ 'Location',     obj_url(orig_path)            ]

  attempt(opts[:attempts]) do
    response = @client.put(obj_url(link_path), nil, headers)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return [true, response.headers]
    else
      raise_error(response)
    end
  end
end
365
+
366
+
367
+
368
+ # Creates a job in Manta.
369
+ #
370
+ # The job must be a hash, containing at minimum a :phases key. See README.md
371
+ # or the Manta docs to see the format and options for setting up a job on
372
+ # Manta; this method effectively just converts the job hash to JSON and sends
373
+ # to the Manta service.
374
+ #
375
+ # Returns the path for the new job, along with received HTTP headers.
376
+ #
377
+ # If there was an unrecoverable error, throws an exception. On connection or
378
+ # corruption errors, more attempts will be made; the number of attempts can
379
+ # be altered by passing in :attempts.
380
def create_job(job, opts = {})
  # The phases may be keyed by either a symbol or a string.
  raise ArgumentError unless job[:phases] || job['phases']

  headers = gen_headers(opts) << [ 'Content-Type', 'application/json; type=job' ]
  payload = job.to_json

  attempt(opts[:attempts]) do
    response = @client.post(job_url(), payload, headers)
    raise unless response.is_a? HTTP::Message

    if response.status == 201
      # The path of the new job is taken from the Location header.
      new_job_path = response.headers['Location']
      raise unless new_job_path

      return [new_job_path, response.headers]
    end

    raise_error(response)
  end
end
401
+
402
+
403
+
404
+ # Gets various information about a job in Manta at a given path.
405
+ #
406
+ # The path must start with /<user>/jobs/<job UUID> and point at an actual job.
407
+ # :head => true can optionally be passed in to do a HEAD instead of a GET.
408
+ #
409
+ # Returns a hash with job information, along with received HTTP headers.
410
+ #
411
+ # If there was an unrecoverable error, throws an exception. On connection or
412
+ # corruption errors, more attempts will be made; the number of attempts can
413
+ # be altered by passing in :attempts.
414
def get_job(job_path, opts = {})
  url     = job_url(job_path, '/live/status')
  headers = gen_headers(opts)
  verb    = opts[:head] ? :head : :get

  attempt(opts[:attempts]) do
    response = @client.send(verb, url, nil, headers)
    raise unless response.is_a? HTTP::Message

    raise_error(response) unless response.status == 200

    # Anything other than a JSON document is unexpected here.
    raise unless response.headers['Content-Type'] == 'application/json'

    return [true, response.headers] if verb == :head

    return [JSON.parse(response.body), response.headers]
  end
end
435
+
436
+
437
+
438
+ # Gets errors that occurred during the execution of a job in Manta at a given
439
+ # path.
440
+ #
441
+ # The path must start with /<user>/jobs/<job UUID> and point at an actual job.
442
+ # :head => true can optionally be passed in to do a HEAD instead of a GET.
443
+ #
444
+ # Returns an array of hashes, each hash containing information about an
445
+ # error; this information is best-effort by Manta, so it may not be complete.
446
+ # Also returns received HTTP headers.
447
+ #
448
+ # If there was an unrecoverable error, throws an exception. On connection or
449
+ # corruption errors, more attempts will be made; the number of attempts can
450
+ # be altered by passing in :attempts.
451
def get_job_errors(job_path, opts = {})
  url     = job_url(job_path, '/live/err')
  headers = gen_headers(opts)
  verb    = opts[:head] ? :head : :get

  attempt(opts[:attempts]) do
    response = @client.send(verb, url, nil, headers)
    raise unless response.is_a? HTTP::Message

    raise_error(response) unless response.status == 200

    raise unless response.headers['Content-Type'] ==
                 'application/x-json-stream; type=job-error'

    return [true, response.headers] if verb == :head

    # One JSON document per line, each describing a single error.
    job_errors = response.body.split("\n").map { |line| JSON.parse(line) }
    return [job_errors, response.headers]
  end
end
475
+
476
+
477
+
478
+ # Cancels a running job in Manta at a given path.
479
+ #
480
+ # The path must start with /<user>/jobs/<job UUID> and point at an actual job.
481
+ #
482
+ # Returns true, along with received HTTP headers.
483
+ #
484
+ # If there was an unrecoverable error, throws an exception. On connection or
485
+ # corruption errors, more attempts will be made; the number of attempts can
486
+ # be altered by passing in :attempts.
487
def cancel_job(job_path, opts = {})
  target  = job_url(job_path, '/live/cancel')
  headers = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.post(target, nil, headers)
    raise unless response.is_a? HTTP::Message

    # A 202 is the only status treated as success.
    if response.status == 202
      return [true, response.headers]
    else
      raise_error(response)
    end
  end
end
499
+
500
+
501
+
502
+ # Adds objects for a running job in Manta to process.
503
+ #
504
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
505
+ # running job. The obj_paths must be an array of paths, starting with
506
+ # /<user>/stor or /<user>/public, pointing at actual objects.
507
+ #
508
+ # Returns true, along with received HTTP headers.
509
+ #
510
+ # If there was an unrecoverable error, throws an exception. On connection or
511
+ # corruption errors, more attempts will be made; the number of attempts can
512
+ # be altered by passing in :attempts.
513
def add_job_keys(job_path, obj_paths, opts = {})
  target  = job_url(job_path, '/live/in')
  headers = gen_headers(opts) << [ 'Content-Type', 'text/plain' ]

  # The keys are sent as plain text, one object path per line.
  payload = obj_paths.join("\n")

  attempt(opts[:attempts]) do
    response = @client.post(target, payload, headers)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return [true, response.headers]
    else
      raise_error(response)
    end
  end
end
528
+
529
+
530
+
531
+ # Inform Manta that no more objects will be added for processing by a job,
532
+ # and that the job should finish all phases and terminate.
533
+ #
534
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
535
+ # running job.
536
+ #
537
+ # Returns true, along with received HTTP headers.
538
+ #
539
+ # If there was an unrecoverable error, throws an exception. On connection or
540
+ # corruption errors, more attempts will be made; the number of attempts can
541
+ # be altered by passing in :attempts.
542
def end_job_input(job_path, opts = {})
  target  = job_url(job_path, '/live/in/end')
  headers = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.post(target, nil, headers)
    raise unless response.is_a? HTTP::Message

    # A 202 is the only status treated as success.
    if response.status == 202
      return [true, response.headers]
    else
      raise_error(response)
    end
  end
end
554
+
555
+
556
+
557
+ # Get a list of objects that have been given to a Manta job for processing.
558
+ #
559
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
560
+ # running job.
561
+ #
562
+ # Returns an array of object paths, along with received HTTP headers.
563
+ #
564
+ # If there was an unrecoverable error, throws an exception. On connection or
565
+ # corruption errors, more attempts will be made; the number of attempts can
566
+ # be altered by passing in :attempts.
567
def get_job_input(job_path, opts = {})
  # Thin wrapper: reads the job's :in stream.
  stream = :in
  get_job_state_streams(stream, job_path, opts)
end
570
+
571
+
572
+
573
+ # Get a list of objects that contain the intermediate results of a running
574
+ # Manta job.
575
+ #
576
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
577
+ # running job.
578
+ #
579
+ # Returns an array of object paths, along with received HTTP headers.
580
+ #
581
+ # If there was an unrecoverable error, throws an exception. On connection or
582
+ # corruption errors, more attempts will be made; the number of attempts can
583
+ # be altered by passing in :attempts.
584
def get_job_output(job_path, opts = {})
  # Thin wrapper: reads the job's :out stream.
  stream = :out
  get_job_state_streams(stream, job_path, opts)
end
587
+
588
+
589
+
590
+ # Get a list of objects that had failures during processing in a Manta job.
591
+ #
592
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
593
+ # running job.
594
+ #
595
+ # Returns an array of object paths, along with received HTTP headers.
596
+ #
597
+ # If there was an unrecoverable error, throws an exception. On connection or
598
+ # corruption errors, more attempts will be made; the number of attempts can
599
+ # be altered by passing in :attempts.
600
def get_job_failures(job_path, opts = {})
  # Thin wrapper: reads the job's :fail stream.
  stream = :fail
  get_job_state_streams(stream, job_path, opts)
end
603
+
604
+
605
+
606
+ # Get lists of Manta jobs.
607
+ #
608
+ # The state indicates which kind of jobs to return. :running is for jobs
609
+ # that are currently processing, :done and :all should be obvious. Be careful
610
+ # of the latter two if you've run a lot of jobs -- the list could be quite
611
+ # long.
612
+ #
613
+ # Returns an array of hashes, each hash containing some information about a
614
+ # job. Also returns received HTTP headers.
615
+ #
616
+ # If there was an unrecoverable error, throws an exception. On connection or
617
+ # corruption errors, more attempts will be made; the number of attempts can
618
+ # be altered by passing in :attempts.
619
def list_jobs(state, opts = {})
  raise ArgumentError unless [:all, :running, :done].include? state

  # :all is expressed by sending no state filter value.
  filter  = state == :all ? nil : state
  headers = gen_headers(opts)

  attempt(opts[:attempts]) do
    # Always a GET; HEAD is not used here until added to the Manta service.
    response = @client.send(:get, job_url(), { :state => filter }, headers)
    raise unless response.is_a? HTTP::Message

    raise_error(response) unless response.status == 200

    # An empty body means there are no jobs; the content type is not
    # checked in that case.
    return [[], response.headers] if response.body.size == 0

    raise unless response.headers['Content-Type'] ==
                 'application/x-json-stream; type=job'

    # One JSON document per line, each describing a single job.
    jobs = response.body.split("\n").map { |line| JSON.parse(line) }
    return [jobs, response.headers]
  end
end
647
+
648
+
649
+
650
+ # Generates a signed URL which can be used by unauthenticated users to
651
+ # make a request to Manta at the given path. This is typically used to GET
652
+ # an object.
653
+ #
654
+ # expires is a Time object or integer representing time after epoch; this
655
+ # determines how long the signed URL will be valid for. The method is the HTTP
656
+ # method (:get, :put, :post, :delete) the signed URL is allowed to be used
657
+ # for. The path must start with /<user>/stor. Lastly, the optional args is an
658
+ # array containing pairs of query args that will be appended at the end of
659
+ # the URL.
660
+ #
661
+ # The returned URL is signed, and can be used either over HTTP or HTTPS until
662
+ # it reaches the expiry date.
663
def gen_signed_url(expires, method, path, args=[])
  raise ArgumentError unless [:get, :put, :post, :delete].include? method
  raise ArgumentError unless path =~ OBJ_PATH_REGEX

  key_id = '/%s/keys/%s' % [@user, @fingerprint]

  # Build a new array rather than pushing onto `args`: the caller's array
  # stays untouched, so it can be reused for further calls without
  # accumulating duplicate expires/algorithm/keyId pairs.
  query_args = args + [[ 'expires',   expires.to_i ],
                       [ 'algorithm', @digest_name ],
                       [ 'keyId',     key_id       ]]

  # The pairs are sorted before encoding; the same encoded string is used
  # both in the signed plaintext and in the returned URL.
  encoded_args = query_args.sort.map do |key, val|
    # to comply with RFC 3986
    CGI.escape(key.to_s) + '=' + CGI.escape(val.to_s)
  end.join('&')

  method = method.to_s.upcase
  # Everything after the last '/' of the configured host, i.e. without the
  # scheme.
  host   = @host.split('/').last

  plaintext         = "#{method}\n#{host}\n#{path}\n#{encoded_args}"
  signature         = @priv_key.sign(@digest, plaintext)
  encoded_signature = CGI.escape(Base64.strict_encode64(signature))

  host + path + '?' + encoded_args + '&signature=' + encoded_signature
end
687
+
688
+
689
+
690
+ # Create some Manta error classes
691
# Common ancestor of every generated error class, so callers can rescue
# them all at once.
class MantaClientError < StandardError; end

# Define one MantaClientError subclass under MantaClient for each name
# listed in ERROR_CLASSES.
ERROR_CLASSES.each do |error_name|
  MantaClient.const_set(error_name, Class.new(MantaClientError))
end
695
+
696
+
697
+
698
+ # ---------------------------------------------------------------------------
699
+ protected
700
+
701
+
702
+
703
+ # Fetch lists of objects that have a given status.
704
+ #
705
+ # type takes one of three values (:in, :out, :fail), path must start with
706
+ # /<user>/jobs/<job UUID> and point at an actual job.
707
+ #
708
+ # Returns an array of object paths, along with received HTTP headers.
709
+ #
710
+ # If there was an unrecoverable error, throws an exception. On connection or
711
+ # corruption errors, more attempts will be made; the number of attempts can
712
+ # be altered by passing in :attempts.
713
def get_job_state_streams(type, path, opts)
  raise ArgumentError unless [:in, :out, :fail].include? type

  url     = job_url(path, '/live/' + type.to_s)
  headers = gen_headers(opts)

  attempt(opts[:attempts]) do
    # Always a GET; HEAD is not used here until added to the Manta service.
    verb     = :get
    response = @client.send(verb, url, nil, headers)
    raise unless response.is_a? HTTP::Message

    raise_error(response) unless response.status == 200

    raise unless response.headers['Content-Type'] == 'text/plain'
    return [true, response.headers] if verb == :head

    # The body lists one object path per line.
    return [response.body.split("\n"), response.headers]
  end
end
735
+
736
+
737
+
738
+ # Returns a full URL for a given path to an object.
739
def obj_url(path)
  # Paths that do not match OBJ_PATH_REGEX are rejected before any URL is
  # built.
  if path =~ OBJ_PATH_REGEX
    @host + path
  else
    raise ArgumentError
  end
end
744
+
745
+
746
+
747
+ # Returns a full URL for a given path to a job.
748
# With no arguments, returns the URL of the user's jobs collection. Otherwise
# the first argument must be a job path (checked against JOB_PATH_REGEX,
# ArgumentError if it does not match) and any further arguments are appended
# as path segments.
def job_url(*args)
  return @host + @job_base if args.empty?

  base, *segments = args
  raise ArgumentError unless base =~ JOB_PATH_REGEX

  # Join with exactly one '/' between parts. Callers pass suffixes such as
  # '/live/status'; a plain join('/') turned those into
  # ".../jobs/<id>//live/status".
  path = segments.inject(base.to_s) do |joined, segment|
    joined.chomp('/') + '/' + segment.to_s.sub(/\A\/+/, '')
  end

  @host + path
end
758
+
759
+
760
+
761
+ # Executes a block. If there is a connection- or corruption-related exception
762
+ # the block will be reexecuted up to the `tries' argument. It will sleep
763
+ # for an exponentially-increasing number of seconds between retries.
764
def attempt(tries, &blk)
  # An explicit count must be positive; without one the instance default
  # is used.
  raise ArgumentError if tries && !(tries > 0)
  limit = tries || @attempts

  round = 1

  begin
    return yield(blk)
  rescue Errno::ECONNREFUSED, HTTPClient::TimeoutError,
         CorruptResult => err
    # Give up once the last permitted attempt has failed; otherwise wait
    # 2, 4, 8, ... seconds and run the block again.
    raise err if round == limit
    sleep 2 ** round
    round += 1
    retry
  end
end
784
+
785
+
786
+
787
+ # Creates headers to be given to the HTTP client and sent to the Manta
788
+ # service. The most important is the Authorization header, without which
789
+ # none of this class would work.
790
def gen_headers(opts)
  # The same date string is both sent as the Date header and signed.
  now = Time.now.httpdate
  sig = gen_signature('date: ' + now)

  headers = [[ 'Date',           now        ],
             [ 'Authorization',  sig        ],
             [ 'User-Agent',     HTTP_AGENT ],
             [ 'Accept-Version', '~1.0'     ]]

  # headers for conditional requests (dates)
  [[ :if_modified_since,   'If-Modified-Since'   ],
   [ :if_unmodified_since, 'If-Unmodified-Since' ]].each do |arg, conditional|
    date = opts[arg]
    next unless date

    date = Time.parse(date.to_s) unless date.kind_of? Time
    # Send the value in HTTP-date format. A raw Time object would be
    # stringified as e.g. "2013-01-02 03:04:05 UTC", which is not a valid
    # HTTP date.
    headers.push([conditional, date.httpdate])
  end

  # headers for conditional requests (etags)
  [[ :if_match,      'If-Match'      ],
   [ :if_none_match, 'If-None-Match' ]].each do |arg, conditional|
    etag = opts[arg]
    next unless etag

    raise ArgumentError unless etag.kind_of? String
    headers.push([conditional, etag])
  end

  # The literal 'null' is accepted as an origin alongside values matching
  # CORS_ORIGIN_REGEX.
  origin = opts[:origin]
  if origin
    raise ArgumentError unless origin == 'null' || origin =~ CORS_ORIGIN_REGEX
    headers.push([ 'Origin', origin ])
  end

  # add md5 hash when sending data
  data = opts[:data]
  if data
    md5 = Digest::MD5.base64digest(data)
    headers.push([ 'Content-MD5', md5 ])
  end

  return headers
end
835
+
836
+
837
+
838
# Do some sanity checks and create CORS-related headers.
#
# opts - Hash of optional settings; the keys read here are:
#        :access_control_allow_credentials - true/false or 'true'/'false'.
#        :access_control_allow_headers     - String accepted by
#            CORS_HEADERS_REGEX; names are lower-cased and sorted.
#        :access_control_allow_methods     - String of ', '-separated
#            methods, each of which must appear in CORS_METHODS.
#        :access_control_allow_origin      - '*', 'null' or a String
#            matching CORS_ORIGIN_REGEX.
#        :access_control_expose_headers    - same rules as allow_headers.
#        :access_control_max_age           - non-negative Integer.
#
# Options that are nil or false are skipped. Returns an Array of
# [name, value] pairs, in the order listed above. Raises ArgumentError for
# any value that fails its check.
#
# For more details, see http://www.w3.org/TR/cors/ and
# https://developer.mozilla.org/en-US/docs/HTTP/Access_control_CORS#Access-Control-Expose-Headers
def gen_cors_headers(opts)
  headers = []

  # Shared by the two header-list options: checks that the list is a String
  # in the shape CORS_HEADERS_REGEX accepts, then lower-cases and sorts the
  # names. The explicit String check keeps non-String input (e.g. an Array)
  # an ArgumentError, like the other options, instead of a NoMethodError
  # from calling =~ on an object that does not define it.
  normalize_list = lambda do |list|
    raise ArgumentError unless list.kind_of?(String) &&
                               list =~ CORS_HEADERS_REGEX
    list.split(', ').map(&:downcase).sort.join(', ')
  end

  allow_credentials = opts[:access_control_allow_credentials]
  if allow_credentials
    allow_credentials = allow_credentials.to_s
    raise ArgumentError unless allow_credentials == 'true' ||
                               allow_credentials == 'false'
    headers.push([ 'Access-Control-Allow-Credentials', allow_credentials ])
  end

  allow_headers = opts[:access_control_allow_headers]
  if allow_headers
    headers.push([ 'Access-Control-Allow-Headers',
                   normalize_list.call(allow_headers) ])
  end

  allow_methods = opts[:access_control_allow_methods]
  if allow_methods
    raise ArgumentError unless allow_methods.kind_of? String

    all_known = allow_methods.split(', ').all? do |str|
      CORS_METHODS.include? str
    end
    raise ArgumentError unless all_known

    headers.push([ 'Access-Control-Allow-Methods', allow_methods ])
  end

  allow_origin = opts[:access_control_allow_origin]
  if allow_origin
    raise ArgumentError unless allow_origin.kind_of? String
    raise ArgumentError unless allow_origin == '*' ||
                               allow_origin == 'null' ||
                               allow_origin =~ CORS_ORIGIN_REGEX
    headers.push([ 'Access-Control-Allow-Origin', allow_origin ])
  end

  expose_headers = opts[:access_control_expose_headers]
  if expose_headers
    headers.push([ 'Access-Control-Expose-Headers',
                   normalize_list.call(expose_headers) ])
  end

  max_age = opts[:access_control_max_age]
  if max_age
    raise ArgumentError unless max_age.kind_of?(Integer) && max_age >= 0
    headers.push([ 'Access-Control-Max-Age', max_age.to_s ])
  end

  headers
end
896
+
897
+
898
+
899
# Produces the HTTP signature string that the Manta service uses for
# authentication.
#
# data - the content to sign; must not be nil or false.
#
# The data is signed with @priv_key using @digest, the raw signature is
# base64-encoded without line breaks, and the result is substituted into
# HTTP_SIGNATURE together with @user, @fingerprint and @digest_name.
#
# Returns the formatted signature String. Raises ArgumentError when no data
# is given.
def gen_signature(data)
  raise ArgumentError unless data

  encoded = Base64.strict_encode64(@priv_key.sign(@digest, data))

  HTTP_SIGNATURE % [@user, @fingerprint, @digest_name, encoded]
end
909
+
910
+
911
+
912
# Raises an appropriate exception given the HTTP response.
#
# The response body is parsed as JSON and its 'code' entry is looked up as a
# constant through MantaClient. When that yields a StandardError subclass, it
# is raised with the body's 'message'. In every other case (unparseable or
# missing body, unknown code, or a code that resolves to something that is
# not a StandardError subclass) an UnknownError carrying the status and raw
# body is raised instead.
#
# result - the HTTP::Message describing the failed request. Anything else
#          triggers a bare raise.
#
# This method never returns normally.
def raise_error(result)
  raise unless result.is_a? HTTP::Message

  err   = JSON.parse(result.body)
  klass = MantaClient.const_get err['code']

  # The code comes from the server, and const_get also finds top-level
  # constants, so make sure it resolved to an ordinary error class before
  # raising it. Otherwise a code such as "SystemExit" would raise a
  # non-StandardError exception. TypeError sends us to the fallback below.
  raise TypeError unless klass.is_a?(Class) && klass <= StandardError

  raise klass, err['message']
rescue NameError, TypeError, JSON::ParserError
  # Interpolate rather than concatenate: the body may be nil, and String#+
  # would then raise a TypeError of its own from inside this handler.
  raise UnknownError, "#{result.status}: #{result.body}"
end
924
+ end
925
+