ruby-manta 1.2.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,989 @@
1
+ # Copyright (c) 2012, Joyent, Inc. All rights reserved.
2
+ #
3
+ # ruby-manta is a simple low-abstraction layer which communicates with Joyent's
4
+ # Manta service.
5
+ #
6
+ # Manta is an HTTP-accessible object store supporting UNIX-based map-reduce
7
+ # jobs. Through ruby-manta a programmer can save/overwrite/delete objects
8
+ # stored on a Manta service, or run map-reduce jobs over those objects.
9
+ #
10
+ # ruby-manta should be thread-safe, and supports pooling of keep-alive
11
+ # connections to the same server (through HTTPClient). It only relies on the
12
+ # standard library and two pure Ruby libraries, so it should work anywhere.
13
+ #
14
+ # For more information about Manta and general ruby-manta usage, please see
15
+ # README.md.
16
+
17
+
18
+
19
+ require 'openssl'
20
+ require 'net/ssh'
21
+ require 'httpclient'
22
+ require 'base64'
23
+ require 'digest'
24
+ require 'time'
25
+ require 'json'
26
+ require 'cgi'
27
+ require 'uri'
28
+
29
+ require File.expand_path('../version', __FILE__)
30
+
31
+
32
+ module RubyManta
33
+ class MantaClient
34
# Retry and timeout defaults; the timeouts are in seconds.
DEFAULT_ATTEMPTS        = 3
DEFAULT_CONNECT_TIMEOUT = 5
DEFAULT_SEND_TIMEOUT    = 60
DEFAULT_RECEIVE_TIMEOUT = 60

# Default and upper bound for the :limit option of list_directory.
MAX_LIMIT = 1000

HTTP_AGENT     = "ruby-manta/#{VERSION} (#{RUBY_PLATFORM}; #{OpenSSL::OPENSSL_VERSION}) ruby/#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL}".freeze
HTTP_SIGNATURE = 'Signature keyId="/%s/keys/%s",algorithm="%s",signature="%s"'.freeze

# The validation patterns below are anchored with \A and \z rather than ^ and
# $: in Ruby ^/$ match at every line boundary, so a value containing a newline
# would pass validation as long as any single line of it looked valid.
OBJ_PATH_REGEX = Regexp.new('\A/[^/]+(?:/?\z|/stor|/public|/reports|/jobs)(?:/|\z)')
JOB_PATH_REGEX = Regexp.new('\A/[^/]+/jobs(?:/|\z)')

# match one or more protocol and hostnames, with optional port numbers.
# E.g. "http://example.com https://example.com:8443"
CORS_ORIGIN_REGEX  = Regexp.new('\A\w+://[^\s\:]+(?:\:\d+)?' +
                                '(?:\s\w+://[^\s\:]+(?:\:\d+)?)*\z')
CORS_HEADERS_REGEX = Regexp.new('\A[\w-]+(?:, [\w-]+)*\z')
CORS_METHODS       = %w[GET POST PUT DELETE OPTIONS].freeze

# Names of the exception classes generated further down in this class. The
# second list holds errors that are specific to this class.
ERROR_CLASSES = (%w[AuthorizationFailed AuthSchemeNotAllowed
                    BadRequest Checksum ConcurrentRequest
                    ContentLength ContentMD5Mismatch
                    DirectoryDoesNotExist DirectoryExists
                    DirectoryNotEmpty DirectoryOperation
                    EntityExists Internal InvalidArgument
                    InvalidAuthToken InvalidCredentials
                    InvalidDurabilityLevel InvalidJob InvalidKeyId
                    InvalidLink InvalidSignature InvalidJobState
                    JobNotFound JobState KeyDoesNotExist
                    LinkNotFound LinkNotObject LinkRequired
                    NotAcceptable NotEnoughSpace ParentNotDirectory
                    PreconditionFailed PreSignedRequest
                    RequestEntityTooLarge ResourceNotFound
                    RootDirectory SecureTransportRequired
                    ServiceUnavailable SourceObjectNotFound
                    SSLRequired TaskInit UploadTimeout
                    UserDoesNotExist UserTaskError] +
                 %w[CorruptResult UnknownError UnsupportedKey]).freeze
72
+
73
+
74
+
75
+ # Initialize a MantaClient instance.
76
+ #
77
+ # priv_key_data is data read directly from an SSH private key (i.e. RFC 4716
78
+ # format). The method can also accept several optional args: :connect_timeout,
79
+ # :send_timeout, :receive_timeout, :disable_ssl_verification and :attempts.
80
+ # The timeouts are in seconds, and :attempts determines the default number of
81
+ # attempts each method will make upon receiving recoverable errors.
82
+ #
83
+ # Will throw an exception if given a key whose format it doesn't understand.
84
def initialize(host, user, priv_key_data, opts = {})
  # host must be an http(s) URL and user a non-empty String.
  raise ArgumentError unless host =~ /^https{0,1}:\/\/.*[^\/]/
  raise ArgumentError unless user.is_a?(String) && user.size > 0

  @host    = host
  @user    = user
  @subuser = opts[:subuser] ? opts[:subuser] : nil

  @attempts = opts[:attempts] || DEFAULT_ATTEMPTS
  raise ArgumentError unless @attempts > 0

  # The key type decides both the digest used for signing and the algorithm
  # name advertised in the Authorization header.
  if priv_key_data =~ /BEGIN RSA/
    @digest      = OpenSSL::Digest::SHA1.new
    @digest_name = 'rsa-sha1'
    algorithm    = OpenSSL::PKey::RSA
  elsif priv_key_data =~ /BEGIN DSA/
    # OpenSSL::Digest::DSS1 only exists when Ruby is built against old OpenSSL
    # releases; where it is missing, SHA1 is the digest to use for DSA keys.
    @digest = if OpenSSL::Digest.const_defined?(:DSS1, false)
                OpenSSL::Digest::DSS1.new
              else
                OpenSSL::Digest::SHA1.new
              end
    @digest_name = 'dsa-sha1'
    algorithm    = OpenSSL::PKey::DSA
  else
    raise UnsupportedKey
  end

  @priv_key = algorithm.new(priv_key_data)

  # Colon-separated MD5 of the key blob, used as the key id when signing.
  @fingerprint = OpenSSL::Digest::MD5.hexdigest(@priv_key.to_blob).
                                      scan(/../).join(':')

  @client = HTTPClient.new
  @client.connect_timeout = opts[:connect_timeout] || DEFAULT_CONNECT_TIMEOUT
  @client.send_timeout    = opts[:send_timeout]    || DEFAULT_SEND_TIMEOUT
  @client.receive_timeout = opts[:receive_timeout] || DEFAULT_RECEIVE_TIMEOUT
  @client.ssl_config.verify_mode = nil if opts[:disable_ssl_verification]

  @job_base = '/' + @user + '/jobs'
end
119
+
120
+
121
+
122
+ # Uploads object data to Manta to the given path, along with a computed MD5
123
+ # hash.
124
+ #
125
+ # The path must start with /<user>/stor or /<user>/public. Data can be any
126
+ # sequence of octets. The HTTP Content-Type stored on Manta can be set
127
+ # with an optional :content_type argument; the default is
128
+ # application/octet-stream. The number of distributed replicates of an object
129
+ # stored in Manta can be set with an optional :durability_level; the default
130
+ # is 2.
131
+ #
132
+ # Returns true along with received HTTP headers.
133
+ #
134
+ # If there was an unrecoverable error, throws an exception. On connection or
135
+ # corruption errors, more attempts will be made; the number of attempts can
136
+ # be altered by passing in :attempts.
137
def put_object(obj_path, data, opts = {})
  url = obj_url(obj_path)

  # Work on a copy of opts: storing :data in the caller's hash would make a
  # reused hash carry this payload (and its Content-MD5) into later calls.
  opts    = opts.merge(:data => data)
  headers = gen_headers(opts)
  headers.concat(gen_cors_headers(opts))

  durability_level = opts[:durability_level]
  if durability_level
    raise ArgumentError unless durability_level > 0
    headers.push([ 'Durability-Level', durability_level ])
  end

  content_type = opts[:content_type]
  if content_type
    raise ArgumentError unless content_type.is_a? String
    headers.push([ 'Content-Type', content_type ])
  end

  attempt(opts[:attempts]) do
    result = @client.put(url, data, headers)
    raise unless result.is_a? HTTP::Message

    # 204 and 304 both count as success; anything else becomes an exception.
    return true, result.headers if [204, 304].include? result.status
    raise_error(result)
  end
end
166
+
167
+
168
+
169
+ # Get an object from Manta at a given path, and checks it's uncorrupted.
170
+ #
171
+ # The path must start with /<user>/stor or /<user>/public and point at an
172
+ # actual object, as well as output objects for jobs. :head => true can
173
+ # optionally be passed in to do a HEAD instead of a GET.
174
+ #
175
+ # Returns the retrieved data along with received HTTP headers.
176
+ #
177
+ # If there was an unrecoverable error, throws an exception. On connection or
178
+ # corruption errors, more attempts will be made; the number of attempts can
179
+ # be altered by passing in :attempts.
180
def get_object(obj_path, opts = {})
  target = obj_url(obj_path)
  signed = gen_headers(opts)

  attempt(opts[:attempts]) do
    verb     = opts[:head] ? :head : :get
    response = @client.send(verb, target, nil, signed)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      # A HEAD request has no body to verify.
      return true, response.headers if verb == :head

      # Compare the checksum sent by the server with one computed locally;
      # a mismatch raises CorruptResult, which attempt() retries.
      expected = response.headers['Content-MD5']
      actual   = Digest::MD5.base64digest(response.body)
      raise CorruptResult if expected != actual

      return response.body, response.headers
    elsif response.status == 304
      return nil, response.headers
    end

    raise_error(response)
  end
end
204
+
205
+
206
+
207
+ # Deletes an object off Manta at a given path.
208
+ #
209
+ # The path must start with /<user>/stor or /<user>/public and point at an
210
+ # actual object.
211
+ #
212
+ # Returns true along with received HTTP headers.
213
+ #
214
+ # If there was an unrecoverable error, throws an exception. On connection or
215
+ # corruption errors, more attempts will be made; the number of attempts can
216
+ # be altered by passing in :attempts.
217
def delete_object(obj_path, opts = {})
  target = obj_url(obj_path)
  signed = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.delete(target, nil, signed)
    raise unless response.is_a? HTTP::Message

    # 204 is the only status treated as success.
    if response.status == 204
      return true, response.headers
    end

    raise_error(response)
  end
end
229
+
230
+
231
+
232
+ # Creates a directory on Manta at a given path.
233
+ #
234
+ # The path must start with /<user>/stor or /<user>/public.
235
+ #
236
+ # Returns true along with received HTTP headers.
237
+ #
238
+ # If there was an unrecoverable error, throws an exception. On connection or
239
+ # corruption errors, more attempts will be made; the number of attempts can
240
+ # be altered by passing in :attempts.
241
def put_directory(dir_path, opts = {})
  target = obj_url(dir_path)

  # Signed headers first, then the directory content type, then any CORS
  # headers requested through opts.
  signed = gen_headers(opts)
  signed << [ 'Content-Type', 'application/json; type=directory' ]
  signed.concat(gen_cors_headers(opts))

  attempt(opts[:attempts]) do
    response = @client.put(target, nil, signed)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return true, response.headers
    end

    raise_error(response)
  end
end
257
+
258
+
259
+
260
+ # Gets a lexicographically sorted directory listing on Manta at a given path.
261
+ #
262
+ # The path must start with /<user>/stor or /<user>/public and point at an
263
+ # actual directory. :limit optionally changes the maximum number of entries;
264
+ # the default is 1000. If given :marker, an object name in the directory,
265
+ # returned directory entries will begin from that point. :head => true can
266
+ # optionally be passed in to do a HEAD instead of a GET.
267
+ #
268
+ # Returns an array of hash objects, each object representing a directory
269
+ # entry. Also returns the received HTTP headers.
270
+ #
271
+ # If there was an unrecoverable error, throws an exception. On connection or
272
+ # corruption errors, more attempts will be made; the number of attempts can
273
+ # be altered by passing in :attempts.
274
def list_directory(dir_path, opts = {})
  target = obj_url(dir_path)
  signed = gen_headers(opts)

  # :limit falls back to MAX_LIMIT and must lie in 1..MAX_LIMIT.
  limit = opts[:limit] || MAX_LIMIT
  raise ArgumentError unless 0 < limit && limit <= MAX_LIMIT
  query = { :limit => limit }

  marker = opts[:marker]
  if marker
    raise ArgumentError unless marker.is_a? String
    query[:marker] = marker
  end

  attempt(opts[:attempts]) do
    verb     = opts[:head] ? :head : :get
    response = @client.send(verb, target, query, signed)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      unless response.headers['Content-Type'] == 'application/x-json-stream; type=directory'
        raise
      end

      return true, response.headers if verb == :head

      # The body is one JSON document per line; receiving more lines than were
      # asked for is treated as a corrupt (retryable) result.
      lines = response.body.split("\n")
      raise CorruptResult if lines.size > limit

      entries = lines.map { |line| JSON.parse(line) }
      return entries, response.headers
    end

    raise_error(response)
  end
end
314
+
315
+
316
+ # Finds all objects recursively under a given directory. Optionally, a regular
317
+ # expression can be specified and used to filter the results returned.
318
def find(dir_path, opts = {})
  regex = opts[:regex]

  # The listings below must be GETs, so drop :head -- on a copy, so the
  # caller's hash is left untouched.
  opts = opts.reject { |key, _| key == :head }

  # Probe the directory with a HEAD first; any failure is treated as "does not
  # exist" and yields an empty result rather than an exception.
  begin
    exists = list_directory(dir_path, :head => true).first
  rescue
    exists = false
  end

  return [] unless exists

  listing = list_directory(dir_path, opts).first

  # A :marker names an entry of the directory it was given for, so it is not
  # carried into sub-directories.
  sub_opts = opts.reject { |key, _| key == :marker }

  listing.inject([]) do |memo, obj|
    entry_path = "#{dir_path}/#{obj['name']}"

    case obj['type']
    when 'directory'
      # Recurse with the same options (including :regex) as this call.
      memo.concat(find(entry_path, sub_opts))
    when 'object'
      memo.push(entry_path) if !regex || obj['name'].match(regex)
    end

    memo
  end
end
354
+
355
+
356
+
357
+ # Removes a directory from Manta at a given path.
358
+ #
359
+ # The path must start with /<user>/stor or /<user>/public and point at an
360
+ # actual directory.
361
+ #
362
+ # Returns true along with received HTTP headers.
363
+ #
364
+ # If there was an unrecoverable error, throws an exception. On connection or
365
+ # corruption errors, more attempts will be made; the number of attempts can
366
+ # be altered by passing in :attempts.
367
def delete_directory(dir_path, opts = {})
  target = obj_url(dir_path)
  signed = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.delete(target, nil, signed)
    raise unless response.is_a? HTTP::Message

    # 204 is the only status treated as success.
    if response.status == 204
      return true, response.headers
    end

    raise_error(response)
  end
end
379
+
380
+
381
+
382
+ # Creates a snaplink from one object in Manta at a given path to a different
383
+ # path.
384
+ #
385
+ # Both paths should start with /<user>/stor or /<user>/public.
386
+ #
387
+ # Returns true along with received HTTP headers.
388
+ #
389
+ # If there was an unrecoverable error, throws an exception. On connection or
390
+ # corruption errors, more attempts will be made; the number of attempts can
391
+ # be altered by passing in :attempts.
392
def put_snaplink(orig_path, link_path, opts = {})
  signed = gen_headers(opts)

  # The link content type marks this PUT as a snaplink; Location carries the
  # URL of the object being linked to.
  signed << [ 'Content-Type', 'application/json; type=link' ]
  signed << [ 'Location', obj_url(orig_path) ]

  attempt(opts[:attempts]) do
    response = @client.put(obj_url(link_path), nil, signed)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return true, response.headers
    end

    raise_error(response)
  end
end
405
+
406
+
407
+
408
+ # Creates a job in Manta.
409
+ #
410
+ # The job must be a hash, containing at minimum a :phases key. See README.md
411
+ # or the Manta docs to see the format and options for setting up a job on
412
+ # Manta; this method effectively just converts the job hash to JSON and sends
413
+ # to the Manta service.
414
+ #
415
+ # Returns the path for the new job, along with received HTTP headers.
416
+ #
417
+ # If there was an unrecoverable error, throws an exception. On connection or
418
+ # corruption errors, more attempts will be made; the number of attempts can
419
+ # be altered by passing in :attempts.
420
def create_job(job, opts = {})
  # The phases may be keyed by symbol or by string.
  raise ArgumentError unless job[:phases] || job['phases']

  signed = gen_headers(opts)
  signed << [ 'Content-Type', 'application/json; type=job' ]
  payload = job.to_json

  attempt(opts[:attempts]) do
    response = @client.post(job_url(), payload, signed)
    raise unless response.is_a? HTTP::Message

    if response.status == 201
      # The path of the new job is taken from the Location header.
      job_location = response.headers['Location']
      raise unless job_location

      return job_location, response.headers
    end

    raise_error(response)
  end
end
441
+
442
+
443
+
444
+ # Gets various information about a job in Manta at a given path.
445
+ #
446
+ # The path must start with /<user>/jobs/<job UUID> and point at an actual job.
447
+ # :head => true can optionally be passed in to do a HEAD instead of a GET.
448
+ #
449
+ # Returns a hash with job information, along with received HTTP headers.
450
+ #
451
+ # If there was an unrecoverable error, throws an exception. On connection or
452
+ # corruption errors, more attempts will be made; the number of attempts can
453
+ # be altered by passing in :attempts.
454
def get_job(job_path, opts = {})
  status_url = job_url(job_path, '/live/status')
  signed     = gen_headers(opts)

  attempt(opts[:attempts]) do
    verb     = opts[:head] ? :head : :get
    response = @client.send(verb, status_url, nil, signed)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      raise unless response.headers['Content-Type'] == 'application/json'

      # HEAD requests report success only; GET parses the job description.
      return true, response.headers if verb == :head
      return JSON.parse(response.body), response.headers
    end

    raise_error(response)
  end
end
475
+
476
+
477
+
478
+ # Gets errors that occurred during the execution of a job in Manta at a given
479
+ # path.
480
+ #
481
+ # The path must start with /<user>/jobs/<job UUID> and point at an actual job.
482
+ # :head => true can optionally be passed in to do a HEAD instead of a GET.
483
+ #
484
+ # Returns an array of hashes, each hash containing information about an
485
+ # error; this information is best-effort by Manta, so it may not be complete.
486
+ # Also returns received HTTP headers.
487
+ #
488
+ # If there was an unrecoverable error, throws an exception. On connection or
489
+ # corruption errors, more attempts will be made; the number of attempts can
490
+ # be altered by passing in :attempts.
491
def get_job_errors(job_path, opts = {})
  errors_url = job_url(job_path, '/live/err')
  signed     = gen_headers(opts)

  attempt(opts[:attempts]) do
    verb     = opts[:head] ? :head : :get
    response = @client.send(verb, errors_url, nil, signed)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      unless response.headers['Content-Type'] == 'application/x-json-stream; type=job-error'
        raise
      end

      return true, response.headers if verb == :head

      # One JSON document per line of the body.
      parsed = response.body.split("\n").map { |line| JSON.parse(line) }
      return parsed, response.headers
    end

    raise_error(response)
  end
end
515
+
516
+
517
+
518
+ # Cancels a running job in Manta at a given path.
519
+ #
520
+ # The path must start with /<user>/jobs/<job UUID> and point at an actual job.
521
+ #
522
+ # Returns true, along with received HTTP headers.
523
+ #
524
+ # If there was an unrecoverable error, throws an exception. On connection or
525
+ # corruption errors, more attempts will be made; the number of attempts can
526
+ # be altered by passing in :attempts.
527
def cancel_job(job_path, opts = {})
  url  = job_url(job_path, 'live/cancel')
  body = '{}'

  # Hand gen_headers a copy carrying the body (so Content-MD5 is computed for
  # it) instead of writing :data into the caller's hash.
  headers = gen_headers(opts.merge(:data => body))

  headers << [ 'Accept', 'application/json' ]
  headers << [ 'Content-Type', 'application/json' ]
  headers << [ 'Content-Length', body.bytesize ]

  args = {
    :header => headers,
    :body   => body
  }

  attempt(opts[:attempts]) do
    result = @client.post(url, args)
    raise unless result.is_a? HTTP::Message

    # 202 is the only status treated as success.
    return true, result.headers if result.status == 202
    raise_error(result)
  end
end
553
+
554
+
555
+
556
+ # Adds objects for a running job in Manta to process.
557
+ #
558
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
559
+ # running job. The obj_paths must be an array of paths, starting with
560
+ # /<user>/stor or /<user>/public, pointing at actual objects.
561
+ #
562
+ # Returns true, along with received HTTP headers.
563
+ #
564
+ # If there was an unrecoverable error, throws an exception. On connection or
565
+ # corruption errors, more attempts will be made; the number of attempts can
566
+ # be altered by passing in :attempts.
567
def add_job_keys(job_path, obj_paths, opts = {})
  input_url = job_url(job_path, '/live/in')

  signed = gen_headers(opts)
  signed << [ 'Content-Type', 'text/plain' ]

  # The object paths are sent one per line.
  payload = obj_paths.join("\n")

  attempt(opts[:attempts]) do
    response = @client.post(input_url, payload, signed)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return true, response.headers
    end

    raise_error(response)
  end
end
582
+
583
+
584
+
585
+ # Inform Manta that no more objects will be added for processing by a job,
586
+ # and that the job should finish all phases and terminate.
587
+ #
588
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
589
+ # running job.
590
+ #
591
+ # Returns true, along with received HTTP headers.
592
+ #
593
+ # If there was an unrecoverable error, throws an exception. On connection or
594
+ # corruption errors, more attempts will be made; the number of attempts can
595
+ # be altered by passing in :attempts.
596
def end_job_input(job_path, opts = {})
  end_url = job_url(job_path, '/live/in/end')
  signed  = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.post(end_url, nil, signed)
    raise unless response.is_a? HTTP::Message

    # 202 is the only status treated as success.
    if response.status == 202
      return true, response.headers
    end

    raise_error(response)
  end
end
608
+
609
+
610
+
611
+ # Get a list of objects that have been given to a Manta job for processing.
612
+ #
613
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
614
+ # running job.
615
+ #
616
+ # Returns an array of object paths, along with received HTTP headers.
617
+ #
618
+ # If there was an unrecoverable error, throws an exception. On connection or
619
+ # corruption errors, more attempts will be made; the number of attempts can
620
+ # be altered by passing in :attempts.
621
def get_job_input(job_path, opts = {})
  # Thin wrapper: the :in stream of the job.
  stream = :in
  get_job_state_streams(stream, job_path, opts)
end
624
+
625
+
626
+
627
+ # Get a list of objects that contain the intermediate results of a running
628
+ # Manta job.
629
+ #
630
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
631
+ # running job.
632
+ #
633
+ # Returns an array of object paths, along with received HTTP headers.
634
+ #
635
+ # If there was an unrecoverable error, throws an exception. On connection or
636
+ # corruption errors, more attempts will be made; the number of attempts can
637
+ # be altered by passing in :attempts.
638
def get_job_output(job_path, opts = {})
  # Thin wrapper: the :out stream of the job.
  stream = :out
  get_job_state_streams(stream, job_path, opts)
end
641
+
642
+
643
+
644
+ # Get a list of objects that had failures during processing in a Manta job.
645
+ #
646
+ # The job_path must start with /<user>/jobs/<job UUID> and point at an actual
647
+ # running job.
648
+ #
649
+ # Returns an array of object paths, along with received HTTP headers.
650
+ #
651
+ # If there was an unrecoverable error, throws an exception. On connection or
652
+ # corruption errors, more attempts will be made; the number of attempts can
653
+ # be altered by passing in :attempts.
654
def get_job_failures(job_path, opts = {})
  # Thin wrapper: the :fail stream of the job.
  stream = :fail
  get_job_state_streams(stream, job_path, opts)
end
657
+
658
+
659
+
660
+ # Get lists of Manta jobs.
661
+ #
662
+ # The state indicates which kind of jobs to return. :running is for jobs
663
+ # that are currently processing, :done and :all should be obvious. Be careful
664
+ # of the latter two if you've run a lot of jobs -- the list could be quite
665
+ # long.
666
+ #
667
+ # Returns an array of hashes, each hash containing some information about a
668
+ # job. Also returns received HTTP headers.
669
+ #
670
+ # If there was an unrecoverable error, throws an exception. On connection or
671
+ # corruption errors, more attempts will be made; the number of attempts can
672
+ # be altered by passing in :attempts.
673
def list_jobs(state, opts = {})
  raise ArgumentError unless [:all, :running, :done].include? state

  # :all is expressed as "no state filter".
  filter = state == :all ? nil : state
  signed = gen_headers(opts)

  attempt(opts[:attempts]) do
    # Always a GET: HEAD stays disabled until added to the Manta service.
    verb     = :get
    response = @client.send(verb, job_url(), { :state => filter }, signed)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      # An empty body means there are no jobs to report.
      return [], response.headers if response.body.size == 0

      unless response.headers['Content-Type'] == 'application/x-json-stream; type=job'
        raise
      end

      jobs = response.body.split("\n").map { |line| JSON.parse(line) }
      return jobs, response.headers
    end

    raise_error(response)
  end
end
701
+
702
+
703
+
704
+ # Generates a signed URL which can be used by unauthenticated users to
705
+ # make a request to Manta at the given path. This is typically used to GET
706
+ # an object, or to make a CORS preflighted PUT request.
707
+ #
708
+ # expires is a Time object or integer representing time after epoch; this
709
+ # determines how long the signed URL will be valid for. The method is either a
710
+ # single HTTP method (:get, :put, :post, :delete, :options) or a list of such
711
+ # methods that the signed URL is allowed to be used for. The path must start
712
+ # with /<user>/stor. Lastly, the optional args is an array containing pairs of
713
+ # query args that will be appended at the end of the URL.
714
+ #
715
+ # The returned URL is signed, and can be used either over HTTP or HTTPS until
716
+ # it reaches the expiry date.
717
def gen_signed_url(expires, method, path, args=[])
  methods = method.is_a?(Array) ? method : [method]
  raise ArgumentError unless (methods - [:get, :put, :post, :delete, :options]).empty?
  raise ArgumentError unless path =~ OBJ_PATH_REGEX

  key_id = '/%s/keys/%s' % [user_path, @fingerprint]

  # Build the query on a new array so the caller's args are not modified.
  query = args + [[ 'expires',   expires.to_i ],
                  [ 'algorithm', @digest_name ],
                  [ 'keyId',     key_id       ]]

  method = methods.map { |m| m.to_s.upcase }.sort.join(",")
  query.push([ 'method', method ]) if methods.count > 1

  # URI.encode no longer exists on Ruby 3; the RFC 2396 parser's escape is the
  # same implementation it used to delegate to.
  parser = URI.const_defined?(:RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER

  # Only the last "/"-separated piece of @host is used, so the result carries
  # no scheme.
  host = parser.escape(@host.split('/').last)
  path = parser.escape(path)

  encoded_args = query.sort.map do |key, val|
    # to comply with RFC 3986
    CGI.escape(key.to_s) + '=' + CGI.escape(val.to_s)
  end.join('&')

  # The signature covers method, host, path and the sorted, encoded query.
  plaintext = "#{method}\n#{host}\n#{path}\n#{encoded_args}"
  signature = @priv_key.sign(@digest, plaintext)
  encoded_signature = CGI.escape(Base64.strict_encode64(signature))

  host + path + '?' + encoded_args + '&signature=' + encoded_signature
end
745
+
746
+
747
+
748
+ # Create some Manta error classes
749
# Common ancestor of every error class generated below.
class MantaClientError < StandardError; end

# One MantaClientError subclass per name listed in ERROR_CLASSES.
ERROR_CLASSES.each do |error_name|
  MantaClient.const_set(error_name, Class.new(MantaClientError))
end
753
+
754
+
755
+
756
+ # Creates a qualified user path consisting of the user and subuser if the
757
+ # subuser is present. Otherwise, it returns the user
758
def user_path
  # Without a subuser the plain user name is the whole path.
  return @user unless @subuser

  "#{@user}/#{@subuser}"
end
761
+
762
+
763
+
764
+ # ---------------------------------------------------------------------------
765
+ protected
766
+
767
+
768
+
769
+ # Fetch lists of objects that have a given status.
770
+ #
771
+ # type takes one of three values (:in, :out, :fail), path must start with
772
+ # /<user>/jobs/<job UUID> and point at an actual job.
773
+ #
774
+ # Returns an array of object paths, along with received HTTP headers.
775
+ #
776
+ # If there was an unrecoverable error, throws an exception. On connection or
777
+ # corruption errors, more attempts will be made; the number of attempts can
778
+ # be altered by passing in :attempts.
779
def get_job_state_streams(type, path, opts)
  raise ArgumentError unless [:in, :out, :fail].include? type

  stream_url = job_url(path, '/live/' + type.to_s)
  signed     = gen_headers(opts)

  attempt(opts[:attempts]) do
    # Always a GET: HEAD stays disabled until added to the Manta service, so
    # the :head branch below is kept only for when that changes.
    verb     = :get
    response = @client.send(verb, stream_url, nil, signed)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      raise unless response.headers['Content-Type'] == 'text/plain'
      return true, response.headers if verb == :head

      # One object path per line of the body.
      return response.body.split("\n"), response.headers
    end

    raise_error(response)
  end
end
801
+
802
+
803
+
804
+ # Returns a full URL for a given path to an object.
805
def obj_url(path)
  raise ArgumentError unless path =~ OBJ_PATH_REGEX

  # URI.encode no longer exists on Ruby 3; the RFC 2396 parser's escape is the
  # same implementation it used to delegate to.
  parser = URI.const_defined?(:RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
  parser.escape(@host + path)
end
810
+
811
+
812
+
813
+ # Returns a full URL for a given path to a job.
814
def job_url(*args)
  # No arguments: the base jobs path of the user. Otherwise the first argument
  # must be a job path and all arguments are joined with "/".
  # NOTE(review): callers in this class pass suffixes both with and without a
  # leading slash, so the joined path can contain "//" -- confirm the service
  # accepts that before normalizing.
  path = if args.size == 0
           @job_base
         else
           raise ArgumentError unless args.first =~ JOB_PATH_REGEX
           args.join('/')
         end

  # URI.encode no longer exists on Ruby 3; the RFC 2396 parser's escape is the
  # same implementation it used to delegate to.
  parser = URI.const_defined?(:RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
  parser.escape(@host + path)
end
824
+
825
+
826
+
827
+ # Executes a block. If there is a connection- or corruption-related exception
828
+ # the block will be reexecuted up to the `tries' argument. It will sleep
829
+ # for an exponentially-increasing number of seconds between retries.
830
def attempt(tries, &blk)
  # An explicit count must be positive; otherwise use the instance default.
  raise ArgumentError if tries && !(tries > 0)
  limit = tries || @attempts

  count = 1

  begin
    yield blk
  rescue Errno::ECONNREFUSED, HTTPClient::TimeoutError,
         CorruptResult => err
    # Give up once the last permitted attempt has failed; otherwise back off
    # for 2, 4, 8, ... seconds and run the block again.
    raise err if count == limit
    sleep 2 ** count
    count += 1
    retry
  end
end
850
+
851
+
852
+
853
+ # Creates headers to be given to the HTTP client and sent to the Manta
854
+ # service. The most important is the Authorization header, without which
855
+ # none of this class would work.
856
def gen_headers(opts)
  # The signature covers the Date header, so both use the same timestamp.
  now = Time.now.httpdate
  sig = gen_signature('date: ' + now)

  headers = [[ 'Date',           now        ],
             [ 'Authorization',  sig        ],
             [ 'User-Agent',     HTTP_AGENT ],
             [ 'Accept-Version', '~1.0'     ]]

  # headers for conditional requests (dates)
  [[ :if_modified_since,   'If-Modified-Since'   ],
   [ :if_unmodified_since, 'If-Unmodified-Since' ]].each do |arg, conditional|
    date = opts[arg]
    next unless date

    date = Time.parse(date.to_s) unless date.kind_of? Time
    # Send an HTTP-date like the Date header above, not Time#to_s output.
    headers.push([ conditional, date.httpdate ])
  end

  # headers for conditional requests (etags)
  [[ :if_match,      'If-Match'      ],
   [ :if_none_match, 'If-None-Match' ]].each do |arg, conditional|
    etag = opts[arg]
    next unless etag

    raise ArgumentError unless etag.kind_of? String
    headers.push([ conditional, etag ])
  end

  origin = opts[:origin]
  if origin
    raise ArgumentError unless origin == 'null' || origin =~ CORS_ORIGIN_REGEX
    headers.push([ 'Origin', origin ])
  end

  # add md5 hash when sending data
  data = opts[:data]
  if data
    md5 = Digest::MD5.base64digest(data)
    headers.push([ 'Content-MD5', md5 ])
  end

  headers
end
901
+
902
+
903
+
904
+ # Do some sanity checks and create CORS-related headers
905
+ #
906
+ # For more details, see http://www.w3.org/TR/cors/ and
907
+ # https://developer.mozilla.org/en-US/docs/HTTP/Access_control_CORS#Access-Control-Expose-Headers
908
def gen_cors_headers(opts)
  cors = []

  # Validates a ", "-separated list of header names, then lower-cases and
  # sorts it.
  normalize = lambda do |list|
    raise ArgumentError unless list =~ CORS_HEADERS_REGEX
    list.split(', ').map(&:downcase).sort.join(', ')
  end

  credentials = opts[:access_control_allow_credentials]
  if credentials
    credentials = credentials.to_s
    raise ArgumentError unless %w[true false].include?(credentials)
    cors << [ 'Access-Control-Allow-Credentials', credentials ]
  end

  allowed = opts[:access_control_allow_headers]
  cors << [ 'Access-Control-Allow-Headers', normalize.call(allowed) ] if allowed

  verbs = opts[:access_control_allow_methods]
  if verbs
    raise ArgumentError unless verbs.kind_of? String
    # Every listed method has to be one of CORS_METHODS.
    raise ArgumentError unless verbs.split(', ').all? { |verb| CORS_METHODS.include? verb }
    cors << [ 'Access-Control-Allow-Methods', verbs ]
  end

  origin = opts[:access_control_allow_origin]
  if origin
    raise ArgumentError unless origin.kind_of? String
    unless origin == '*' || origin == 'null' || origin =~ CORS_ORIGIN_REGEX
      raise ArgumentError
    end
    cors << [ 'Access-Control-Allow-Origin', origin ]
  end

  exposed = opts[:access_control_expose_headers]
  cors << [ 'Access-Control-Expose-Headers', normalize.call(exposed) ] if exposed

  max_age = opts[:access_control_max_age]
  if max_age
    raise ArgumentError unless max_age.kind_of?(Integer) && max_age >= 0
    cors << [ 'Access-Control-Max-Age', max_age.to_s ]
  end

  cors
end
962
+
963
+ # Given a chunk of data, creates an HTTP signature which the Manta service
964
+ # understands and uses for authentication.
965
def gen_signature(data)
  raise ArgumentError unless data

  # Sign with the private key, then fill the Base64 form into the
  # Authorization header template.
  raw_signature = @priv_key.sign(@digest, data)
  encoded       = Base64.strict_encode64(raw_signature)

  format(HTTP_SIGNATURE, user_path, @fingerprint, @digest_name, encoded)
end
973
+
974
+
975
+
976
+ # Raises an appropriate exception given the HTTP response. If a 40* is
977
+ # returned, attempts to look up an appropriate error class and raise,
978
+ # otherwise raises an UnknownError.
979
def raise_error(result)
  raise unless result.is_a? HTTP::Message

  # A body that is not JSON, or not a JSON object, simply has no usable code.
  begin
    err  = JSON.parse(result.body)
    code = err['code']
  rescue NameError, TypeError, JSON::ParserError
    code = nil
  end

  # Only raise classes this client declared itself. The code comes from the
  # server, and an unrestricted const_get would resolve any global constant
  # (for example SystemExit or Interrupt) and raise it.
  if ERROR_CLASSES.include?(code)
    raise MantaClient.const_get(code), err['message']
  end

  raise UnknownError, "#{result.status}: #{result.body}"
end
988
+ end
989
+ end