ruby-manta 1.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,989 @@
1
+ # Copyright (c) 2012, Joyent, Inc. All rights reserved.
2
+ #
3
+ # ruby-manta is a simple low-abstraction layer which communicates with Joyent's
4
+ # Manta service.
5
+ #
6
+ # Manta is an HTTP-accessible object store supporting UNIX-based map-reduce
7
+ # jobs. Through ruby-manta a programmer can save/overwrite/delete objects
8
+ # stored on a Manta service, or run map-reduce jobs over those objects.
9
+ #
10
+ # ruby-manta should be thread-safe, and supports pooling of keep-alive
11
+ # connections to the same server (through HTTPClient). It only relies on the
12
+ # standard library and two pure Ruby libraries, so it should work anywhere.
13
+ #
14
+ # For more information about Manta and general ruby-manta usage, please see
15
+ # README.md.
16
+
17
+
18
+
19
+ require 'openssl'
20
+ require 'net/ssh'
21
+ require 'httpclient'
22
+ require 'base64'
23
+ require 'digest'
24
+ require 'time'
25
+ require 'json'
26
+ require 'cgi'
27
+ require 'uri'
28
+
29
+ require File.expand_path('../version', __FILE__)
30
+
31
+
32
+ module RubyManta
33
+ class MantaClient
34
# Defaults used when the caller does not override them (timeouts are in
# seconds), plus the largest page size list_directory accepts.
DEFAULT_ATTEMPTS        = 3
DEFAULT_CONNECT_TIMEOUT = 5
DEFAULT_SEND_TIMEOUT    = 60
DEFAULT_RECEIVE_TIMEOUT = 60
MAX_LIMIT               = 1000

# User-Agent value sent with every request.
HTTP_AGENT = "ruby-manta/#{VERSION} (#{RUBY_PLATFORM}; #{OpenSSL::OPENSSL_VERSION}) ruby/#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL}"
# Authorization header template: user path, key fingerprint, algorithm name,
# base64 signature.
HTTP_SIGNATURE = 'Signature keyId="/%s/keys/%s",algorithm="%s",signature="%s"'

# Path validators. They are anchored with \A and \z (whole string) rather
# than ^ and $ (per line), so a value with an embedded newline cannot pass
# validation just because one of its lines looks valid.
OBJ_PATH_REGEX = Regexp.new('\A/[^/]+(?:/?\z|/stor|/public|/reports|/jobs)(?:/|\z)')
JOB_PATH_REGEX = Regexp.new('\A/[^/]+/jobs(?:/|\z)')

# match one or more protocol and hostnames, with optional port numbers.
# E.g. "http://example.com https://example.com:8443"
CORS_ORIGIN_REGEX  = Regexp.new('\A\w+://[^\s\:]+(?:\:\d+)?' +
                                '(?:\s\w+://[^\s\:]+(?:\:\d+)?)*\z')
# A ", "-separated list of header names, e.g. "x-foo, x-bar".
CORS_HEADERS_REGEX = Regexp.new('\A[\w-]+(?:, [\w-]+)*\z')
CORS_METHODS       = [ 'GET', 'POST', 'PUT', 'DELETE', 'OPTIONS' ].freeze

# Names of the exception classes generated under MantaClient; raise_error
# looks up the "code" field of an error response among these names.
ERROR_CLASSES = [ 'AuthorizationFailed', 'AuthSchemeNotAllowed',
                  'BadRequest', 'Checksum', 'ConcurrentRequest',
                  'ContentLength', 'ContentMD5Mismatch',
                  'DirectoryDoesNotExist', 'DirectoryExists',
                  'DirectoryNotEmpty', 'DirectoryOperation',
                  'EntityExists', 'Internal', 'InvalidArgument',
                  'InvalidAuthToken', 'InvalidCredentials',
                  'InvalidDurabilityLevel', 'InvalidJob', 'InvalidKeyId',
                  'InvalidLink', 'InvalidSignature', 'InvalidJobState',
                  'JobNotFound', 'JobState', 'KeyDoesNotExist',
                  'LinkNotFound', 'LinkNotObject', 'LinkRequired',
                  'NotAcceptable', 'NotEnoughSpace', 'ParentNotDirectory',
                  'PreconditionFailed', 'PreSignedRequest',
                  'RequestEntityTooLarge', 'ResourceNotFound',
                  'RootDirectory', 'SecureTransportRequired',
                  'ServiceUnavailable', 'SourceObjectNotFound',
                  'SSLRequired', 'TaskInit', 'UploadTimeout',
                  'UserDoesNotExist', 'UserTaskError',
                  # and errors that are specific to this class:
                  'CorruptResult', 'UnknownError',
                  'UnsupportedKey' ].freeze
72
+
73
+
74
+
75
# Initialize a MantaClient instance.
#
# host must start with http:// or https://, and user must be a non-empty
# String. priv_key_data is the text of a private key; the key type is chosen
# by looking for "BEGIN RSA" or "BEGIN DSA" in it.
#
# Optional opts:
#   :subuser                   sub-user appended to the user path
#   :attempts                  default number of tries per request (> 0)
#   :connect_timeout, :send_timeout, :receive_timeout   in seconds
#   :disable_ssl_verification  turns off certificate verification
#
# Raises ArgumentError for a bad host, user or :attempts value, and
# UnsupportedKey if the key is neither RSA nor DSA.
def initialize(host, user, priv_key_data, opts = {})
  raise ArgumentError unless host =~ /^https{0,1}:\/\/.*[^\/]/
  raise ArgumentError unless user.is_a?(String) && user.size > 0

  @host    = host
  @user    = user
  @subuser = opts[:subuser] ? opts[:subuser] : nil

  @attempts = opts[:attempts] || DEFAULT_ATTEMPTS
  raise ArgumentError unless @attempts > 0

  if priv_key_data =~ /BEGIN RSA/
    @digest      = OpenSSL::Digest::SHA1.new
    @digest_name = 'rsa-sha1'
    algorithm    = OpenSSL::PKey::RSA
  elsif priv_key_data =~ /BEGIN DSA/
    # OpenSSL::Digest::DSS1 (SHA-1 for DSA keys) is absent from current
    # openssl bindings; referencing it unconditionally raised NameError for
    # every DSA key. Plain SHA1 is the equivalent digest there.
    @digest = if defined?(OpenSSL::Digest::DSS1)
                OpenSSL::Digest::DSS1.new
              else
                OpenSSL::Digest::SHA1.new
              end
    @digest_name = 'dsa-sha1'
    algorithm    = OpenSSL::PKey::DSA
  else
    raise UnsupportedKey
  end

  @priv_key = algorithm.new(priv_key_data)
  # Colon-separated MD5 fingerprint of the key blob, used as the key id.
  # NOTE(review): to_blob is presumably supplied by net/ssh -- confirm.
  @fingerprint = OpenSSL::Digest::MD5.hexdigest(@priv_key.to_blob).
                                      scan(/../).join(':')

  @client = HTTPClient.new
  @client.connect_timeout = opts[:connect_timeout] || DEFAULT_CONNECT_TIMEOUT
  @client.send_timeout    = opts[:send_timeout   ] || DEFAULT_SEND_TIMEOUT
  @client.receive_timeout = opts[:receive_timeout] || DEFAULT_RECEIVE_TIMEOUT
  @client.ssl_config.verify_mode = nil if opts[:disable_ssl_verification]

  @job_base = '/' + @user + '/jobs'
end
119
+
120
+
121
+
122
# Uploads object data to Manta at the given path, along with a computed MD5
# hash.
#
# The path must start with /<user>/stor or /<user>/public. Data can be any
# sequence of octets. Optional opts:
#   :content_type      String stored as the object's Content-Type
#   :durability_level  positive number of copies to keep
#   :attempts          overrides the default number of tries
# plus the CORS options understood by gen_cors_headers and the conditional
# request options understood by gen_headers.
#
# Returns true along with received HTTP headers on a 204 or 304 response.
#
# Raises ArgumentError for invalid options; any other response is turned
# into an exception by raise_error. Connection or corruption errors are
# retried by attempt. The caller's opts hash is not modified.
def put_object(obj_path, data, opts = {})
  url = obj_url(obj_path)

  # Pass the payload to gen_headers through a copy, so the caller's hash does
  # not end up carrying a :data key after the call.
  headers = gen_headers(opts.merge(:data => data))
  headers = headers.concat(gen_cors_headers(opts))

  durability_level = opts[:durability_level]
  if durability_level
    raise ArgumentError unless durability_level > 0
    headers.push([ 'Durability-Level', durability_level ])
  end

  content_type = opts[:content_type]
  if content_type
    raise ArgumentError unless content_type.is_a? String
    headers.push([ 'Content-Type', content_type ])
  end

  attempt(opts[:attempts]) do
    result = @client.put(url, data, headers)
    raise unless result.is_a? HTTP::Message

    return true, result.headers if [204, 304].include? result.status
    raise_error(result)
  end
end
166
+
167
+
168
+
169
# Fetches an object from Manta and verifies its MD5 checksum.
#
# obj_path must be accepted by obj_url (e.g. /<user>/stor/... or
# /<user>/public/...). Pass :head => true to issue a HEAD instead of a GET;
# :attempts overrides the default number of tries.
#
# Returns [body, headers] for a 200 GET, [true, headers] for a 200 HEAD and
# [nil, headers] for a 304. Raises CorruptResult (retried by attempt) when
# the body does not match the Content-MD5 header; other statuses are handed
# to raise_error.
def get_object(obj_path, opts = {})
  url     = obj_url(obj_path)
  headers = gen_headers(opts)

  attempt(opts[:attempts]) do
    verb     = opts[:head] ? :head : :get
    response = @client.send(verb, url, nil, headers)
    raise unless response.is_a? HTTP::Message

    case response.status
    when 200
      return [true, response.headers] if verb == :head

      # Compare the checksum the server advertised with one computed locally.
      advertised = response.headers['Content-MD5']
      computed   = Digest::MD5.base64digest(response.body)
      raise CorruptResult if advertised != computed

      return [response.body, response.headers]
    when 304
      return [nil, response.headers]
    end

    raise_error(response)
  end
end
204
+
205
+
206
+
207
# Removes the object stored at obj_path.
#
# obj_path must be accepted by obj_url (e.g. /<user>/stor/... or
# /<user>/public/...). :attempts overrides the default number of tries.
#
# Returns [true, headers] when Manta answers 204; any other response is
# converted into an exception by raise_error. Connection and corruption
# errors are retried by attempt.
def delete_object(obj_path, opts = {})
  target       = obj_url(obj_path)
  request_hdrs = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.delete(target, nil, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return [true, response.headers]
    end

    raise_error(response)
  end
end
229
+
230
+
231
+
232
# Creates a directory at dir_path.
#
# dir_path must be accepted by obj_url (e.g. /<user>/stor/... or
# /<user>/public/...). CORS options are validated and added by
# gen_cors_headers; :attempts overrides the default number of tries.
#
# Returns [true, headers] on a 204 response; other responses are converted
# into exceptions by raise_error. Connection and corruption errors are
# retried by attempt.
def put_directory(dir_path, opts = {})
  target = obj_url(dir_path)

  request_hdrs = gen_headers(opts)
  request_hdrs.push([ 'Content-Type', 'application/json; type=directory' ])
  request_hdrs = request_hdrs.concat(gen_cors_headers(opts))

  attempt(opts[:attempts]) do
    response = @client.put(target, nil, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return [true, response.headers]
    end

    raise_error(response)
  end
end
257
+
258
+
259
+
260
# Lists the entries of the directory at dir_path.
#
# dir_path must be accepted by obj_url. Optional opts:
#   :limit     maximum number of entries, 1..MAX_LIMIT (default MAX_LIMIT)
#   :marker    String name of the entry to start the listing from
#   :head      when true, issue a HEAD instead of a GET
#   :attempts  overrides the default number of tries
#
# Returns [entries, headers], where entries is an array of hashes parsed
# from the newline-delimited JSON body, or [true, headers] for a HEAD.
#
# Raises ArgumentError for a bad :limit or :marker, CorruptResult when more
# entries come back than were asked for, and whatever raise_error produces
# for a non-200 response.
def list_directory(dir_path, opts = {})
  url     = obj_url(dir_path)
  headers = gen_headers(opts)

  limit = opts[:limit] || MAX_LIMIT
  raise ArgumentError unless 0 < limit && limit <= MAX_LIMIT
  query = { :limit => limit }

  marker = opts[:marker]
  if marker
    raise ArgumentError unless marker.is_a? String
    query[:marker] = marker
  end

  attempt(opts[:attempts]) do
    verb     = opts[:head] ? :head : :get
    response = @client.send(verb, url, query, headers)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      raise unless response.headers['Content-Type'] ==
                   'application/x-json-stream; type=directory'

      return [true, response.headers] if verb == :head

      # One JSON document per line.
      lines = response.body.split("\n")
      raise CorruptResult if lines.size > limit

      entries = lines.map { |line| JSON.parse(line) }
      return [entries, response.headers]
    end

    raise_error(response)
  end
end
314
+
315
+
316
# Finds all objects recursively under a given directory.
#
# opts are passed on to list_directory for the listing itself (:head is
# ignored, because the listing must be a GET). opts[:regex], when given, is
# matched against each object's name and only matching objects are returned.
#
# Returns an array of full object paths; an empty array if dir_path cannot
# be listed. The caller's opts hash is not modified.
def find(dir_path, opts = {})
  regex = opts[:regex]

  # The listing below must be a GET. Work on a copy rather than deleting
  # :head from the caller's hash.
  list_opts = opts.dup
  list_opts.delete(:head)

  # Probe with a HEAD first; any failure is treated as "nothing to find".
  begin
    exists = list_directory(dir_path, head: true).first
  rescue
    exists = false
  end

  return [] unless exists

  listing = list_directory(dir_path, list_opts).first

  # A :marker only makes sense for the directory it was given for, so it is
  # not forwarded to sub-directory searches.
  sub_opts = list_opts.reject { |key, _| key == :marker }

  listing.inject([]) do |memo, obj|
    entry_path = "#{dir_path}/#{obj['name']}"

    case obj['type']
    when 'directory'
      # Recurse with the full options hash (not just the regex) so the
      # filter and request options still apply further down.
      memo.concat(find(entry_path, sub_opts))
    when 'object'
      memo.push(entry_path) if !regex || obj['name'].match(regex)
    end

    memo
  end
end
354
+
355
+
356
+
357
# Removes the directory at dir_path.
#
# dir_path must be accepted by obj_url (e.g. /<user>/stor/... or
# /<user>/public/...). :attempts overrides the default number of tries.
#
# Returns [true, headers] when Manta answers 204; any other response is
# converted into an exception by raise_error. Connection and corruption
# errors are retried by attempt.
def delete_directory(dir_path, opts = {})
  target       = obj_url(dir_path)
  request_hdrs = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.delete(target, nil, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return [true, response.headers]
    end

    raise_error(response)
  end
end
379
+
380
+
381
+
382
# Creates a snaplink at link_path that points to the object at orig_path.
#
# Both paths must be accepted by obj_url (e.g. /<user>/stor/... or
# /<user>/public/...). :attempts overrides the default number of tries.
#
# Returns [true, headers] on a 204 response; other responses are converted
# into exceptions by raise_error. Connection and corruption errors are
# retried by attempt.
def put_snaplink(orig_path, link_path, opts = {})
  request_hdrs = gen_headers(opts)
  request_hdrs.push([ 'Content-Type', 'application/json; type=link' ],
                    [ 'Location',     obj_url(orig_path)            ])

  attempt(opts[:attempts]) do
    response = @client.put(obj_url(link_path), nil, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return [true, response.headers]
    end

    raise_error(response)
  end
end
405
+
406
+
407
+
408
# Submits a new job to Manta.
#
# job must respond to [] and contain a :phases (or 'phases') entry; it is
# serialised with to_json and posted as-is. :attempts overrides the default
# number of tries.
#
# Returns [location, headers], where location is the Location header of the
# 201 response. Raises ArgumentError when the phases entry is missing; other
# responses are converted into exceptions by raise_error.
def create_job(job, opts = {})
  raise ArgumentError unless job[:phases] || job['phases']

  request_hdrs = gen_headers(opts)
  request_hdrs.push([ 'Content-Type', 'application/json; type=job' ])
  payload = job.to_json

  attempt(opts[:attempts]) do
    response = @client.post(job_url(), payload, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 201
      job_location = response.headers['Location']
      raise unless job_location

      return [job_location, response.headers]
    end

    raise_error(response)
  end
end
441
+
442
+
443
+
444
# Retrieves the status document of a job.
#
# job_path must be accepted by job_url (i.e. /<user>/jobs/...). Pass
# :head => true to issue a HEAD instead of a GET; :attempts overrides the
# default number of tries.
#
# Returns [job_hash, headers] for a GET, or [true, headers] for a HEAD. A 200
# response whose Content-Type is not application/json raises; other
# statuses are converted into exceptions by raise_error.
def get_job(job_path, opts = {})
  status_url   = job_url(job_path, '/live/status')
  request_hdrs = gen_headers(opts)

  attempt(opts[:attempts]) do
    verb     = opts[:head] ? :head : :get
    response = @client.send(verb, status_url, nil, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      raise unless response.headers['Content-Type'] == 'application/json'

      return [true, response.headers] if verb == :head

      return [JSON.parse(response.body), response.headers]
    end

    raise_error(response)
  end
end
475
+
476
+
477
+
478
# Retrieves the errors recorded while a job was executing.
#
# job_path must be accepted by job_url (i.e. /<user>/jobs/...). Pass
# :head => true to issue a HEAD instead of a GET; :attempts overrides the
# default number of tries.
#
# Returns [errors, headers], where errors is an array of hashes parsed from
# the newline-delimited JSON body, or [true, headers] for a HEAD. A 200
# response with an unexpected Content-Type raises; other statuses are
# converted into exceptions by raise_error.
def get_job_errors(job_path, opts = {})
  errors_url   = job_url(job_path, '/live/err')
  request_hdrs = gen_headers(opts)

  attempt(opts[:attempts]) do
    verb     = opts[:head] ? :head : :get
    response = @client.send(verb, errors_url, nil, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      raise unless response.headers['Content-Type'] ==
                   'application/x-json-stream; type=job-error'

      return [true, response.headers] if verb == :head

      # One JSON document per line.
      parsed = response.body.split("\n").map { |line| JSON.parse(line) }
      return [parsed, response.headers]
    end

    raise_error(response)
  end
end
515
+
516
+
517
+
518
# Cancels a running job in Manta at a given path.
#
# job_path must be accepted by job_url (i.e. /<user>/jobs/...). :attempts
# overrides the default number of tries.
#
# Returns true, along with received HTTP headers, on a 202 response. Other
# responses are converted into exceptions by raise_error; connection and
# corruption errors are retried by attempt. The caller's opts hash is not
# modified.
def cancel_job(job_path, opts = {})
  url  = job_url(job_path, 'live/cancel')
  body = '{}'

  # Hand the body to gen_headers (for the Content-MD5 header) through a copy
  # instead of writing a :data key into the caller's hash.
  headers = gen_headers(opts.merge(:data => body))

  headers << [ 'Accept',         'application/json' ]
  headers << [ 'Content-Type',   'application/json' ]
  headers << [ 'Content-Length', body.bytesize      ]

  args = {
    header: headers,
    body:   body
  }

  attempt(opts[:attempts]) do
    result = @client.post(url, args)
    raise unless result.is_a? HTTP::Message

    return true, result.headers if result.status == 202
    raise_error(result)
  end
end
553
+
554
+
555
+
556
# Feeds more input objects to a running job.
#
# job_path must be accepted by job_url (i.e. /<user>/jobs/...). obj_paths is
# an array of object paths; they are sent newline-separated as text/plain.
# :attempts overrides the default number of tries.
#
# Returns [true, headers] on a 204 response; other responses are converted
# into exceptions by raise_error. Connection and corruption errors are
# retried by attempt.
def add_job_keys(job_path, obj_paths, opts = {})
  input_url    = job_url(job_path, '/live/in')
  request_hdrs = gen_headers(opts)
  request_hdrs.push([ 'Content-Type', 'text/plain' ])

  payload = obj_paths.join("\n")

  attempt(opts[:attempts]) do
    response = @client.post(input_url, payload, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 204
      return [true, response.headers]
    end

    raise_error(response)
  end
end
582
+
583
+
584
+
585
# Tells Manta that a job will receive no further input, so it can finish
# its phases and terminate.
#
# job_path must be accepted by job_url (i.e. /<user>/jobs/...). :attempts
# overrides the default number of tries.
#
# Returns [true, headers] on a 202 response; other responses are converted
# into exceptions by raise_error. Connection and corruption errors are
# retried by attempt.
def end_job_input(job_path, opts = {})
  end_url      = job_url(job_path, '/live/in/end')
  request_hdrs = gen_headers(opts)

  attempt(opts[:attempts]) do
    response = @client.post(end_url, nil, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 202
      return [true, response.headers]
    end

    raise_error(response)
  end
end
608
+
609
+
610
+
611
# Lists the objects that have been submitted to a job as input.
#
# Delegates to get_job_state_streams with the :in stream; see that method
# for path requirements, options and error handling.
#
# Returns an array of object paths, along with received HTTP headers.
def get_job_input(job_path, opts = {})
  stream = :in
  get_job_state_streams(stream, job_path, opts)
end
624
+
625
+
626
+
627
# Lists the objects holding the output produced so far by a job.
#
# Delegates to get_job_state_streams with the :out stream; see that method
# for path requirements, options and error handling.
#
# Returns an array of object paths, along with received HTTP headers.
def get_job_output(job_path, opts = {})
  stream = :out
  get_job_state_streams(stream, job_path, opts)
end
641
+
642
+
643
+
644
# Lists the objects that a job failed to process.
#
# Delegates to get_job_state_streams with the :fail stream; see that method
# for path requirements, options and error handling.
#
# Returns an array of object paths, along with received HTTP headers.
def get_job_failures(job_path, opts = {})
  stream = :fail
  get_job_state_streams(stream, job_path, opts)
end
657
+
658
+
659
+
660
# Lists Manta jobs.
#
# state selects which jobs to return and must be :all, :running or :done;
# :all is sent to the service as an empty state filter. :attempts overrides
# the default number of tries.
#
# Returns [jobs, headers], where jobs is an array of hashes parsed from the
# newline-delimited JSON body ([] when the body is empty).
#
# Raises ArgumentError for an unknown state. A non-empty 200 response with
# an unexpected Content-Type raises; other statuses are converted into
# exceptions by raise_error.
def list_jobs(state, opts = {})
  raise ArgumentError unless [:all, :running, :done].include? state
  state_filter = state == :all ? nil : state

  request_hdrs = gen_headers(opts)

  attempt(opts[:attempts]) do
    # Always a GET: HEAD is not offered here until the Manta service adds it.
    response = @client.send(:get, job_url(), { :state => state_filter }, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      return [[], response.headers] if response.body.size == 0

      raise unless response.headers['Content-Type'] ==
                   'application/x-json-stream; type=job'

      # One JSON document per line.
      jobs = response.body.split("\n").map { |line| JSON.parse(line) }
      return [jobs, response.headers]
    end

    raise_error(response)
  end
end
701
+
702
+
703
+
704
# Generates a signed URL which can be used by unauthenticated users to
# make a request to Manta at the given path. This is typically used to GET
# an object, or to make a CORS preflighted PUT request.
#
# expires is a Time object or integer representing time after epoch; this
# determines how long the signed URL will be valid for. method is either a
# single HTTP method (:get, :put, :post, :delete, :options) or an array of
# such methods that the signed URL may be used for. path must match
# OBJ_PATH_REGEX. args is an optional array of [key, value] pairs appended
# to the query string; it is not modified.
#
# Returns the signed URL without a scheme (host + path + query); prefix it
# with http:// or https:// as needed.
#
# Raises ArgumentError for an unknown method or an invalid path.
def gen_signed_url(expires, method, path, args=[])
  methods = method.is_a?(Array) ? method : [method]
  raise ArgumentError unless (methods - [:get, :put, :post, :delete, :options]).empty?
  raise ArgumentError unless path =~ OBJ_PATH_REGEX

  key_id = '/%s/keys/%s' % [user_path, @fingerprint]

  # Build a new array rather than pushing onto the caller's one.
  query = args + [[ 'expires',   expires.to_i ],
                  [ 'algorithm', @digest_name ],
                  [ 'keyId',     key_id       ]]

  method = methods.map {|m| m.to_s.upcase }.sort.join(",")

  # URI.encode no longer exists in current Rubies; the RFC 2396 parser's
  # escape is the same operation.
  escaper = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
  host = escaper.escape(@host.split('/').last)
  path = escaper.escape(path)

  # The method list is only part of the query when several are allowed.
  query.push(['method', method]) if methods.count > 1

  encoded_args = query.sort.map do |key, val|
    # to comply with RFC 3986
    CGI.escape(key.to_s) + '=' + CGI.escape(val.to_s)
  end.join('&')

  plaintext         = "#{method}\n#{host}\n#{path}\n#{encoded_args}"
  signature         = @priv_key.sign(@digest, plaintext)
  encoded_signature = CGI.escape(Base64.strict_encode64(signature))

  host + path + '?' + encoded_args + '&signature=' + encoded_signature
end
745
+
746
+
747
+
748
# Base class for every error raised by MantaClient. One subclass is
# generated under MantaClient for each name listed in ERROR_CLASSES.
class MantaClientError < StandardError; end
ERROR_CLASSES.each do |error_name|
  MantaClient.const_set(error_name, Class.new(MantaClientError))
end
753
+
754
+
755
+
756
# Returns the account path used in key ids and signatures: "user/subuser"
# when a subuser was configured, otherwise just the user.
def user_path
  return @user unless @subuser

  "#{@user}/#{@subuser}"
end
761
+
762
+
763
+
764
+ # ---------------------------------------------------------------------------
765
+ protected
766
+
767
+
768
+
769
# Fetches one of a job's object lists.
#
# type is one of :in, :out or :fail; path must be accepted by job_url
# (i.e. /<user>/jobs/...). :attempts overrides the default number of tries.
#
# Returns [paths, headers], where paths is the text/plain body split into
# lines.
#
# Raises ArgumentError for an unknown type. A 200 response with an
# unexpected Content-Type raises; other statuses are converted into
# exceptions by raise_error.
def get_job_state_streams(type, path, opts)
  raise ArgumentError unless [:in, :out, :fail].include? type

  stream_url   = job_url(path, '/live/' + type.to_s)
  request_hdrs = gen_headers(opts)

  attempt(opts[:attempts]) do
    # Always a GET: HEAD is not offered here until the Manta service adds it.
    verb     = :get
    response = @client.send(verb, stream_url, nil, request_hdrs)
    raise unless response.is_a? HTTP::Message

    if response.status == 200
      raise unless response.headers['Content-Type'] == 'text/plain'
      return [true, response.headers] if verb == :head

      return [response.body.split("\n"), response.headers]
    end

    raise_error(response)
  end
end
801
+
802
+
803
+
804
# Returns a full, percent-escaped URL for a given path to an object.
#
# Raises ArgumentError unless path matches OBJ_PATH_REGEX.
def obj_url(path)
  raise ArgumentError unless path =~ OBJ_PATH_REGEX

  # URI.encode no longer exists in current Rubies; the RFC 2396 parser's
  # escape is the same operation.
  escaper = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
  escaper.escape(@host + path)
end
810
+
811
+
812
+
813
# Returns a full, percent-escaped URL for a given path to a job.
#
# With no arguments the URL of the user's jobs collection is returned.
# Otherwise the first argument must match JOB_PATH_REGEX and any further
# arguments are appended as path segments. Segments may be written with or
# without a leading slash ('/live/status' or 'live/cancel'); exactly one
# slash is placed at each joint.
#
# Raises ArgumentError if the first argument is not a job path.
def job_url(*args)
  path = if args.empty?
           @job_base
         else
           raise ArgumentError unless args.first =~ JOB_PATH_REGEX

           base, *suffixes = args
           # Trim the slashes at each joint so that joining with '/' does not
           # produce '//' inside the URL.
           segments = [base.to_s.sub(%r{/+\z}, '')] +
                      suffixes.map { |part| part.to_s.sub(%r{\A/+}, '') }
           segments.join('/')
         end

  # URI.encode no longer exists in current Rubies; the RFC 2396 parser's
  # escape is the same operation.
  escaper = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
  escaper.escape(@host + path)
end
824
+
825
+
826
+
827
# Runs the given block, retrying it when it fails with a connection- or
# corruption-related exception (Errno::ECONNREFUSED,
# HTTPClient::TimeoutError or CorruptResult).
#
# tries is the maximum number of runs; when nil, the client-wide default
# (@attempts) is used. Between runs the method sleeps 2, 4, 8, ... seconds.
#
# Returns the block's value. Raises ArgumentError if tries is given but not
# positive, and re-raises the last error once the runs are exhausted.
def attempt(tries, &blk)
  raise ArgumentError if tries && !(tries > 0)
  tries ||= @attempts

  run = 1

  begin
    yield blk
  rescue Errno::ECONNREFUSED, HTTPClient::TimeoutError,
         CorruptResult => failure
    raise failure if run == tries

    # Exponential back-off before the next run.
    sleep 2 ** run
    run += 1
    retry
  end
end
850
+
851
+
852
+
853
# Creates headers to be given to the HTTP client and sent to the Manta
# service. The most important is the Authorization header, which carries a
# signature over the Date header.
#
# Recognised opts:
#   :if_modified_since, :if_unmodified_since  Time, or anything whose to_s
#                                             Time.parse understands
#   :if_match, :if_none_match                 String etags
#   :origin                                   'null' or a value matching
#                                             CORS_ORIGIN_REGEX
#   :data                                     request body; adds Content-MD5
#
# Returns an array of [name, value] pairs. Raises ArgumentError for an etag
# that is not a String or an invalid origin.
def gen_headers(opts)
  now = Time.now.httpdate
  sig = gen_signature('date: ' + now)

  headers = [[ 'Date',           now        ],
             [ 'Authorization',  sig        ],
             [ 'User-Agent',     HTTP_AGENT ],
             [ 'Accept-Version', '~1.0'     ]]

  # headers for conditional requests (dates)
  [[:if_modified_since,   'If-Modified-Since'  ],
   [:if_unmodified_since, 'If-Unmodified-Since']].each do |arg, conditional|
    date = opts[arg]
    next unless date

    date = Time.parse(date.to_s) unless date.kind_of? Time
    # Send an HTTP-date string; pushing the Time object itself would put its
    # default to_s form on the wire, which is not a valid HTTP-date.
    headers.push([conditional, date.httpdate])
  end

  # headers for conditional requests (etags)
  [[:if_match,      'If-Match'     ],
   [:if_none_match, 'If-None-Match']].each do |arg, conditional|
    etag = opts[arg]
    next unless etag

    raise ArgumentError unless etag.kind_of? String
    headers.push([conditional, etag])
  end

  origin = opts[:origin]
  if origin
    raise ArgumentError unless origin == 'null' || origin =~ CORS_ORIGIN_REGEX
    headers.push([ 'Origin', origin ])
  end

  # add md5 hash when sending data
  data = opts[:data]
  if data
    md5 = Digest::MD5.base64digest(data)
    headers.push([ 'Content-MD5', md5 ])
  end

  headers
end
901
+
902
+
903
+
904
# Validates the CORS-related options and turns them into headers.
#
# Recognised opts (each is skipped when absent):
#   :access_control_allow_credentials  value whose to_s is 'true' or 'false'
#   :access_control_allow_headers      ", "-separated header names
#   :access_control_allow_methods      ", "-separated subset of CORS_METHODS
#   :access_control_allow_origin       '*', 'null' or origin list
#   :access_control_expose_headers     ", "-separated header names
#   :access_control_max_age            non-negative Integer
#
# Header-name lists are lower-cased and sorted before being sent.
#
# Returns an array of [name, value] pairs; raises ArgumentError for any
# value that fails validation.
#
# For more details, see http://www.w3.org/TR/cors/ and
# https://developer.mozilla.org/en-US/docs/HTTP/Access_control_CORS#Access-Control-Expose-Headers
def gen_cors_headers(opts)
  cors = []

  # Validates a ", "-separated header-name list and normalises it.
  normalize_names = lambda do |names|
    raise ArgumentError unless names =~ CORS_HEADERS_REGEX
    names.split(', ').map(&:downcase).sort.join(', ')
  end

  if (credentials = opts[:access_control_allow_credentials])
    credentials = credentials.to_s
    raise ArgumentError unless %w[true false].include?(credentials)
    cors << [ 'Access-Control-Allow-Credentials', credentials ]
  end

  if (allowed_names = opts[:access_control_allow_headers])
    cors << [ 'Access-Control-Allow-Headers', normalize_names.call(allowed_names) ]
  end

  if (allowed_verbs = opts[:access_control_allow_methods])
    raise ArgumentError unless allowed_verbs.kind_of? String

    all_known = allowed_verbs.split(', ').all? { |verb| CORS_METHODS.include? verb }
    raise ArgumentError unless all_known

    cors << [ 'Access-Control-Allow-Methods', allowed_verbs ]
  end

  if (allowed_origin = opts[:access_control_allow_origin])
    raise ArgumentError unless allowed_origin.kind_of? String
    raise ArgumentError unless allowed_origin == '*' ||
                               allowed_origin == 'null' ||
                               allowed_origin =~ CORS_ORIGIN_REGEX
    cors << [ 'Access-Control-Allow-Origin', allowed_origin ]
  end

  if (exposed_names = opts[:access_control_expose_headers])
    cors << [ 'Access-Control-Expose-Headers', normalize_names.call(exposed_names) ]
  end

  if (age = opts[:access_control_max_age])
    raise ArgumentError unless age.kind_of?(Integer) && age >= 0
    cors << [ 'Access-Control-Max-Age', age.to_s ]
  end

  cors
end
962
+
963
# Signs data with the client's private key and formats the result as the
# value of an Authorization header (see HTTP_SIGNATURE).
#
# Raises ArgumentError when data is nil or false.
def gen_signature(data)
  raise ArgumentError unless data

  encoded = Base64.strict_encode64(@priv_key.sign(@digest, data))

  HTTP_SIGNATURE % [user_path, @fingerprint, @digest_name, encoded]
end
973
+
974
+
975
+
976
# Raises an appropriate exception given the HTTP response.
#
# The body is expected to be a JSON object with "code" and "message"
# fields. If the code is one of the names in ERROR_CLASSES, the matching
# MantaClient error class is raised with the message. In every other case
# (unparseable body, unexpected shape, unknown code) an UnknownError is
# raised carrying the status and raw body.
#
# This method never returns normally.
def raise_error(result)
  raise unless result.is_a? HTTP::Message

  err = begin
          JSON.parse(result.body)
        rescue JSON::ParserError, TypeError
          nil
        end

  code = err.is_a?(Hash) ? err['code'] : nil

  # Only resolve codes this client declared itself. A bare const_get on a
  # server-supplied string could resolve any global constant (for example
  # ArgumentError) and raise something callers do not expect.
  if ERROR_CLASSES.include?(code)
    raise MantaClient.const_get(code, false), err['message']
  end

  raise UnknownError, result.status.to_s + ': ' + result.body.to_s
end
988
+ end
989
+ end