s3_cmd_bin 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +28 -0
  5. data/Rakefile +1 -0
  6. data/lib/s3_cmd_bin/version.rb +3 -0
  7. data/lib/s3_cmd_bin.rb +15 -0
  8. data/resources/ChangeLog +1462 -0
  9. data/resources/INSTALL +97 -0
  10. data/resources/LICENSE +339 -0
  11. data/resources/MANIFEST.in +2 -0
  12. data/resources/Makefile +4 -0
  13. data/resources/NEWS +234 -0
  14. data/resources/README +342 -0
  15. data/resources/S3/ACL.py +224 -0
  16. data/resources/S3/ACL.pyc +0 -0
  17. data/resources/S3/AccessLog.py +92 -0
  18. data/resources/S3/AccessLog.pyc +0 -0
  19. data/resources/S3/BidirMap.py +42 -0
  20. data/resources/S3/BidirMap.pyc +0 -0
  21. data/resources/S3/CloudFront.py +773 -0
  22. data/resources/S3/CloudFront.pyc +0 -0
  23. data/resources/S3/Config.py +294 -0
  24. data/resources/S3/Config.pyc +0 -0
  25. data/resources/S3/ConnMan.py +71 -0
  26. data/resources/S3/ConnMan.pyc +0 -0
  27. data/resources/S3/Exceptions.py +88 -0
  28. data/resources/S3/Exceptions.pyc +0 -0
  29. data/resources/S3/FileDict.py +53 -0
  30. data/resources/S3/FileDict.pyc +0 -0
  31. data/resources/S3/FileLists.py +517 -0
  32. data/resources/S3/FileLists.pyc +0 -0
  33. data/resources/S3/HashCache.py +53 -0
  34. data/resources/S3/HashCache.pyc +0 -0
  35. data/resources/S3/MultiPart.py +137 -0
  36. data/resources/S3/MultiPart.pyc +0 -0
  37. data/resources/S3/PkgInfo.py +14 -0
  38. data/resources/S3/PkgInfo.pyc +0 -0
  39. data/resources/S3/Progress.py +173 -0
  40. data/resources/S3/Progress.pyc +0 -0
  41. data/resources/S3/S3.py +979 -0
  42. data/resources/S3/S3.pyc +0 -0
  43. data/resources/S3/S3Uri.py +223 -0
  44. data/resources/S3/S3Uri.pyc +0 -0
  45. data/resources/S3/SimpleDB.py +178 -0
  46. data/resources/S3/SortedDict.py +66 -0
  47. data/resources/S3/SortedDict.pyc +0 -0
  48. data/resources/S3/Utils.py +462 -0
  49. data/resources/S3/Utils.pyc +0 -0
  50. data/resources/S3/__init__.py +0 -0
  51. data/resources/S3/__init__.pyc +0 -0
  52. data/resources/TODO +52 -0
  53. data/resources/artwork/AtomicClockRadio.ttf +0 -0
  54. data/resources/artwork/TypeRa.ttf +0 -0
  55. data/resources/artwork/site-top-full-size.xcf +0 -0
  56. data/resources/artwork/site-top-label-download.png +0 -0
  57. data/resources/artwork/site-top-label-s3cmd.png +0 -0
  58. data/resources/artwork/site-top-label-s3sync.png +0 -0
  59. data/resources/artwork/site-top-s3tools-logo.png +0 -0
  60. data/resources/artwork/site-top.jpg +0 -0
  61. data/resources/artwork/site-top.png +0 -0
  62. data/resources/artwork/site-top.xcf +0 -0
  63. data/resources/format-manpage.pl +196 -0
  64. data/resources/magic +63 -0
  65. data/resources/run-tests.py +537 -0
  66. data/resources/s3cmd +2116 -0
  67. data/resources/s3cmd.1 +435 -0
  68. data/resources/s3db +55 -0
  69. data/resources/setup.cfg +2 -0
  70. data/resources/setup.py +80 -0
  71. data/resources/testsuite.tar.gz +0 -0
  72. data/resources/upload-to-sf.sh +7 -0
  73. data/s3_cmd_bin.gemspec +23 -0
  74. metadata +152 -0
@@ -0,0 +1,979 @@
1
+ ## Amazon S3 manager
2
+ ## Author: Michal Ludvig <michal@logix.cz>
3
+ ## http://www.logix.cz/michal
4
+ ## License: GPL Version 2
5
+
6
+ import sys
7
+ import os, os.path
8
+ import time
9
+ import httplib
10
+ import logging
11
+ import mimetypes
12
+ import re
13
+ from logging import debug, info, warning, error
14
+ from stat import ST_SIZE
15
+
16
+ try:
17
+ from hashlib import md5
18
+ except ImportError:
19
+ from md5 import md5
20
+
21
+ from Utils import *
22
+ from SortedDict import SortedDict
23
+ from AccessLog import AccessLog
24
+ from ACL import ACL, GranteeLogDelivery
25
+ from BidirMap import BidirMap
26
+ from Config import Config
27
+ from Exceptions import *
28
+ from MultiPart import MultiPartUpload
29
+ from S3Uri import S3Uri
30
+ from ConnMan import ConnMan
31
+
32
## Detect which flavour of the "magic" MIME-detection library is
## installed. Three mutually-incompatible APIs exist in the wild;
## each probe falls through to the next on TypeError/AttributeError,
## and a mimetypes-based fallback is installed when none is available.
try:
    import magic, gzip
    try:
        ## https://github.com/ahupp/python-magic
        magic_ = magic.Magic(mime=True)
        def mime_magic_file(file):
            return magic_.from_file(file)
        def mime_magic_buffer(buffer):
            return magic_.from_buffer(buffer)
    except TypeError:
        ## http://pypi.python.org/pypi/filemagic
        try:
            magic_ = magic.Magic(flags=magic.MAGIC_MIME)
            def mime_magic_file(file):
                return magic_.id_filename(file)
            def mime_magic_buffer(buffer):
                return magic_.id_buffer(buffer)
        except TypeError:
            ## file-5.11 built-in python bindings
            magic_ = magic.open(magic.MAGIC_MIME)
            magic_.load()
            def mime_magic_file(file):
                return magic_.file(file)
            def mime_magic_buffer(buffer):
                return magic_.buffer(buffer)
    except AttributeError:
        ## Older python-magic versions
        magic_ = magic.open(magic.MAGIC_MIME)
        magic_.load()
        def mime_magic_file(file):
            return magic_.file(file)
        def mime_magic_buffer(buffer):
            return magic_.buffer(buffer)

    def mime_magic(file):
        """Return (mime_type, encoding) for 'file'.

        Gzip files are peeked into (first 8 KiB) so the type of the
        *compressed payload* is reported, with encoding 'gzip'.
        """
        type = mime_magic_file(file)
        if type != "application/x-gzip; charset=binary":
            return (type, None)
        else:
            return (mime_magic_buffer(gzip.open(file).read(8192)), 'gzip')

except ImportError, e:
    # python-magic missing or unusable -> warn once, then fall back to
    # extension-based guessing via the stdlib 'mimetypes' module.
    if str(e).find("magic") >= 0:
        magic_message = "Module python-magic is not available."
    else:
        magic_message = "Module python-magic can't be used (%s)." % e.message
    magic_message += " Guessing MIME types based on file extensions."
    magic_warned = False
    def mime_magic(file):
        """Extension-based fallback; same (type, encoding) contract."""
        global magic_warned
        if (not magic_warned):
            warning(magic_message)
            magic_warned = True
        return mimetypes.guess_type(file)
87
+
88
## Nothing is exported via "from S3 import *" by default.
__all__ = []
89
class S3Request(object):
    """One signed request to the S3 REST API.

    Bundles the HTTP method, target resource ({'bucket', 'uri'} dict),
    headers and URL parameters, and computes the AWS V2 signature
    ("Authorization: AWS <access_key>:<signature>").
    """
    def __init__(self, s3, method_string, resource, headers, params = None):
        # Fix: 'params' previously defaulted to a shared mutable dict
        # ({}), so state could leak between S3Request instances. A None
        # sentinel is backward-compatible for all callers.
        if params is None:
            params = {}
        self.s3 = s3
        self.headers = SortedDict(headers or {}, ignore_case = True)
        # Add in any extra headers from s3 config object
        if self.s3.config.extra_headers:
            self.headers.update(self.s3.config.extra_headers)
        if len(self.s3.config.access_token)>0:
            # Temporary (role-based) credentials: refresh and attach token.
            self.s3.config.role_refresh()
            self.headers['x-amz-security-token']=self.s3.config.access_token
        self.resource = resource
        self.method_string = method_string
        self.params = params

        self.update_timestamp()
        self.sign()

    def update_timestamp(self):
        """Stamp the request with the current GMT time (x-amz-date).

        Any plain 'date' header is dropped so it cannot conflict with
        the x-amz-date used for signing.
        """
        if self.headers.has_key("date"):
            del(self.headers["date"])
        self.headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

    def format_param_str(self):
        """
        Format URL parameters from self.params and returns
        ?parm1=val1&parm2=val2 or an empty string if there
        are no parameters.  Output of this function should
        be appended directly to self.resource['uri']
        """
        param_str = ""
        for param in self.params:
            if self.params[param] not in (None, ""):
                param_str += "&%s=%s" % (param, self.params[param])
            else:
                # Value-less parameters (e.g. "?acl") are emitted bare.
                param_str += "&%s" % param
        return param_str and "?" + param_str[1:]

    def sign(self):
        """Compute the AWS V2 signature and set the Authorization header.

        String-to-sign: method, content-md5, content-type, date, then all
        x-amz-* headers, then the canonicalized resource path.
        """
        h  = self.method_string + "\n"
        h += self.headers.get("content-md5", "")+"\n"
        h += self.headers.get("content-type", "")+"\n"
        h += self.headers.get("date", "")+"\n"
        for header in self.headers.keys():
            if header.startswith("x-amz-"):
                h += header+":"+str(self.headers[header])+"\n"
        if self.resource['bucket']:
            h += "/" + self.resource['bucket']
        h += self.resource['uri']
        debug("SignHeaders: " + repr(h))
        signature = sign_string(h)

        self.headers["Authorization"] = "AWS "+self.s3.config.access_key+":"+signature

    def get_triplet(self):
        """Return (method, resource, headers), freshly timestamped and
        re-signed so retries never send a stale signature."""
        self.update_timestamp()
        self.sign()
        resource = dict(self.resource)  ## take a copy
        resource['uri'] += self.format_param_str()
        return (self.method_string, resource, self.headers)
148
+
149
class S3(object):
    """Mid-level S3 client: builds signed S3Request objects and performs
    bucket/object operations over pooled httplib connections."""

    ## HTTP verb bits of an operation code (low byte).
    http_methods = BidirMap(
        GET = 0x01,
        PUT = 0x02,
        HEAD = 0x04,
        DELETE = 0x08,
        POST = 0x10,
        MASK = 0x1F,
        )

    ## Target bits of an operation code (high byte).
    targets = BidirMap(
        SERVICE = 0x0100,
        BUCKET = 0x0200,
        OBJECT = 0x0400,
        MASK = 0x0700,
        )

    ## Operation code = target bits | verb bits; create_request()
    ## masks these to recover the HTTP method.
    operations = BidirMap(
        UNDFINED = 0x0000,
        LIST_ALL_BUCKETS = targets["SERVICE"] | http_methods["GET"],
        BUCKET_CREATE = targets["BUCKET"] | http_methods["PUT"],
        BUCKET_LIST = targets["BUCKET"] | http_methods["GET"],
        BUCKET_DELETE = targets["BUCKET"] | http_methods["DELETE"],
        OBJECT_PUT = targets["OBJECT"] | http_methods["PUT"],
        OBJECT_GET = targets["OBJECT"] | http_methods["GET"],
        OBJECT_HEAD = targets["OBJECT"] | http_methods["HEAD"],
        OBJECT_DELETE = targets["OBJECT"] | http_methods["DELETE"],
        OBJECT_POST = targets["OBJECT"] | http_methods["POST"],
        )

    ## Message templates for common S3 error codes ('%s' = bucket name).
    codes = {
        "NoSuchBucket" : "Bucket '%s' does not exist",
        "AccessDenied" : "Access to bucket '%s' was denied",
        "BucketAlreadyExists" : "Bucket '%s' already exists",
        }

    ## S3 sometimes sends HTTP-307 response
    ## bucket name -> redirected hostname, filled in by set_hostname().
    ## NOTE: class-level dict, shared by all S3 instances.
    redir_map = {}

    ## Maximum attempts of re-issuing failed requests
    _max_retries = 5
190
+
191
    def __init__(self, config):
        # 'config' is the application-wide Config object (credentials,
        # endpoints, behaviour flags); kept by reference, not copied.
        self.config = config
193
+
194
    def get_hostname(self, bucket):
        """Return the hostname to contact for 'bucket'.

        DNS-conformant bucket names use virtual-host addressing, honouring
        any HTTP-307 redirect previously recorded in redir_map; all other
        cases fall back to the generic endpoint from the config.
        """
        if bucket and check_bucket_name_dns_conformity(bucket):
            if self.redir_map.has_key(bucket):
                host = self.redir_map[bucket]
            else:
                host = getHostnameFromBucket(bucket)
        else:
            host = self.config.host_base
        debug('get_hostname(%s): %s' % (bucket, host))
        return host
204
+
205
    def set_hostname(self, bucket, redir_hostname):
        # Record an HTTP-307 redirect target so subsequent requests for
        # this bucket go straight to the right endpoint.
        self.redir_map[bucket] = redir_hostname
207
+
208
+ def format_uri(self, resource):
209
+ if resource['bucket'] and not check_bucket_name_dns_conformity(resource['bucket']):
210
+ uri = "/%s%s" % (resource['bucket'], resource['uri'])
211
+ else:
212
+ uri = resource['uri']
213
+ if self.config.proxy_host != "":
214
+ uri = "http://%s%s" % (self.get_hostname(resource['bucket']), uri)
215
+ debug('format_uri(): ' + uri)
216
+ return uri
217
+
218
+ ## Commands / Actions
219
    def list_all_buckets(self):
        """GET on the service endpoint.

        Returns the raw response dict augmented with response['list'],
        the parsed <Bucket> entries from the XML body.
        """
        request = self.create_request("LIST_ALL_BUCKETS")
        response = self.send_request(request)
        response["list"] = getListFromXml(response["data"], "Bucket")
        return response
224
+
225
    def bucket_list(self, bucket, prefix = None, recursive = None):
        """List a bucket, transparently following truncated responses.

        Returns the last raw response augmented with:
          response['list']            - all <Contents> entries
          response['common_prefixes'] - all <CommonPrefixes> entries
        """
        def _list_truncated(data):
            ## <IsTruncated> can either be "true" or "false" or be missing completely
            is_truncated = getTextFromXml(data, ".//IsTruncated") or "false"
            return is_truncated.lower() != "false"

        def _get_contents(data):
            return getListFromXml(data, "Contents")

        def _get_common_prefixes(data):
            return getListFromXml(data, "CommonPrefixes")

        uri_params = {}
        truncated = True
        list = []       # NOTE: shadows the builtin 'list' inside this method
        prefixes = []

        while truncated:
            response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
            current_list = _get_contents(response["data"])
            current_prefixes = _get_common_prefixes(response["data"])
            truncated = _list_truncated(response["data"])
            if truncated:
                # Resume after the last key, or after the last common
                # prefix when the page contained only prefixes.
                if current_list:
                    uri_params['marker'] = self.urlencode_string(current_list[-1]["Key"])
                else:
                    uri_params['marker'] = self.urlencode_string(current_prefixes[-1]["Prefix"])
                debug("Listing continues after '%s'" % uri_params['marker'])

            list += current_list
            prefixes += current_prefixes

        response['list'] = list
        response['common_prefixes'] = prefixes
        return response
260
+
261
+ def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}):
262
+ if prefix:
263
+ uri_params['prefix'] = self.urlencode_string(prefix)
264
+ if not self.config.recursive and not recursive:
265
+ uri_params['delimiter'] = "/"
266
+ request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params)
267
+ response = self.send_request(request)
268
+ #debug(response)
269
+ return response
270
+
271
    def bucket_create(self, bucket, bucket_location = None):
        """PUT a new bucket, optionally with a LocationConstraint body.

        "US" (or no location) sends an empty body; "EU" is normalised to
        upper case, any other region name to lower case. Buckets with a
        location constraint must have DNS-strict names.
        """
        headers = SortedDict(ignore_case = True)
        body = ""
        if bucket_location and bucket_location.strip().upper() != "US":
            bucket_location = bucket_location.strip()
            if bucket_location.upper() == "EU":
                bucket_location = bucket_location.upper()
            else:
                bucket_location = bucket_location.lower()
            body = "<CreateBucketConfiguration><LocationConstraint>"
            body += bucket_location
            body += "</LocationConstraint></CreateBucketConfiguration>"
            debug("bucket_location: " + body)
            check_bucket_name(bucket, dns_strict = True)
        else:
            check_bucket_name(bucket, dns_strict = False)
        if self.config.acl_public:
            headers["x-amz-acl"] = "public-read"
        request = self.create_request("BUCKET_CREATE", bucket = bucket, headers = headers)
        response = self.send_request(request, body)
        return response
292
+
293
+ def bucket_delete(self, bucket):
294
+ request = self.create_request("BUCKET_DELETE", bucket = bucket)
295
+ response = self.send_request(request)
296
+ return response
297
+
298
+ def get_bucket_location(self, uri):
299
+ request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?location")
300
+ response = self.send_request(request)
301
+ location = getTextFromXml(response['data'], "LocationConstraint")
302
+ if not location or location in [ "", "US" ]:
303
+ location = "us-east-1"
304
+ elif location == "EU":
305
+ location = "eu-west-1"
306
+ return location
307
+
308
+ def bucket_info(self, uri):
309
+ # For now reports only "Location". One day perhaps more.
310
+ response = {}
311
+ response['bucket-location'] = self.get_bucket_location(uri)
312
+ return response
313
+
314
    def website_info(self, uri, bucket_location = None):
        """Fetch the bucket's static-website configuration.

        Returns the response dict extended with 'index_document',
        'error_document' and 'website_endpoint', or None when the bucket
        has no website configuration (S3 answers 404).
        """
        headers = SortedDict(ignore_case = True)    # NOTE(review): currently unused
        bucket = uri.bucket()
        body = ""

        request = self.create_request("BUCKET_LIST", bucket = bucket, extra="?website")
        try:
            response = self.send_request(request, body)
            response['index_document'] = getTextFromXml(response['data'], ".//IndexDocument//Suffix")
            response['error_document'] = getTextFromXml(response['data'], ".//ErrorDocument//Key")
            response['website_endpoint'] = self.config.website_endpoint % {
                "bucket" : uri.bucket(),
                "location" : self.get_bucket_location(uri)}
            return response
        except S3Error, e:
            if e.status == 404:
                debug("Could not get /?website - website probably not configured for this bucket")
                return None
            raise
333
+
334
    def website_create(self, uri, bucket_location = None):
        """Enable static-website hosting for the bucket.

        The index document comes from config.website_index; an error
        document is added only when config.website_error is set.
        """
        headers = SortedDict(ignore_case = True)    # NOTE(review): currently unused
        bucket = uri.bucket()
        # Build the WebsiteConfiguration XML payload by concatenation.
        body = '<WebsiteConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">'
        body += ' <IndexDocument>'
        body += (' <Suffix>%s</Suffix>' % self.config.website_index)
        body += ' </IndexDocument>'
        if self.config.website_error:
            body += ' <ErrorDocument>'
            body += (' <Key>%s</Key>' % self.config.website_error)
            body += ' </ErrorDocument>'
        body += '</WebsiteConfiguration>'

        request = self.create_request("BUCKET_CREATE", bucket = bucket, extra="?website")
        debug("About to send request '%s' with body '%s'" % (request, body))
        response = self.send_request(request, body)
        debug("Received response '%s'" % (response))

        return response
353
+
354
    def website_delete(self, uri, bucket_location = None):
        """Remove the bucket's static-website configuration.

        Raises S3ResponseError unless S3 answers 204 No Content.
        """
        headers = SortedDict(ignore_case = True)    # NOTE(review): currently unused
        bucket = uri.bucket()
        body = ""

        request = self.create_request("BUCKET_DELETE", bucket = bucket, extra="?website")
        debug("About to send request '%s' with body '%s'" % (request, body))
        response = self.send_request(request, body)
        debug("Received response '%s'" % (response))

        if response['status'] != 204:
            raise S3ResponseError("Expected status 204: %s" % response)

        return response
368
+
369
+ def add_encoding(self, filename, content_type):
370
+ if content_type.find("charset=") != -1:
371
+ return False
372
+ exts = self.config.add_encoding_exts.split(',')
373
+ if exts[0]=='':
374
+ return False
375
+ parts = filename.rsplit('.',2)
376
+ if len(parts) < 2:
377
+ return False
378
+ ext = parts[1]
379
+ if ext in exts:
380
+ return True
381
+ else:
382
+ return False
383
+
384
    def object_put(self, filename, uri, extra_headers = None, extra_label = ""):
        """Upload a local file (or stdin when filename == "-") to 'uri'.

        Automatically switches to multipart upload for stdin or for
        files larger than config.multipart_chunk_size_mb. Raises
        InvalidFileError for unreadable or non-regular files, and
        ParameterError when stdin is used with multipart disabled.
        """
        # TODO: make this consistent with the stream-oriented object_get()
        if uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % uri.type)

        if filename != "-" and not os.path.isfile(filename):
            raise InvalidFileError(u"%s is not a regular file" % unicodise(filename))
        try:
            if filename == "-":
                # stdin: size unknown up-front; multipart handles it.
                file = sys.stdin
                size = 0
            else:
                file = open(filename, "rb")
                size = os.stat(filename)[ST_SIZE]
        except (IOError, OSError), e:
            raise InvalidFileError(u"%s: %s" % (unicodise(filename), e.strerror))

        headers = SortedDict(ignore_case = True)
        if extra_headers:
            headers.update(extra_headers)

        ## MIME-type handling
        content_type = self.config.mime_type
        content_encoding = None
        if filename != "-" and not content_type and self.config.guess_mime_type:
            (content_type, content_encoding) = mime_magic(filename)
        if not content_type:
            content_type = self.config.default_mime_type
        if not content_encoding:
            content_encoding = self.config.encoding.upper()

        ## add charset to content type
        if self.add_encoding(filename, content_type) and content_encoding is not None:
            content_type = content_type + "; charset=" + content_encoding

        headers["content-type"] = content_type
        if content_encoding is not None:
            headers["content-encoding"] = content_encoding

        ## Other Amazon S3 attributes
        if self.config.acl_public:
            headers["x-amz-acl"] = "public-read"
        if self.config.reduced_redundancy:
            headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY"

        ## Multipart decision
        multipart = False
        if not self.config.enable_multipart and filename == "-":
            raise ParameterError("Multi-part upload is required to upload from stdin")
        if self.config.enable_multipart:
            if size > self.config.multipart_chunk_size_mb * 1024 * 1024 or filename == "-":
                multipart = True
        if multipart:
            # Multipart requests are quite different... drop here
            return self.send_file_multipart(file, headers, uri, size)

        ## Not multipart...
        headers["content-length"] = size
        request = self.create_request("OBJECT_PUT", uri = uri, headers = headers)
        labels = { 'source' : unicodise(filename), 'destination' : unicodise(uri.uri()), 'extra' : extra_label }
        response = self.send_file(request, file, labels)
        return response
447
+
448
+ def object_get(self, uri, stream, start_position = 0, extra_label = ""):
449
+ if uri.type != "s3":
450
+ raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
451
+ request = self.create_request("OBJECT_GET", uri = uri)
452
+ labels = { 'source' : unicodise(uri.uri()), 'destination' : unicodise(stream.name), 'extra' : extra_label }
453
+ response = self.recv_file(request, stream, labels, start_position)
454
+ return response
455
+
456
+ def object_delete(self, uri):
457
+ if uri.type != "s3":
458
+ raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
459
+ request = self.create_request("OBJECT_DELETE", uri = uri)
460
+ response = self.send_request(request)
461
+ return response
462
+
463
    def object_copy(self, src_uri, dst_uri, extra_headers = None):
        """Server-side copy src_uri -> dst_uri (both must be s3:// URIs).

        Metadata is copied as-is (x-amz-metadata-directive: COPY);
        'extra_headers' is currently ignored - see the commented-out
        update below.
        """
        if src_uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % src_uri.type)
        if dst_uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % dst_uri.type)
        headers = SortedDict(ignore_case = True)
        headers['x-amz-copy-source'] = "/%s/%s" % (src_uri.bucket(), self.urlencode_string(src_uri.object()))
        ## TODO: For now COPY, later maybe add a switch?
        headers['x-amz-metadata-directive'] = "COPY"
        if self.config.acl_public:
            headers["x-amz-acl"] = "public-read"
        if self.config.reduced_redundancy:
            headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY"
        # if extra_headers:
        #     headers.update(extra_headers)
        request = self.create_request("OBJECT_PUT", uri = dst_uri, headers = headers)
        response = self.send_request(request)
        return response
481
+
482
    def object_move(self, src_uri, dst_uri, extra_headers = None):
        """Move = copy then delete.

        The source is deleted only when the copy demonstrably succeeded
        (response root element is CopyObjectResult); the copy response
        is returned either way.
        """
        response_copy = self.object_copy(src_uri, dst_uri, extra_headers)
        debug("Object %s copied to %s" % (src_uri, dst_uri))
        if getRootTagName(response_copy["data"]) == "CopyObjectResult":
            response_delete = self.object_delete(src_uri)
            debug("Object %s deleted" % src_uri)
        return response_copy
489
+
490
+ def object_info(self, uri):
491
+ request = self.create_request("OBJECT_HEAD", uri = uri)
492
+ response = self.send_request(request)
493
+ return response
494
+
495
    def get_acl(self, uri):
        """Fetch the ACL of an object (when the URI names one) or of the
        bucket itself, parsed into an ACL instance."""
        if uri.has_object():
            request = self.create_request("OBJECT_GET", uri = uri, extra = "?acl")
        else:
            request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?acl")

        response = self.send_request(request)
        acl = ACL(response['data'])
        return acl
504
+
505
    def set_acl(self, uri, acl):
        """PUT an ACL document onto the object (or the bucket when the
        URI has no object part); 'acl' is serialised via str()."""
        if uri.has_object():
            request = self.create_request("OBJECT_PUT", uri = uri, extra = "?acl")
        else:
            request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?acl")

        body = str(acl)
        debug(u"set_acl(%s): acl-xml: %s" % (uri, body))
        response = self.send_request(request, body)
        return response
515
+
516
+ def get_policy(self, uri):
517
+ request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?policy")
518
+ response = self.send_request(request)
519
+ return response['data']
520
+
521
    def set_policy(self, uri, policy):
        """PUT a bucket policy; 'policy' is a JSON string sent verbatim."""
        headers = {}
        # TODO check policy is proper json string
        headers['content-type'] = 'application/json'
        request = self.create_request("BUCKET_CREATE", uri = uri,
                                      extra = "?policy", headers=headers)
        body = policy
        debug(u"set_policy(%s): policy-json: %s" % (uri, body))
        # NOTE(review): this explicit sign() looks redundant -
        # send_request() re-signs via get_triplet(); confirm before removing.
        request.sign()
        response = self.send_request(request, body=body)
        return response
532
+
533
+ def delete_policy(self, uri):
534
+ request = self.create_request("BUCKET_DELETE", uri = uri, extra = "?policy")
535
+ debug(u"delete_policy(%s)" % uri)
536
+ response = self.send_request(request)
537
+ return response
538
+
539
+ def get_accesslog(self, uri):
540
+ request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?logging")
541
+ response = self.send_request(request)
542
+ accesslog = AccessLog(response['data'])
543
+ return accesslog
544
+
545
    def set_accesslog_acl(self, uri):
        """Grant the S3 log-delivery group READ_ACP + WRITE on the
        bucket so it can deposit access-log objects there."""
        acl = self.get_acl(uri)
        debug("Current ACL(%s): %s" % (uri.uri(), str(acl)))
        acl.appendGrantee(GranteeLogDelivery("READ_ACP"))
        acl.appendGrantee(GranteeLogDelivery("WRITE"))
        debug("Updated ACL(%s): %s" % (uri.uri(), str(acl)))
        self.set_acl(uri, acl)
552
+
553
    def set_accesslog(self, uri, enable, log_target_prefix_uri = None, acl_public = False):
        """Enable or disable server access logging for a bucket.

        On InvalidTargetBucketForLogging the target bucket's ACL is
        fixed up (log-delivery grants) and the request retried once.
        Returns (AccessLog, response).
        """
        request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?logging")
        accesslog = AccessLog()
        if enable:
            accesslog.enableLogging(log_target_prefix_uri)
            accesslog.setAclPublic(acl_public)
        else:
            accesslog.disableLogging()
        body = str(accesslog)
        debug(u"set_accesslog(%s): accesslog-xml: %s" % (uri, body))
        try:
            response = self.send_request(request, body)
        except S3Error, e:
            if e.info['Code'] == "InvalidTargetBucketForLogging":
                info("Setting up log-delivery ACL for target bucket.")
                self.set_accesslog_acl(S3Uri("s3://%s" % log_target_prefix_uri.bucket()))
                response = self.send_request(request, body)
            else:
                raise
        return accesslog, response
573
+
574
+ ## Low level methods
575
    def urlencode_string(self, string, urlencoding_mode = None):
        """Percent-encode 'string' for use in an S3 resource path.

        Unicode input is UTF-8 encoded first. The mode defaults to
        config.urlencoding_mode: "verbatim" returns the string untouched,
        "fixbucket" additionally %-encodes control characters instead of
        replacing them.
        """
        if type(string) == unicode:
            string = string.encode("utf-8")

        if urlencoding_mode is None:
            urlencoding_mode = self.config.urlencoding_mode

        if urlencoding_mode == "verbatim":
            ## Don't do any pre-processing
            return string

        encoded = ""
        ## List of characters that must be escaped for S3
        ## Haven't found this in any official docs
        ## but my tests show it's more less correct.
        ## If you start getting InvalidSignature errors
        ## from S3 check the error headers returned
        ## from S3 to see whether the list hasn't
        ## changed.
        for c in string:    # I'm not sure how to know in what encoding
                            # 'object' is. Apparently "type(object)==str"
                            # but the contents is a string of unicode
                            # bytes, e.g. '\xc4\x8d\xc5\xafr\xc3\xa1k'
                            # Don't know what it will do on non-utf8
                            # systems.
                            #           [hope that sounds reassuring ;-)]
            o = ord(c)
            if (o < 0x20 or o == 0x7f):
                # Control characters: encode or replace, depending on mode.
                if urlencoding_mode == "fixbucket":
                    encoded += "%%%02X" % o
                else:
                    error(u"Non-printable character 0x%02x in: %s" % (o, string))
                    error(u"Please report it to s3tools-bugs@lists.sourceforge.net")
                    encoded += replace_nonprintables(c)
            elif (o == 0x20 or  # Space and below
                  o == 0x22 or  # "
                  o == 0x23 or  # #
                  o == 0x25 or  # % (escape character)
                  o == 0x26 or  # &
                  o == 0x2B or  # + (or it would become <space>)
                  o == 0x3C or  # <
                  o == 0x3E or  # >
                  o == 0x3F or  # ?
                  o == 0x60 or  # `
                  o >= 123):    # { and above, including >= 128 for UTF-8
                encoded += "%%%02X" % o
            else:
                encoded += c
        debug("String '%s' encoded to '%s'" % (string, encoded))
        return encoded
625
+
626
    def create_request(self, operation, uri = None, bucket = None, object = None, headers = None, extra = None, **params):
        """Build an S3Request for 'operation' (a key of S3.operations).

        The target is given either as an S3Uri ('uri') or as separate
        'bucket'/'object' strings - never both. 'extra' is appended
        verbatim to the resource path (e.g. "?acl"); remaining keyword
        arguments become URL parameters.
        """
        resource = { 'bucket' : None, 'uri' : "/" }

        if uri and (bucket or object):
            raise ValueError("Both 'uri' and either 'bucket' or 'object' parameters supplied")
        ## If URI is given use that instead of bucket/object parameters
        if uri:
            bucket = uri.bucket()
            object = uri.has_object() and uri.object() or None

        if bucket:
            resource['bucket'] = str(bucket)
            if object:
                resource['uri'] = "/" + self.urlencode_string(object)
        if extra:
            resource['uri'] += extra

        # Recover the HTTP verb from the operation's method bits.
        method_string = S3.http_methods.getkey(S3.operations[operation] & S3.http_methods["MASK"])

        request = S3Request(self, method_string, resource, headers, params)

        debug("CreateRequest: resource[uri]=" + resource['uri'])
        return request
649
+
650
+ def _fail_wait(self, retries):
651
+ # Wait a few seconds. The more it fails the more we wait.
652
+ return (self._max_retries - retries + 1) * 3
653
+
654
    def send_request(self, request, body = None, retries = _max_retries):
        """Execute an S3Request; returns {'status','reason','headers','data'}.

        Retries transport errors and HTTP 5xx with a growing back-off,
        follows HTTP 307 redirects by re-targeting the bucket's hostname
        (without consuming a retry), and raises S3Error for any other
        non-2xx status. ParameterError always propagates immediately.
        """
        method_string, resource, headers = request.get_triplet()
        debug("Processing request, please wait...")
        if not headers.has_key('content-length'):
            headers['content-length'] = body and len(body) or 0
        try:
            # "Stringify" all headers
            for header in headers.keys():
                headers[header] = str(headers[header])
            conn = ConnMan.get(self.get_hostname(resource['bucket']))
            uri = self.format_uri(resource)
            debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or "")))
            conn.c.request(method_string, uri, body, headers)
            response = {}
            http_response = conn.c.getresponse()
            response["status"] = http_response.status
            response["reason"] = http_response.reason
            response["headers"] = convertTupleListToDict(http_response.getheaders())
            response["data"] = http_response.read()
            debug("Response: " + str(response))
            # Return the connection to the pool only on success.
            ConnMan.put(conn)
        except ParameterError, e:
            raise
        except Exception, e:
            if retries:
                warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                return self.send_request(request, body, retries - 1)
            else:
                raise S3RequestError("Request failed for: %s" % resource['uri'])

        if response["status"] == 307:
            ## RedirectPermanent
            redir_bucket = getTextFromXml(response['data'], ".//Bucket")
            redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
            self.set_hostname(redir_bucket, redir_hostname)
            warning("Redirected to: %s" % (redir_hostname))
            # Fresh retry budget: redirects are not failures.
            return self.send_request(request, body)

        if response["status"] >= 500:
            e = S3Error(response)
            if retries:
                warning(u"Retrying failed request: %s" % resource['uri'])
                warning(unicode(e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                return self.send_request(request, body, retries - 1)
            else:
                raise e

        if response["status"] < 200 or response["status"] > 299:
            raise S3Error(response)

        return response
709
+
710
def send_file(self, request, file, labels, buffer = '', throttle = 0, retries = _max_retries, offset = 0, chunk_size = -1):
    """
    Upload a file (or the in-memory string 'buffer') to S3 over a pooled
    HTTP connection, with retry, throttling and MD5 verification.

    Parameters:
        request    -- prepared S3 request; get_triplet() yields method,
                      resource dict and headers (incl. "content-length")
        file       -- open file object; read from 'offset' when buffer == ''
        labels     -- labels passed to the progress-meter class
        buffer     -- when non-empty, sent instead of reading from 'file'
        throttle   -- seconds to sleep between chunks (0 = no throttling)
        retries    -- remaining retry budget; method recurses with retries - 1
        offset     -- position in 'file' to start reading from
        chunk_size -- carried through to recursive retries (not used directly)

    Returns a response dict with status/reason/headers/data plus
    "size", "elapsed" and "speed".

    Raises S3UploadError when retries are exhausted, S3Error on a
    non-recoverable HTTP error, and re-raises ParameterError unchanged.
    """
    method_string, resource, headers = request.get_triplet()
    # Total payload size comes from the caller-prepared content-length header.
    size_left = size_total = headers.get("content-length")
    if self.config.progress_meter:
        progress = self.config.progress_class(labels, size_total)
    else:
        info("Sending file '%s', please wait..." % file.name)
    timestamp_start = time.time()
    try:
        # Reuse (or open) a pooled connection for this bucket's endpoint.
        conn = ConnMan.get(self.get_hostname(resource['bucket']))
        conn.c.putrequest(method_string, self.format_uri(resource))
        for header in headers.keys():
            conn.c.putheader(header, str(headers[header]))
        conn.c.endheaders()
    except ParameterError, e:
        # Caller-side errors are never retried.
        raise
    except Exception, e:
        if self.config.progress_meter:
            progress.done("failed")
        if retries:
            warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
            warning("Waiting %d sec..." % self._fail_wait(retries))
            time.sleep(self._fail_wait(retries))
            # Connection error -> same throttle value
            return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
        else:
            raise S3UploadError("Upload failed for: %s" % resource['uri'])
    if buffer == '':
        # Reading from the file: position at the requested offset.
        file.seek(offset)
    # Hash the bytes as they are sent so the ETag can be verified afterwards.
    md5_hash = md5()
    try:
        while (size_left > 0):
            #debug("SendFile: Reading up to %d bytes from '%s' - remaining bytes: %s" % (self.config.send_chunk, file.name, size_left))
            if buffer == '':
                data = file.read(min(self.config.send_chunk, size_left))
            else:
                # Whole payload supplied in memory; sent in one pass.
                data = buffer
            md5_hash.update(data)
            conn.c.send(data)
            if self.config.progress_meter:
                progress.update(delta_position = len(data))
            size_left -= len(data)
            if throttle:
                # Bandwidth limiting: sleep between chunks.
                time.sleep(throttle)
        md5_computed = md5_hash.hexdigest()
        response = {}
        http_response = conn.c.getresponse()
        response["status"] = http_response.status
        response["reason"] = http_response.reason
        response["headers"] = convertTupleListToDict(http_response.getheaders())
        response["data"] = http_response.read()
        response["size"] = size_total
        # Return the connection to the pool only after a clean exchange.
        ConnMan.put(conn)
        debug(u"Response: %s" % response)
    except ParameterError, e:
        raise
    except Exception, e:
        if self.config.progress_meter:
            progress.done("failed")
        if retries:
            if retries < self._max_retries:
                # Second and later failures: introduce / quintuple throttling
                # to back off on flaky links.
                throttle = throttle and throttle * 5 or 0.01
            warning("Upload failed: %s (%s)" % (resource['uri'], e))
            warning("Retrying on lower speed (throttle=%0.2f)" % throttle)
            warning("Waiting %d sec..." % self._fail_wait(retries))
            time.sleep(self._fail_wait(retries))
            # Connection error -> same throttle value
            return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
        else:
            debug("Giving up on '%s' %s" % (file.name, e))
            raise S3UploadError("Upload failed for: %s" % resource['uri'])

    timestamp_end = time.time()
    response["elapsed"] = timestamp_end - timestamp_start
    # -1 signals "unknown speed" when elapsed time rounds to zero.
    response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)

    if self.config.progress_meter:
        ## Finalising the upload takes some time -> update() progress meter
        ## to correct the average speed. Otherwise people will complain that
        ## 'progress' and response["speed"] are inconsistent ;-)
        progress.update()
        progress.done("done")

    if response["status"] == 307:
        ## RedirectPermanent
        # Parse the new endpoint from the XML body, remember it for this
        # bucket and restart the upload (with a fresh retry budget).
        redir_bucket = getTextFromXml(response['data'], ".//Bucket")
        redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
        self.set_hostname(redir_bucket, redir_hostname)
        warning("Redirected to: %s" % (redir_hostname))
        return self.send_file(request, file, labels, buffer, offset = offset, chunk_size = chunk_size)

    # S3 from time to time doesn't send ETag back in a response :-(
    # Force re-upload here.
    if not response['headers'].has_key('etag'):
        response['headers']['etag'] = ''

    if response["status"] < 200 or response["status"] > 299:
        try_retry = False
        if response["status"] >= 500:
            ## AWS internal error - retry
            try_retry = True
        elif response["status"] >= 400:
            err = S3Error(response)
            ## Retriable client error?
            if err.code in [ 'BadDigest', 'OperationAborted', 'TokenRefreshRequired', 'RequestTimeout' ]:
                try_retry = True

        if try_retry:
            if retries:
                warning("Upload failed: %s (%s)" % (resource['uri'], S3Error(response)))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
            else:
                warning("Too many failures. Giving up on '%s'" % (file.name))
                raise S3UploadError

        ## Non-recoverable error
        raise S3Error(response)

    debug("MD5 sums: computed=%s, received=%s" % (md5_computed, response["headers"]["etag"]))
    # Verify the returned ETag (quotes stripped) against our running hash;
    # a mismatch means the payload was corrupted in transit -> re-upload.
    if response["headers"]["etag"].strip('"\'') != md5_hash.hexdigest():
        warning("MD5 Sums don't match!")
        if retries:
            warning("Retrying upload of %s" % (file.name))
            return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
        else:
            warning("Too many failures. Giving up on '%s'" % (file.name))
            raise S3UploadError

    return response
841
+
842
def send_file_multipart(self, file, headers, uri, size):
    """
    Upload 'file' to 'uri' using the S3 multipart-upload protocol.

    Chunking, per-part uploads and completion are delegated to
    MultiPartUpload; this wrapper only times the transfer and annotates
    the completion response.

    Parameters:
        file    -- open file object to upload
        headers -- request headers passed through to MultiPartUpload
        uri     -- destination S3Uri
        size    -- total object size in bytes (recorded in the response)

    Returns the completion response dict extended with:
        "elapsed" -- wall-clock upload time in seconds
        "size"    -- the caller-supplied 'size'
        "speed"   -- bytes/sec, or -1.0 when elapsed time is zero
    """
    # NOTE(review): the original computed a local chunk_size from
    # config.multipart_chunk_size_mb but never used it; the dead local
    # has been removed (MultiPartUpload is handed no chunk size here).
    timestamp_start = time.time()
    upload = MultiPartUpload(self, file, uri, headers)
    upload.upload_all_parts()
    response = upload.complete_multipart_upload()
    timestamp_end = time.time()
    response["elapsed"] = timestamp_end - timestamp_start
    response["size"] = size
    # and/or idiom guards against division by zero on instant uploads.
    response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)
    return response
853
+
854
def recv_file(self, request, stream, labels, start_position = 0, retries = _max_retries):
    """
    Download an S3 object into the writable 'stream', with resume support,
    retry and MD5 verification.

    Parameters:
        request        -- prepared S3 request (method, resource, headers)
        stream         -- writable file-like object; also read back via
                          stream.name for whole-file MD5 on resumed downloads
        labels         -- labels passed to the progress-meter class
        start_position -- byte offset to resume from; when > 0 a Range
                          header is sent and MD5 is computed from the file
                          on disk instead of on the fly
        retries        -- remaining retry budget; method recurses with
                          retries - 1, resuming at the current position

    Returns a response dict with status/reason/headers plus "md5",
    "md5match", "elapsed", "size" and "speed".

    Raises S3DownloadError when retries are exhausted and S3Error on a
    non-2xx (non-307) response; ParameterError is re-raised unchanged.
    """
    method_string, resource, headers = request.get_triplet()
    if self.config.progress_meter:
        # Total size is unknown until the response headers arrive.
        progress = self.config.progress_class(labels, 0)
    else:
        info("Receiving file '%s', please wait..." % stream.name)
    timestamp_start = time.time()
    try:
        conn = ConnMan.get(self.get_hostname(resource['bucket']))
        conn.c.putrequest(method_string, self.format_uri(resource))
        for header in headers.keys():
            conn.c.putheader(header, str(headers[header]))
        if start_position > 0:
            # Resume: ask S3 for the remainder of the object only.
            debug("Requesting Range: %d .. end" % start_position)
            conn.c.putheader("Range", "bytes=%d-" % start_position)
        conn.c.endheaders()
        response = {}
        http_response = conn.c.getresponse()
        response["status"] = http_response.status
        response["reason"] = http_response.reason
        response["headers"] = convertTupleListToDict(http_response.getheaders())
        debug("Response: %s" % response)
    except ParameterError, e:
        # Caller-side errors are never retried.
        raise
    except Exception, e:
        if self.config.progress_meter:
            progress.done("failed")
        if retries:
            warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
            warning("Waiting %d sec..." % self._fail_wait(retries))
            time.sleep(self._fail_wait(retries))
            # Connection error -> same throttle value
            return self.recv_file(request, stream, labels, start_position, retries - 1)
        else:
            raise S3DownloadError("Download failed for: %s" % resource['uri'])

    if response["status"] == 307:
        ## RedirectPermanent
        # Learn the bucket's real endpoint from the XML body and restart
        # the download from the beginning (fresh retry budget).
        response['data'] = http_response.read()
        redir_bucket = getTextFromXml(response['data'], ".//Bucket")
        redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
        self.set_hostname(redir_bucket, redir_hostname)
        warning("Redirected to: %s" % (redir_hostname))
        return self.recv_file(request, stream, labels)

    if response["status"] < 200 or response["status"] > 299:
        raise S3Error(response)

    if start_position == 0:
        # Only compute MD5 on the fly if we're downloading from beginning
        # Otherwise we'd get a nonsense.
        md5_hash = md5()
    # content-length covers only the ranged remainder; add the resume
    # offset to get the full object size.
    size_left = int(response["headers"]["content-length"])
    size_total = start_position + size_left
    current_position = start_position

    if self.config.progress_meter:
        # Now that the size is known, retrofit it into the meter.
        progress.total_size = size_total
        progress.initial_position = current_position
        progress.current_position = current_position

    try:
        while (current_position < size_total):
            this_chunk = size_left > self.config.recv_chunk and self.config.recv_chunk or size_left
            data = http_response.read(this_chunk)
            if len(data) == 0:
                # Server closed the stream before sending everything.
                raise S3Error("EOF from S3!")

            stream.write(data)
            if start_position == 0:
                md5_hash.update(data)
            current_position += len(data)
            ## Call progress meter from here...
            if self.config.progress_meter:
                progress.update(delta_position = len(data))
        ConnMan.put(conn)
    except Exception, e:
        if self.config.progress_meter:
            progress.done("failed")
        if retries:
            warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
            warning("Waiting %d sec..." % self._fail_wait(retries))
            time.sleep(self._fail_wait(retries))
            # Connection error -> same throttle value
            # Note the retry resumes at current_position, not start_position.
            return self.recv_file(request, stream, labels, current_position, retries - 1)
        else:
            raise S3DownloadError("Download failed for: %s" % resource['uri'])

    stream.flush()
    timestamp_end = time.time()

    if self.config.progress_meter:
        ## The above stream.flush() may take some time -> update() progress meter
        ## to correct the average speed. Otherwise people will complain that
        ## 'progress' and response["speed"] are inconsistent ;-)
        progress.update()
        progress.done("done")

    if start_position == 0:
        # Only compute MD5 on the fly if we were downloading from the beginning
        response["md5"] = md5_hash.hexdigest()
    else:
        # Otherwise try to compute MD5 of the output file
        try:
            response["md5"] = hash_file_md5(stream.name)
        except IOError, e:
            if e.errno != errno.ENOENT:
                warning("Unable to open file: %s: %s" % (stream.name, e))
            warning("Unable to verify MD5. Assume it matches.")
            # Fall back to trusting S3: copy the ETag so md5match is True.
            response["md5"] = response["headers"]["etag"]

    # Substring match because the ETag is quoted (and may not be a plain MD5).
    response["md5match"] = response["headers"]["etag"].find(response["md5"]) >= 0
    response["elapsed"] = timestamp_end - timestamp_start
    response["size"] = current_position
    # -1 signals "unknown speed" when elapsed time rounds to zero.
    response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)
    if response["size"] != start_position + long(response["headers"]["content-length"]):
        warning("Reported size (%s) does not match received size (%s)" % (
            start_position + response["headers"]["content-length"], response["size"]))
    debug("ReceiveFile: Computed MD5 = %s" % response["md5"])
    if not response["md5match"]:
        warning("MD5 signatures do not match: computed=%s, received=%s" % (
            response["md5"], response["headers"]["etag"]))
    return response
977
# Export the S3 class from this module's public API.
__all__ += ["S3"]
978
+
979
+ # vim:et:ts=4:sts=4:ai