s3_cmd_bin 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +28 -0
- data/Rakefile +1 -0
- data/lib/s3_cmd_bin/version.rb +3 -0
- data/lib/s3_cmd_bin.rb +15 -0
- data/resources/ChangeLog +1462 -0
- data/resources/INSTALL +97 -0
- data/resources/LICENSE +339 -0
- data/resources/MANIFEST.in +2 -0
- data/resources/Makefile +4 -0
- data/resources/NEWS +234 -0
- data/resources/README +342 -0
- data/resources/S3/ACL.py +224 -0
- data/resources/S3/ACL.pyc +0 -0
- data/resources/S3/AccessLog.py +92 -0
- data/resources/S3/AccessLog.pyc +0 -0
- data/resources/S3/BidirMap.py +42 -0
- data/resources/S3/BidirMap.pyc +0 -0
- data/resources/S3/CloudFront.py +773 -0
- data/resources/S3/CloudFront.pyc +0 -0
- data/resources/S3/Config.py +294 -0
- data/resources/S3/Config.pyc +0 -0
- data/resources/S3/ConnMan.py +71 -0
- data/resources/S3/ConnMan.pyc +0 -0
- data/resources/S3/Exceptions.py +88 -0
- data/resources/S3/Exceptions.pyc +0 -0
- data/resources/S3/FileDict.py +53 -0
- data/resources/S3/FileDict.pyc +0 -0
- data/resources/S3/FileLists.py +517 -0
- data/resources/S3/FileLists.pyc +0 -0
- data/resources/S3/HashCache.py +53 -0
- data/resources/S3/HashCache.pyc +0 -0
- data/resources/S3/MultiPart.py +137 -0
- data/resources/S3/MultiPart.pyc +0 -0
- data/resources/S3/PkgInfo.py +14 -0
- data/resources/S3/PkgInfo.pyc +0 -0
- data/resources/S3/Progress.py +173 -0
- data/resources/S3/Progress.pyc +0 -0
- data/resources/S3/S3.py +979 -0
- data/resources/S3/S3.pyc +0 -0
- data/resources/S3/S3Uri.py +223 -0
- data/resources/S3/S3Uri.pyc +0 -0
- data/resources/S3/SimpleDB.py +178 -0
- data/resources/S3/SortedDict.py +66 -0
- data/resources/S3/SortedDict.pyc +0 -0
- data/resources/S3/Utils.py +462 -0
- data/resources/S3/Utils.pyc +0 -0
- data/resources/S3/__init__.py +0 -0
- data/resources/S3/__init__.pyc +0 -0
- data/resources/TODO +52 -0
- data/resources/artwork/AtomicClockRadio.ttf +0 -0
- data/resources/artwork/TypeRa.ttf +0 -0
- data/resources/artwork/site-top-full-size.xcf +0 -0
- data/resources/artwork/site-top-label-download.png +0 -0
- data/resources/artwork/site-top-label-s3cmd.png +0 -0
- data/resources/artwork/site-top-label-s3sync.png +0 -0
- data/resources/artwork/site-top-s3tools-logo.png +0 -0
- data/resources/artwork/site-top.jpg +0 -0
- data/resources/artwork/site-top.png +0 -0
- data/resources/artwork/site-top.xcf +0 -0
- data/resources/format-manpage.pl +196 -0
- data/resources/magic +63 -0
- data/resources/run-tests.py +537 -0
- data/resources/s3cmd +2116 -0
- data/resources/s3cmd.1 +435 -0
- data/resources/s3db +55 -0
- data/resources/setup.cfg +2 -0
- data/resources/setup.py +80 -0
- data/resources/testsuite.tar.gz +0 -0
- data/resources/upload-to-sf.sh +7 -0
- data/s3_cmd_bin.gemspec +23 -0
- metadata +152 -0
data/resources/S3/S3.py
ADDED
@@ -0,0 +1,979 @@
|
|
1
|
+
## Amazon S3 manager
|
2
|
+
## Author: Michal Ludvig <michal@logix.cz>
|
3
|
+
## http://www.logix.cz/michal
|
4
|
+
## License: GPL Version 2
|
5
|
+
|
6
|
+
import sys
|
7
|
+
import os, os.path
|
8
|
+
import time
|
9
|
+
import httplib
|
10
|
+
import logging
|
11
|
+
import mimetypes
|
12
|
+
import re
|
13
|
+
from logging import debug, info, warning, error
|
14
|
+
from stat import ST_SIZE
|
15
|
+
|
16
|
+
try:
|
17
|
+
from hashlib import md5
|
18
|
+
except ImportError:
|
19
|
+
from md5 import md5
|
20
|
+
|
21
|
+
from Utils import *
|
22
|
+
from SortedDict import SortedDict
|
23
|
+
from AccessLog import AccessLog
|
24
|
+
from ACL import ACL, GranteeLogDelivery
|
25
|
+
from BidirMap import BidirMap
|
26
|
+
from Config import Config
|
27
|
+
from Exceptions import *
|
28
|
+
from MultiPart import MultiPartUpload
|
29
|
+
from S3Uri import S3Uri
|
30
|
+
from ConnMan import ConnMan
|
31
|
+
|
32
|
+
## Detect the best available libmagic binding at import time and expose a
## uniform mime_magic(file) -> (content_type, content_encoding) helper.
## Three incompatible "magic" packages share the module name; each is
## tried in turn and the matching wrapper functions are defined.
try:
    import magic, gzip
    try:
        ## https://github.com/ahupp/python-magic
        magic_ = magic.Magic(mime=True)
        def mime_magic_file(file):
            return magic_.from_file(file)
        def mime_magic_buffer(buffer):
            return magic_.from_buffer(buffer)
    except TypeError:
        ## http://pypi.python.org/pypi/filemagic
        try:
            magic_ = magic.Magic(flags=magic.MAGIC_MIME)
            def mime_magic_file(file):
                return magic_.id_filename(file)
            def mime_magic_buffer(buffer):
                return magic_.id_buffer(buffer)
        except TypeError:
            ## file-5.11 built-in python bindings
            magic_ = magic.open(magic.MAGIC_MIME)
            magic_.load()
            def mime_magic_file(file):
                return magic_.file(file)
            def mime_magic_buffer(buffer):
                return magic_.buffer(buffer)

    except AttributeError:
        ## Older python-magic versions
        magic_ = magic.open(magic.MAGIC_MIME)
        magic_.load()
        def mime_magic_file(file):
            return magic_.file(file)
        def mime_magic_buffer(buffer):
            return magic_.buffer(buffer)

    def mime_magic(file):
        # Gzipped files are peeked into (first 8 KiB) so the *inner*
        # content type is reported, together with 'gzip' as the encoding.
        type = mime_magic_file(file)
        if type != "application/x-gzip; charset=binary":
            return (type, None)
        else:
            return (mime_magic_buffer(gzip.open(file).read(8192)), 'gzip')

except ImportError, e:
    # No usable magic module at all: warn once, then fall back to
    # extension-based guessing via the stdlib 'mimetypes'.
    if str(e).find("magic") >= 0:
        magic_message = "Module python-magic is not available."
    else:
        magic_message = "Module python-magic can't be used (%s)." % e.message
    magic_message += " Guessing MIME types based on file extensions."
    magic_warned = False
    def mime_magic(file):
        global magic_warned
        if (not magic_warned):
            warning(magic_message)
            magic_warned = True
        return mimetypes.guess_type(file)

__all__ = []
|
89
|
+
class S3Request(object):
    """One signed HTTP request to S3: method, resource, headers and params.

    Signing follows the AWS Signature V2 scheme: an HMAC signature over a
    canonical string built from the method, content headers, date and all
    x-amz-* headers.  (Python 2 code: uses dict.has_key.)
    """
    def __init__(self, s3, method_string, resource, headers, params = {}):
        # NOTE(review): 'params = {}' is a mutable default -- safe only as
        # long as no caller mutates self.params of a default-built request.
        self.s3 = s3
        self.headers = SortedDict(headers or {}, ignore_case = True)
        # Add in any extra headers from s3 config object
        if self.s3.config.extra_headers:
            self.headers.update(self.s3.config.extra_headers)
        # Temporary (STS/IAM-role) credentials must accompany every request.
        if len(self.s3.config.access_token)>0:
            self.s3.config.role_refresh()
            self.headers['x-amz-security-token']=self.s3.config.access_token
        self.resource = resource
        self.method_string = method_string
        self.params = params

        self.update_timestamp()
        self.sign()

    def update_timestamp(self):
        # 'x-amz-date' takes precedence over 'date' in V2 signing; drop any
        # caller-supplied 'date' so signature and wire header cannot disagree.
        if self.headers.has_key("date"):
            del(self.headers["date"])
        self.headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

    def format_param_str(self):
        """
        Format URL parameters from self.params and returns
        ?parm1=val1&parm2=val2 or an empty string if there
        are no parameters.  Output of this function should
        be appended directly to self.resource['uri']
        """
        param_str = ""
        for param in self.params:
            if self.params[param] not in (None, ""):
                param_str += "&%s=%s" % (param, self.params[param])
            else:
                # Valueless subresources (e.g. "acl") keep just the name.
                param_str += "&%s" % param
        return param_str and "?" + param_str[1:]

    def sign(self):
        # Build the V2 canonical string: METHOD, content-md5, content-type,
        # date, every x-amz-* header, then the bucket-qualified resource.
        h = self.method_string + "\n"
        h += self.headers.get("content-md5", "")+"\n"
        h += self.headers.get("content-type", "")+"\n"
        h += self.headers.get("date", "")+"\n"
        for header in self.headers.keys():
            if header.startswith("x-amz-"):
                h += header+":"+str(self.headers[header])+"\n"
        if self.resource['bucket']:
            h += "/" + self.resource['bucket']
        h += self.resource['uri']
        debug("SignHeaders: " + repr(h))
        signature = sign_string(h)

        self.headers["Authorization"] = "AWS "+self.s3.config.access_key+":"+signature

    def get_triplet(self):
        """Return (method, resource, headers), freshly timestamped and
        re-signed so retried requests never carry a stale date."""
        self.update_timestamp()
        self.sign()
        resource = dict(self.resource) ## take a copy
        resource['uri'] += self.format_param_str()
        return (self.method_string, resource, self.headers)
|
148
|
+
|
149
|
+
class S3(object):
    """Thin client for the S3 REST API (Signature V2, Python 2 / httplib)."""

    # HTTP verbs as disjoint bit flags; MASK selects the verb bits of a
    # combined operation code.
    http_methods = BidirMap(
        GET = 0x01,
        PUT = 0x02,
        HEAD = 0x04,
        DELETE = 0x08,
        POST = 0x10,
        MASK = 0x1F,
    )

    # Request targets occupy a separate bit range so target | verb can be
    # OR-ed into a single operation code.
    targets = BidirMap(
        SERVICE = 0x0100,
        BUCKET = 0x0200,
        OBJECT = 0x0400,
        MASK = 0x0700,
    )

    # Named operations = target | verb; create_request() masks out the verb.
    operations = BidirMap(
        UNDFINED = 0x0000,  # (sic) typo preserved -- it is a public key of this map
        LIST_ALL_BUCKETS = targets["SERVICE"] | http_methods["GET"],
        BUCKET_CREATE = targets["BUCKET"] | http_methods["PUT"],
        BUCKET_LIST = targets["BUCKET"] | http_methods["GET"],
        BUCKET_DELETE = targets["BUCKET"] | http_methods["DELETE"],
        OBJECT_PUT = targets["OBJECT"] | http_methods["PUT"],
        OBJECT_GET = targets["OBJECT"] | http_methods["GET"],
        OBJECT_HEAD = targets["OBJECT"] | http_methods["HEAD"],
        OBJECT_DELETE = targets["OBJECT"] | http_methods["DELETE"],
        OBJECT_POST = targets["OBJECT"] | http_methods["POST"],
    )

    # Human-readable message templates for common S3 error codes
    # ('%s' is the bucket name).
    codes = {
        "NoSuchBucket" : "Bucket '%s' does not exist",
        "AccessDenied" : "Access to bucket '%s' was denied",
        "BucketAlreadyExists" : "Bucket '%s' already exists",
    }

    ## S3 sometimes sends HTTP-307 response
    # bucket -> redirected hostname; class-level, so shared by all instances.
    redir_map = {}

    ## Maximum attempts of re-issuing failed requests
    _max_retries = 5
|
190
|
+
|
191
|
+
    def __init__(self, config):
        # 'config' is the project's Config object (credentials, proxy,
        # retry/multipart settings); held by reference, not copied.
        self.config = config
|
193
|
+
|
194
|
+
    def get_hostname(self, bucket):
        """Pick the hostname to contact for 'bucket'.

        DNS-conformant bucket names use virtual-host style addressing
        (honouring any recorded 307 redirect); anything else -- including
        service-level requests with bucket=None -- goes to host_base.
        """
        if bucket and check_bucket_name_dns_conformity(bucket):
            if self.redir_map.has_key(bucket):
                host = self.redir_map[bucket]
            else:
                host = getHostnameFromBucket(bucket)
        else:
            host = self.config.host_base
        debug('get_hostname(%s): %s' % (bucket, host))
        return host
|
204
|
+
|
205
|
+
    def set_hostname(self, bucket, redir_hostname):
        # Record a 307-redirect target; note redir_map is a class-level
        # dict, so the redirect is remembered across all S3 instances.
        self.redir_map[bucket] = redir_hostname
|
207
|
+
|
208
|
+
def format_uri(self, resource):
|
209
|
+
if resource['bucket'] and not check_bucket_name_dns_conformity(resource['bucket']):
|
210
|
+
uri = "/%s%s" % (resource['bucket'], resource['uri'])
|
211
|
+
else:
|
212
|
+
uri = resource['uri']
|
213
|
+
if self.config.proxy_host != "":
|
214
|
+
uri = "http://%s%s" % (self.get_hostname(resource['bucket']), uri)
|
215
|
+
debug('format_uri(): ' + uri)
|
216
|
+
return uri
|
217
|
+
|
218
|
+
## Commands / Actions
|
219
|
+
def list_all_buckets(self):
|
220
|
+
request = self.create_request("LIST_ALL_BUCKETS")
|
221
|
+
response = self.send_request(request)
|
222
|
+
response["list"] = getListFromXml(response["data"], "Bucket")
|
223
|
+
return response
|
224
|
+
|
225
|
+
    def bucket_list(self, bucket, prefix = None, recursive = None):
        """List a bucket, transparently following truncated (paged) results.

        Returns the last raw response augmented with 'list' (all Contents
        entries) and 'common_prefixes' (all CommonPrefixes) accumulated
        across every page.
        """
        def _list_truncated(data):
            ## <IsTruncated> can either be "true" or "false" or be missing completely
            is_truncated = getTextFromXml(data, ".//IsTruncated") or "false"
            return is_truncated.lower() != "false"

        def _get_contents(data):
            return getListFromXml(data, "Contents")

        def _get_common_prefixes(data):
            return getListFromXml(data, "CommonPrefixes")

        uri_params = {}
        truncated = True
        list = []
        prefixes = []

        while truncated:
            response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
            current_list = _get_contents(response["data"])
            current_prefixes = _get_common_prefixes(response["data"])
            truncated = _list_truncated(response["data"])
            if truncated:
                # Resume the next page after the last key, or -- for a purely
                # delimited listing with no keys -- after the last prefix.
                if current_list:
                    uri_params['marker'] = self.urlencode_string(current_list[-1]["Key"])
                else:
                    uri_params['marker'] = self.urlencode_string(current_prefixes[-1]["Prefix"])
                debug("Listing continues after '%s'" % uri_params['marker'])

            list += current_list
            prefixes += current_prefixes

        response['list'] = list
        response['common_prefixes'] = prefixes
        return response
|
260
|
+
|
261
|
+
def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}):
|
262
|
+
if prefix:
|
263
|
+
uri_params['prefix'] = self.urlencode_string(prefix)
|
264
|
+
if not self.config.recursive and not recursive:
|
265
|
+
uri_params['delimiter'] = "/"
|
266
|
+
request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params)
|
267
|
+
response = self.send_request(request)
|
268
|
+
#debug(response)
|
269
|
+
return response
|
270
|
+
|
271
|
+
    def bucket_create(self, bucket, bucket_location = None):
        """PUT a new bucket, optionally with a LocationConstraint body.

        "US" (classic) needs no body.  "EU" is kept upper-case, all other
        region names are lower-cased.  Location-constrained buckets must
        have DNS-strict names; US-classic only needs the loose check.
        """
        headers = SortedDict(ignore_case = True)
        body = ""
        if bucket_location and bucket_location.strip().upper() != "US":
            bucket_location = bucket_location.strip()
            if bucket_location.upper() == "EU":
                bucket_location = bucket_location.upper()
            else:
                bucket_location = bucket_location.lower()
            body = "<CreateBucketConfiguration><LocationConstraint>"
            body += bucket_location
            body += "</LocationConstraint></CreateBucketConfiguration>"
            debug("bucket_location: " + body)
            check_bucket_name(bucket, dns_strict = True)
        else:
            check_bucket_name(bucket, dns_strict = False)
        if self.config.acl_public:
            headers["x-amz-acl"] = "public-read"
        request = self.create_request("BUCKET_CREATE", bucket = bucket, headers = headers)
        response = self.send_request(request, body)
        return response
|
292
|
+
|
293
|
+
def bucket_delete(self, bucket):
|
294
|
+
request = self.create_request("BUCKET_DELETE", bucket = bucket)
|
295
|
+
response = self.send_request(request)
|
296
|
+
return response
|
297
|
+
|
298
|
+
def get_bucket_location(self, uri):
|
299
|
+
request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?location")
|
300
|
+
response = self.send_request(request)
|
301
|
+
location = getTextFromXml(response['data'], "LocationConstraint")
|
302
|
+
if not location or location in [ "", "US" ]:
|
303
|
+
location = "us-east-1"
|
304
|
+
elif location == "EU":
|
305
|
+
location = "eu-west-1"
|
306
|
+
return location
|
307
|
+
|
308
|
+
def bucket_info(self, uri):
|
309
|
+
# For now reports only "Location". One day perhaps more.
|
310
|
+
response = {}
|
311
|
+
response['bucket-location'] = self.get_bucket_location(uri)
|
312
|
+
return response
|
313
|
+
|
314
|
+
    def website_info(self, uri, bucket_location = None):
        """Fetch the bucket's static-website configuration.

        Returns the response dict augmented with index/error documents and
        the computed website endpoint, or None when no website is
        configured (S3 answers 404 for the '?website' subresource).
        """
        headers = SortedDict(ignore_case = True)
        bucket = uri.bucket()
        body = ""

        request = self.create_request("BUCKET_LIST", bucket = bucket, extra="?website")
        try:
            response = self.send_request(request, body)
            response['index_document'] = getTextFromXml(response['data'], ".//IndexDocument//Suffix")
            response['error_document'] = getTextFromXml(response['data'], ".//ErrorDocument//Key")
            response['website_endpoint'] = self.config.website_endpoint % {
                "bucket" : uri.bucket(),
                "location" : self.get_bucket_location(uri)}
            return response
        except S3Error, e:
            if e.status == 404:
                debug("Could not get /?website - website probably not configured for this bucket")
                return None
            raise
|
333
|
+
|
334
|
+
    def website_create(self, uri, bucket_location = None):
        """Enable static website hosting with the configured index document
        and, when set, the configured error document."""
        headers = SortedDict(ignore_case = True)
        bucket = uri.bucket()
        # Hand-built XML body for the PUT ?website request.
        body = '<WebsiteConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">'
        body += ' <IndexDocument>'
        body += (' <Suffix>%s</Suffix>' % self.config.website_index)
        body += ' </IndexDocument>'
        if self.config.website_error:
            body += ' <ErrorDocument>'
            body += (' <Key>%s</Key>' % self.config.website_error)
            body += ' </ErrorDocument>'
        body += '</WebsiteConfiguration>'

        request = self.create_request("BUCKET_CREATE", bucket = bucket, extra="?website")
        debug("About to send request '%s' with body '%s'" % (request, body))
        response = self.send_request(request, body)
        debug("Received response '%s'" % (response))

        return response
|
353
|
+
|
354
|
+
def website_delete(self, uri, bucket_location = None):
|
355
|
+
headers = SortedDict(ignore_case = True)
|
356
|
+
bucket = uri.bucket()
|
357
|
+
body = ""
|
358
|
+
|
359
|
+
request = self.create_request("BUCKET_DELETE", bucket = bucket, extra="?website")
|
360
|
+
debug("About to send request '%s' with body '%s'" % (request, body))
|
361
|
+
response = self.send_request(request, body)
|
362
|
+
debug("Received response '%s'" % (response))
|
363
|
+
|
364
|
+
if response['status'] != 204:
|
365
|
+
raise S3ResponseError("Expected status 204: %s" % response)
|
366
|
+
|
367
|
+
return response
|
368
|
+
|
369
|
+
def add_encoding(self, filename, content_type):
|
370
|
+
if content_type.find("charset=") != -1:
|
371
|
+
return False
|
372
|
+
exts = self.config.add_encoding_exts.split(',')
|
373
|
+
if exts[0]=='':
|
374
|
+
return False
|
375
|
+
parts = filename.rsplit('.',2)
|
376
|
+
if len(parts) < 2:
|
377
|
+
return False
|
378
|
+
ext = parts[1]
|
379
|
+
if ext in exts:
|
380
|
+
return True
|
381
|
+
else:
|
382
|
+
return False
|
383
|
+
|
384
|
+
    def object_put(self, filename, uri, extra_headers = None, extra_label = ""):
        """Upload a local file ("-" means stdin) to an S3 URI.

        Determines MIME type/encoding, ACL and storage class from config,
        then dispatches either to a multipart upload (large files or
        stdin) or a single PUT via send_file().
        """
        # TODO TODO
        # Make it consistent with stream-oriented object_get()
        if uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % uri.type)

        if filename != "-" and not os.path.isfile(filename):
            raise InvalidFileError(u"%s is not a regular file" % unicodise(filename))
        try:
            if filename == "-":
                # stdin: size unknown up front -> forces the multipart path.
                file = sys.stdin
                size = 0
            else:
                file = open(filename, "rb")
                size = os.stat(filename)[ST_SIZE]
        except (IOError, OSError), e:
            raise InvalidFileError(u"%s: %s" % (unicodise(filename), e.strerror))

        headers = SortedDict(ignore_case = True)
        if extra_headers:
            headers.update(extra_headers)

        ## MIME-type handling
        content_type = self.config.mime_type
        content_encoding = None
        # Only sniff real files; an explicit config mime_type wins.
        if filename != "-" and not content_type and self.config.guess_mime_type:
            (content_type, content_encoding) = mime_magic(filename)
        if not content_type:
            content_type = self.config.default_mime_type
        if not content_encoding:
            content_encoding = self.config.encoding.upper()

        ## add charset to content type
        if self.add_encoding(filename, content_type) and content_encoding is not None:
            content_type = content_type + "; charset=" + content_encoding

        headers["content-type"] = content_type
        if content_encoding is not None:
            headers["content-encoding"] = content_encoding

        ## Other Amazon S3 attributes
        if self.config.acl_public:
            headers["x-amz-acl"] = "public-read"
        if self.config.reduced_redundancy:
            headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY"

        ## Multipart decision
        multipart = False
        if not self.config.enable_multipart and filename == "-":
            raise ParameterError("Multi-part upload is required to upload from stdin")
        if self.config.enable_multipart:
            if size > self.config.multipart_chunk_size_mb * 1024 * 1024 or filename == "-":
                multipart = True
        if multipart:
            # Multipart requests are quite different... drop here
            return self.send_file_multipart(file, headers, uri, size)

        ## Not multipart...
        headers["content-length"] = size
        request = self.create_request("OBJECT_PUT", uri = uri, headers = headers)
        labels = { 'source' : unicodise(filename), 'destination' : unicodise(uri.uri()), 'extra' : extra_label }
        response = self.send_file(request, file, labels)
        return response
|
447
|
+
|
448
|
+
def object_get(self, uri, stream, start_position = 0, extra_label = ""):
|
449
|
+
if uri.type != "s3":
|
450
|
+
raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
|
451
|
+
request = self.create_request("OBJECT_GET", uri = uri)
|
452
|
+
labels = { 'source' : unicodise(uri.uri()), 'destination' : unicodise(stream.name), 'extra' : extra_label }
|
453
|
+
response = self.recv_file(request, stream, labels, start_position)
|
454
|
+
return response
|
455
|
+
|
456
|
+
def object_delete(self, uri):
|
457
|
+
if uri.type != "s3":
|
458
|
+
raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
|
459
|
+
request = self.create_request("OBJECT_DELETE", uri = uri)
|
460
|
+
response = self.send_request(request)
|
461
|
+
return response
|
462
|
+
|
463
|
+
    def object_copy(self, src_uri, dst_uri, extra_headers = None):
        """Server-side copy between two s3:// URIs.

        Uses x-amz-metadata-directive COPY, so the destination inherits
        the source's metadata.
        """
        if src_uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % src_uri.type)
        if dst_uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % dst_uri.type)
        headers = SortedDict(ignore_case = True)
        headers['x-amz-copy-source'] = "/%s/%s" % (src_uri.bucket(), self.urlencode_string(src_uri.object()))
        ## TODO: For now COPY, later maybe add a switch?
        headers['x-amz-metadata-directive'] = "COPY"
        if self.config.acl_public:
            headers["x-amz-acl"] = "public-read"
        if self.config.reduced_redundancy:
            headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY"
        # NOTE(review): 'extra_headers' is accepted but deliberately left
        # un-merged (lines below are commented out) -- confirm intent
        # before enabling; arbitrary headers can break the V2 signature.
        # if extra_headers:
        #     headers.update(extra_headers)
        request = self.create_request("OBJECT_PUT", uri = dst_uri, headers = headers)
        response = self.send_request(request)
        return response
|
481
|
+
|
482
|
+
    def object_move(self, src_uri, dst_uri, extra_headers = None):
        """Server-side copy followed by deletion of the source.

        The source is deleted only when the copy response parses as a
        CopyObjectResult, so a failed copy never loses data.  Returns the
        copy response.
        """
        response_copy = self.object_copy(src_uri, dst_uri, extra_headers)
        debug("Object %s copied to %s" % (src_uri, dst_uri))
        if getRootTagName(response_copy["data"]) == "CopyObjectResult":
            response_delete = self.object_delete(src_uri)
            debug("Object %s deleted" % src_uri)
        return response_copy
|
489
|
+
|
490
|
+
def object_info(self, uri):
|
491
|
+
request = self.create_request("OBJECT_HEAD", uri = uri)
|
492
|
+
response = self.send_request(request)
|
493
|
+
return response
|
494
|
+
|
495
|
+
def get_acl(self, uri):
|
496
|
+
if uri.has_object():
|
497
|
+
request = self.create_request("OBJECT_GET", uri = uri, extra = "?acl")
|
498
|
+
else:
|
499
|
+
request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?acl")
|
500
|
+
|
501
|
+
response = self.send_request(request)
|
502
|
+
acl = ACL(response['data'])
|
503
|
+
return acl
|
504
|
+
|
505
|
+
def set_acl(self, uri, acl):
|
506
|
+
if uri.has_object():
|
507
|
+
request = self.create_request("OBJECT_PUT", uri = uri, extra = "?acl")
|
508
|
+
else:
|
509
|
+
request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?acl")
|
510
|
+
|
511
|
+
body = str(acl)
|
512
|
+
debug(u"set_acl(%s): acl-xml: %s" % (uri, body))
|
513
|
+
response = self.send_request(request, body)
|
514
|
+
return response
|
515
|
+
|
516
|
+
def get_policy(self, uri):
|
517
|
+
request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?policy")
|
518
|
+
response = self.send_request(request)
|
519
|
+
return response['data']
|
520
|
+
|
521
|
+
def set_policy(self, uri, policy):
|
522
|
+
headers = {}
|
523
|
+
# TODO check policy is proper json string
|
524
|
+
headers['content-type'] = 'application/json'
|
525
|
+
request = self.create_request("BUCKET_CREATE", uri = uri,
|
526
|
+
extra = "?policy", headers=headers)
|
527
|
+
body = policy
|
528
|
+
debug(u"set_policy(%s): policy-json: %s" % (uri, body))
|
529
|
+
request.sign()
|
530
|
+
response = self.send_request(request, body=body)
|
531
|
+
return response
|
532
|
+
|
533
|
+
def delete_policy(self, uri):
|
534
|
+
request = self.create_request("BUCKET_DELETE", uri = uri, extra = "?policy")
|
535
|
+
debug(u"delete_policy(%s)" % uri)
|
536
|
+
response = self.send_request(request)
|
537
|
+
return response
|
538
|
+
|
539
|
+
def get_accesslog(self, uri):
|
540
|
+
request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?logging")
|
541
|
+
response = self.send_request(request)
|
542
|
+
accesslog = AccessLog(response['data'])
|
543
|
+
return accesslog
|
544
|
+
|
545
|
+
def set_accesslog_acl(self, uri):
|
546
|
+
acl = self.get_acl(uri)
|
547
|
+
debug("Current ACL(%s): %s" % (uri.uri(), str(acl)))
|
548
|
+
acl.appendGrantee(GranteeLogDelivery("READ_ACP"))
|
549
|
+
acl.appendGrantee(GranteeLogDelivery("WRITE"))
|
550
|
+
debug("Updated ACL(%s): %s" % (uri.uri(), str(acl)))
|
551
|
+
self.set_acl(uri, acl)
|
552
|
+
|
553
|
+
    def set_accesslog(self, uri, enable, log_target_prefix_uri = None, acl_public = False):
        """Enable or disable server access logging for a bucket.

        On InvalidTargetBucketForLogging the target bucket is first given
        the log-delivery ACL grants (set_accesslog_acl) and the request is
        retried once.  Returns (accesslog, response).
        """
        request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?logging")
        accesslog = AccessLog()
        if enable:
            accesslog.enableLogging(log_target_prefix_uri)
            accesslog.setAclPublic(acl_public)
        else:
            accesslog.disableLogging()
        body = str(accesslog)
        debug(u"set_accesslog(%s): accesslog-xml: %s" % (uri, body))
        try:
            response = self.send_request(request, body)
        except S3Error, e:
            if e.info['Code'] == "InvalidTargetBucketForLogging":
                info("Setting up log-delivery ACL for target bucket.")
                self.set_accesslog_acl(S3Uri("s3://%s" % log_target_prefix_uri.bucket()))
                response = self.send_request(request, body)
            else:
                raise
        return accesslog, response
|
573
|
+
|
574
|
+
## Low level methods
|
575
|
+
    def urlencode_string(self, string, urlencoding_mode = None):
        """Percent-encode an object key the way S3's V2 signing expects.

        Mode "verbatim" returns the input untouched; "fixbucket"
        additionally %-encodes non-printable characters instead of
        replacing them.  (Python 2: operates on UTF-8 byte strings.)
        """
        if type(string) == unicode:
            string = string.encode("utf-8")

        if urlencoding_mode is None:
            urlencoding_mode = self.config.urlencoding_mode

        if urlencoding_mode == "verbatim":
            ## Don't do any pre-processing
            return string

        encoded = ""
        ## List of characters that must be escaped for S3
        ## Haven't found this in any official docs
        ## but my tests show it's more less correct.
        ## If you start getting InvalidSignature errors
        ## from S3 check the error headers returned
        ## from S3 to see whether the list hasn't
        ## changed.
        for c in string: # I'm not sure how to know in what encoding
                    # 'object' is. Apparently "type(object)==str"
                    # but the contents is a string of unicode
                    # bytes, e.g. '\xc4\x8d\xc5\xafr\xc3\xa1k'
                    # Don't know what it will do on non-utf8
                    # systems.
                    #           [hope that sounds reassuring ;-)]
            o = ord(c)
            if (o < 0x20 or o == 0x7f):
                # Control characters: encode in "fixbucket" mode, otherwise
                # substitute and tell the user to report the key.
                if urlencoding_mode == "fixbucket":
                    encoded += "%%%02X" % o
                else:
                    error(u"Non-printable character 0x%02x in: %s" % (o, string))
                    error(u"Please report it to s3tools-bugs@lists.sourceforge.net")
                    encoded += replace_nonprintables(c)
            elif (o == 0x20 or # Space and below
                o == 0x22 or # "
                o == 0x23 or # #
                o == 0x25 or # % (escape character)
                o == 0x26 or # &
                o == 0x2B or # + (or it would become <space>)
                o == 0x3C or # <
                o == 0x3E or # >
                o == 0x3F or # ?
                o == 0x60 or # `
                o >= 123):   # { and above, including >= 128 for UTF-8
                encoded += "%%%02X" % o
            else:
                encoded += c
        debug("String '%s' encoded to '%s'" % (string, encoded))
        return encoded
|
625
|
+
|
626
|
+
    def create_request(self, operation, uri = None, bucket = None, object = None, headers = None, extra = None, **params):
        """Build a signed S3Request for the named operation.

        The target may be given either as an S3Uri ('uri') or as explicit
        'bucket'/'object' -- never both.  'extra' is appended verbatim to
        the resource path (e.g. "?acl"); remaining keyword args become
        query parameters.
        """
        resource = { 'bucket' : None, 'uri' : "/" }

        if uri and (bucket or object):
            raise ValueError("Both 'uri' and either 'bucket' or 'object' parameters supplied")
        ## If URI is given use that instead of bucket/object parameters
        if uri:
            bucket = uri.bucket()
            object = uri.has_object() and uri.object() or None

        if bucket:
            resource['bucket'] = str(bucket)
            if object:
                resource['uri'] = "/" + self.urlencode_string(object)
        if extra:
            resource['uri'] += extra

        # Derive the HTTP verb by masking the verb bits out of the
        # combined operation code.
        method_string = S3.http_methods.getkey(S3.operations[operation] & S3.http_methods["MASK"])

        request = S3Request(self, method_string, resource, headers, params)

        debug("CreateRequest: resource[uri]=" + resource['uri'])
        return request
|
649
|
+
|
650
|
+
def _fail_wait(self, retries):
|
651
|
+
# Wait a few seconds. The more it fails the more we wait.
|
652
|
+
return (self._max_retries - retries + 1) * 3
|
653
|
+
|
654
|
+
    def send_request(self, request, body = None, retries = _max_retries):
        """Send a request, retrying transport errors and HTTP 5xx with
        linear back-off, and following S3's 307 bucket redirects.

        Returns the response dict (status/reason/headers/data); raises
        S3Error for any final non-2xx status and S3RequestError when the
        retry budget is exhausted on transport failures.
        """
        method_string, resource, headers = request.get_triplet()
        debug("Processing request, please wait...")
        if not headers.has_key('content-length'):
            headers['content-length'] = body and len(body) or 0
        try:
            # "Stringify" all headers
            for header in headers.keys():
                headers[header] = str(headers[header])
            conn = ConnMan.get(self.get_hostname(resource['bucket']))
            uri = self.format_uri(resource)
            debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or "")))
            conn.c.request(method_string, uri, body, headers)
            response = {}
            http_response = conn.c.getresponse()
            response["status"] = http_response.status
            response["reason"] = http_response.reason
            response["headers"] = convertTupleListToDict(http_response.getheaders())
            response["data"] = http_response.read()
            debug("Response: " + str(response))
            ConnMan.put(conn)
        except ParameterError, e:
            # Caller errors are never retried.
            raise
        except Exception, e:
            # Transport-level failure: back off and re-issue, or give up.
            if retries:
                warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                return self.send_request(request, body, retries - 1)
            else:
                raise S3RequestError("Request failed for: %s" % resource['uri'])

        if response["status"] == 307:
            ## RedirectPermanent
            # Remember the new endpoint, then restart with a fresh retry budget.
            redir_bucket = getTextFromXml(response['data'], ".//Bucket")
            redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
            self.set_hostname(redir_bucket, redir_hostname)
            warning("Redirected to: %s" % (redir_hostname))
            return self.send_request(request, body)

        if response["status"] >= 500:
            # Server-side errors are retried with the same back-off.
            e = S3Error(response)
            if retries:
                warning(u"Retrying failed request: %s" % resource['uri'])
                warning(unicode(e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                return self.send_request(request, body, retries - 1)
            else:
                raise e

        if response["status"] < 200 or response["status"] > 299:
            raise S3Error(response)

        return response
|
709
|
+
|
710
|
+
def send_file(self, request, file, labels, buffer = '', throttle = 0, retries = _max_retries, offset = 0, chunk_size = -1):
    """Upload a file (or an in-memory buffer) to S3 over a raw HTTP connection.

    Parameters:
        request   -- prepared S3 request object; get_triplet() yields (method, resource, headers).
        file      -- open file object to read from (used when 'buffer' is empty).
        labels    -- labels passed to the progress-meter class.
        buffer    -- if non-empty, this string is sent instead of reading 'file'.
        throttle  -- seconds to sleep between chunks (escalated on retry).
        retries   -- remaining retry budget; recursion decrements it.
        offset    -- byte offset in 'file' to start reading from.
        chunk_size -- forwarded through retries; not read directly here.

    Returns a response dict with status/reason/headers/data/size/elapsed/speed.
    Raises S3UploadError after the retry budget is exhausted, or S3Error for
    non-recoverable HTTP errors.
    """
    method_string, resource, headers = request.get_triplet()
    # NOTE(review): assumes headers["content-length"] was set as an int by the
    # caller; size_left is used in arithmetic below -- confirm against callers.
    size_left = size_total = headers.get("content-length")
    if self.config.progress_meter:
        progress = self.config.progress_class(labels, size_total)
    else:
        info("Sending file '%s', please wait..." % file.name)
    timestamp_start = time.time()
    # Phase 1: open the connection and send the request line + headers.
    try:
        conn = ConnMan.get(self.get_hostname(resource['bucket']))
        conn.c.putrequest(method_string, self.format_uri(resource))
        for header in headers.keys():
            conn.c.putheader(header, str(headers[header]))
        conn.c.endheaders()
    except ParameterError, e:
        # Caller error -- never retry these.
        raise
    except Exception, e:
        if self.config.progress_meter:
            progress.done("failed")
        if retries:
            warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
            warning("Waiting %d sec..." % self._fail_wait(retries))
            time.sleep(self._fail_wait(retries))
            # Connection error -> same throttle value
            return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
        else:
            raise S3UploadError("Upload failed for: %s" % resource['uri'])
    # Phase 2: stream the body in chunks, hashing as we go.
    if buffer == '':
        file.seek(offset)
    md5_hash = md5()
    try:
        while (size_left > 0):
            #debug("SendFile: Reading up to %d bytes from '%s' - remaining bytes: %s" % (self.config.send_chunk, file.name, size_left))
            if buffer == '':
                data = file.read(min(self.config.send_chunk, size_left))
            else:
                # When a buffer was supplied it is sent as a single chunk.
                data = buffer
            md5_hash.update(data)
            conn.c.send(data)
            if self.config.progress_meter:
                progress.update(delta_position = len(data))
            size_left -= len(data)
            if throttle:
                time.sleep(throttle)
        md5_computed = md5_hash.hexdigest()
        response = {}
        http_response = conn.c.getresponse()
        response["status"] = http_response.status
        response["reason"] = http_response.reason
        response["headers"] = convertTupleListToDict(http_response.getheaders())
        response["data"] = http_response.read()
        response["size"] = size_total
        ConnMan.put(conn)
        debug(u"Response: %s" % response)
    except ParameterError, e:
        raise
    except Exception, e:
        if self.config.progress_meter:
            progress.done("failed")
        if retries:
            if retries < self._max_retries:
                # Not the first failure: slow down 5x (or start at 0.01s).
                throttle = throttle and throttle * 5 or 0.01
            warning("Upload failed: %s (%s)" % (resource['uri'], e))
            warning("Retrying on lower speed (throttle=%0.2f)" % throttle)
            warning("Waiting %d sec..." % self._fail_wait(retries))
            time.sleep(self._fail_wait(retries))
            # Connection error -> same throttle value
            return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
        else:
            debug("Giving up on '%s' %s" % (file.name, e))
            raise S3UploadError("Upload failed for: %s" % resource['uri'])

    timestamp_end = time.time()
    response["elapsed"] = timestamp_end - timestamp_start
    response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)

    if self.config.progress_meter:
        ## Finalising the upload takes some time -> update() progress meter
        ## to correct the average speed. Otherwise people will complain that
        ## 'progress' and response["speed"] are inconsistent ;-)
        progress.update()
        progress.done("done")

    # Phase 3: interpret the response -- redirect, retry, or verify.
    if response["status"] == 307:
        ## RedirectPermanent
        redir_bucket = getTextFromXml(response['data'], ".//Bucket")
        redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
        self.set_hostname(redir_bucket, redir_hostname)
        warning("Redirected to: %s" % (redir_hostname))
        # NOTE(review): this restart deliberately(?) resets retries/throttle to
        # their defaults -- confirm that is intended after a redirect.
        return self.send_file(request, file, labels, buffer, offset = offset, chunk_size = chunk_size)

    # S3 from time to time doesn't send ETag back in a response :-(
    # Force re-upload here.
    if not response['headers'].has_key('etag'):
        response['headers']['etag'] = ''

    if response["status"] < 200 or response["status"] > 299:
        try_retry = False
        if response["status"] >= 500:
            ## AWS internal error - retry
            try_retry = True
        elif response["status"] >= 400:
            err = S3Error(response)
            ## Retriable client error?
            if err.code in [ 'BadDigest', 'OperationAborted', 'TokenRefreshRequired', 'RequestTimeout' ]:
                try_retry = True

        if try_retry:
            if retries:
                warning("Upload failed: %s (%s)" % (resource['uri'], S3Error(response)))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
            else:
                warning("Too many failures. Giving up on '%s'" % (file.name))
                raise S3UploadError

        ## Non-recoverable error
        raise S3Error(response)

    # Verify integrity: the ETag of a single-part upload is the body's MD5.
    debug("MD5 sums: computed=%s, received=%s" % (md5_computed, response["headers"]["etag"]))
    if response["headers"]["etag"].strip('"\'') != md5_hash.hexdigest():
        warning("MD5 Sums don't match!")
        if retries:
            warning("Retrying upload of %s" % (file.name))
            return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
        else:
            warning("Too many failures. Giving up on '%s'" % (file.name))
            raise S3UploadError

    return response
|
841
|
+
|
842
|
+
def send_file_multipart(self, file, headers, uri, size):
    """Upload 'file' to 'uri' using the S3 multi-part upload protocol.

    Delegates the actual part splitting and sending to MultiPartUpload,
    then annotates the completion response with timing statistics.

    Parameters:
        file    -- open file object to upload.
        headers -- request headers forwarded to MultiPartUpload.
        uri     -- destination S3Uri.
        size    -- total upload size in bytes (recorded in the response).

    Returns the complete-multipart-upload response dict, extended with
    "elapsed" (seconds), "size" (bytes) and "speed" (bytes/sec, or -1.0
    when no time elapsed).
    """
    # NOTE: the previous revision computed a local chunk_size from
    # self.config.multipart_chunk_size_mb here but never used it;
    # MultiPartUpload reads the chunk size from the config itself.
    timestamp_start = time.time()
    upload = MultiPartUpload(self, file, uri, headers)
    upload.upload_all_parts()
    response = upload.complete_multipart_upload()
    timestamp_end = time.time()
    response["elapsed"] = timestamp_end - timestamp_start
    response["size"] = size
    # Proper conditional expression: the old "elapsed and speed or -1" idiom
    # wrongly reported -1 whenever the true speed was 0.0 (size == 0).
    if response["elapsed"]:
        response["speed"] = float(response["size"]) / response["elapsed"]
    else:
        response["speed"] = float(-1)
    return response
|
853
|
+
|
854
|
+
def recv_file(self, request, stream, labels, start_position = 0, retries = _max_retries):
|
855
|
+
method_string, resource, headers = request.get_triplet()
|
856
|
+
if self.config.progress_meter:
|
857
|
+
progress = self.config.progress_class(labels, 0)
|
858
|
+
else:
|
859
|
+
info("Receiving file '%s', please wait..." % stream.name)
|
860
|
+
timestamp_start = time.time()
|
861
|
+
try:
|
862
|
+
conn = ConnMan.get(self.get_hostname(resource['bucket']))
|
863
|
+
conn.c.putrequest(method_string, self.format_uri(resource))
|
864
|
+
for header in headers.keys():
|
865
|
+
conn.c.putheader(header, str(headers[header]))
|
866
|
+
if start_position > 0:
|
867
|
+
debug("Requesting Range: %d .. end" % start_position)
|
868
|
+
conn.c.putheader("Range", "bytes=%d-" % start_position)
|
869
|
+
conn.c.endheaders()
|
870
|
+
response = {}
|
871
|
+
http_response = conn.c.getresponse()
|
872
|
+
response["status"] = http_response.status
|
873
|
+
response["reason"] = http_response.reason
|
874
|
+
response["headers"] = convertTupleListToDict(http_response.getheaders())
|
875
|
+
debug("Response: %s" % response)
|
876
|
+
except ParameterError, e:
|
877
|
+
raise
|
878
|
+
except Exception, e:
|
879
|
+
if self.config.progress_meter:
|
880
|
+
progress.done("failed")
|
881
|
+
if retries:
|
882
|
+
warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
|
883
|
+
warning("Waiting %d sec..." % self._fail_wait(retries))
|
884
|
+
time.sleep(self._fail_wait(retries))
|
885
|
+
# Connection error -> same throttle value
|
886
|
+
return self.recv_file(request, stream, labels, start_position, retries - 1)
|
887
|
+
else:
|
888
|
+
raise S3DownloadError("Download failed for: %s" % resource['uri'])
|
889
|
+
|
890
|
+
if response["status"] == 307:
|
891
|
+
## RedirectPermanent
|
892
|
+
response['data'] = http_response.read()
|
893
|
+
redir_bucket = getTextFromXml(response['data'], ".//Bucket")
|
894
|
+
redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
|
895
|
+
self.set_hostname(redir_bucket, redir_hostname)
|
896
|
+
warning("Redirected to: %s" % (redir_hostname))
|
897
|
+
return self.recv_file(request, stream, labels)
|
898
|
+
|
899
|
+
if response["status"] < 200 or response["status"] > 299:
|
900
|
+
raise S3Error(response)
|
901
|
+
|
902
|
+
if start_position == 0:
|
903
|
+
# Only compute MD5 on the fly if we're downloading from beginning
|
904
|
+
# Otherwise we'd get a nonsense.
|
905
|
+
md5_hash = md5()
|
906
|
+
size_left = int(response["headers"]["content-length"])
|
907
|
+
size_total = start_position + size_left
|
908
|
+
current_position = start_position
|
909
|
+
|
910
|
+
if self.config.progress_meter:
|
911
|
+
progress.total_size = size_total
|
912
|
+
progress.initial_position = current_position
|
913
|
+
progress.current_position = current_position
|
914
|
+
|
915
|
+
try:
|
916
|
+
while (current_position < size_total):
|
917
|
+
this_chunk = size_left > self.config.recv_chunk and self.config.recv_chunk or size_left
|
918
|
+
data = http_response.read(this_chunk)
|
919
|
+
if len(data) == 0:
|
920
|
+
raise S3Error("EOF from S3!")
|
921
|
+
|
922
|
+
stream.write(data)
|
923
|
+
if start_position == 0:
|
924
|
+
md5_hash.update(data)
|
925
|
+
current_position += len(data)
|
926
|
+
## Call progress meter from here...
|
927
|
+
if self.config.progress_meter:
|
928
|
+
progress.update(delta_position = len(data))
|
929
|
+
ConnMan.put(conn)
|
930
|
+
except Exception, e:
|
931
|
+
if self.config.progress_meter:
|
932
|
+
progress.done("failed")
|
933
|
+
if retries:
|
934
|
+
warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
|
935
|
+
warning("Waiting %d sec..." % self._fail_wait(retries))
|
936
|
+
time.sleep(self._fail_wait(retries))
|
937
|
+
# Connection error -> same throttle value
|
938
|
+
return self.recv_file(request, stream, labels, current_position, retries - 1)
|
939
|
+
else:
|
940
|
+
raise S3DownloadError("Download failed for: %s" % resource['uri'])
|
941
|
+
|
942
|
+
stream.flush()
|
943
|
+
timestamp_end = time.time()
|
944
|
+
|
945
|
+
if self.config.progress_meter:
|
946
|
+
## The above stream.flush() may take some time -> update() progress meter
|
947
|
+
## to correct the average speed. Otherwise people will complain that
|
948
|
+
## 'progress' and response["speed"] are inconsistent ;-)
|
949
|
+
progress.update()
|
950
|
+
progress.done("done")
|
951
|
+
|
952
|
+
if start_position == 0:
|
953
|
+
# Only compute MD5 on the fly if we were downloading from the beginning
|
954
|
+
response["md5"] = md5_hash.hexdigest()
|
955
|
+
else:
|
956
|
+
# Otherwise try to compute MD5 of the output file
|
957
|
+
try:
|
958
|
+
response["md5"] = hash_file_md5(stream.name)
|
959
|
+
except IOError, e:
|
960
|
+
if e.errno != errno.ENOENT:
|
961
|
+
warning("Unable to open file: %s: %s" % (stream.name, e))
|
962
|
+
warning("Unable to verify MD5. Assume it matches.")
|
963
|
+
response["md5"] = response["headers"]["etag"]
|
964
|
+
|
965
|
+
response["md5match"] = response["headers"]["etag"].find(response["md5"]) >= 0
|
966
|
+
response["elapsed"] = timestamp_end - timestamp_start
|
967
|
+
response["size"] = current_position
|
968
|
+
response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)
|
969
|
+
if response["size"] != start_position + long(response["headers"]["content-length"]):
|
970
|
+
warning("Reported size (%s) does not match received size (%s)" % (
|
971
|
+
start_position + response["headers"]["content-length"], response["size"]))
|
972
|
+
debug("ReceiveFile: Computed MD5 = %s" % response["md5"])
|
973
|
+
if not response["md5match"]:
|
974
|
+
warning("MD5 signatures do not match: computed=%s, received=%s" % (
|
975
|
+
response["md5"], response["headers"]["etag"]))
|
976
|
+
return response
|
977
|
+
# Export the S3 class as part of this module's public API.
__all__.append("S3")
|
978
|
+
|
979
|
+
# vim:et:ts=4:sts=4:ai
|