s3_cmd_bin 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/s3_cmd_bin/version.rb +1 -1
- data/resources/ChangeLog +0 -0
- data/resources/INSTALL +0 -0
- data/resources/MANIFEST.in +1 -0
- data/resources/NEWS +1 -40
- data/resources/README +0 -0
- data/resources/S3/ACL.py +0 -0
- data/resources/S3/AccessLog.py +0 -0
- data/resources/S3/BidirMap.py +0 -0
- data/resources/S3/CloudFront.py +8 -37
- data/resources/S3/Config.py +1 -88
- data/resources/S3/Exceptions.py +1 -1
- data/resources/S3/FileLists.py +100 -272
- data/resources/S3/MultiPart.py +21 -45
- data/resources/S3/PkgInfo.py +1 -1
- data/resources/S3/Progress.py +0 -17
- data/resources/S3/S3.py +52 -148
- data/resources/S3/S3Uri.py +2 -3
- data/resources/S3/SimpleDB.py +0 -3
- data/resources/S3/SortedDict.py +0 -3
- data/resources/S3/Utils.py +3 -80
- data/resources/S3/__init__.py +0 -0
- data/resources/TODO +0 -0
- data/resources/artwork/AtomicClockRadio.ttf +0 -0
- data/resources/artwork/TypeRa.ttf +0 -0
- data/resources/artwork/site-top-full-size.xcf +0 -0
- data/resources/artwork/site-top-label-download.png +0 -0
- data/resources/artwork/site-top-label-s3cmd.png +0 -0
- data/resources/artwork/site-top-label-s3sync.png +0 -0
- data/resources/artwork/site-top-s3tools-logo.png +0 -0
- data/resources/artwork/site-top.jpg +0 -0
- data/resources/artwork/site-top.png +0 -0
- data/resources/artwork/site-top.xcf +0 -0
- data/resources/run-tests.py +2 -2
- data/resources/s3cmd +306 -600
- data/resources/s3cmd.1 +97 -84
- data/resources/setup.cfg +0 -0
- data/resources/setup.py +0 -0
- data/resources/testsuite.tar.gz +0 -0
- metadata +2 -26
- data/resources/LICENSE +0 -339
- data/resources/Makefile +0 -4
- data/resources/S3/ACL.pyc +0 -0
- data/resources/S3/AccessLog.pyc +0 -0
- data/resources/S3/BidirMap.pyc +0 -0
- data/resources/S3/CloudFront.pyc +0 -0
- data/resources/S3/Config.pyc +0 -0
- data/resources/S3/ConnMan.py +0 -71
- data/resources/S3/ConnMan.pyc +0 -0
- data/resources/S3/Exceptions.pyc +0 -0
- data/resources/S3/FileDict.py +0 -53
- data/resources/S3/FileDict.pyc +0 -0
- data/resources/S3/FileLists.pyc +0 -0
- data/resources/S3/HashCache.py +0 -53
- data/resources/S3/HashCache.pyc +0 -0
- data/resources/S3/MultiPart.pyc +0 -0
- data/resources/S3/PkgInfo.pyc +0 -0
- data/resources/S3/Progress.pyc +0 -0
- data/resources/S3/S3.pyc +0 -0
- data/resources/S3/S3Uri.pyc +0 -0
- data/resources/S3/SortedDict.pyc +0 -0
- data/resources/S3/Utils.pyc +0 -0
- data/resources/S3/__init__.pyc +0 -0
- data/resources/magic +0 -63
data/lib/s3_cmd_bin/version.rb
CHANGED
data/resources/ChangeLog
CHANGED
File without changes
|
data/resources/INSTALL
CHANGED
File without changes
|
data/resources/MANIFEST.in
CHANGED
data/resources/NEWS
CHANGED
@@ -1,43 +1,4 @@
|
|
1
|
-
s3cmd 1.
|
2
|
-
==================
|
3
|
-
* Persistent HTTP/HTTPS connections for massive speedup (Michal Ludvig)
|
4
|
-
* New switch --quiet for suppressing all output (Siddarth Prakash)
|
5
|
-
* Honour "umask" on file downloads (Jason Dalton)
|
6
|
-
* Various bugfixes from many contributors
|
7
|
-
|
8
|
-
s3cmd 1.5.0-alpha2 - 2013-03-04
|
9
|
-
==================
|
10
|
-
* IAM roles support (David Kohen, Eric Dowd)
|
11
|
-
* Manage bucket policies (Kota Uenishi)
|
12
|
-
* Various bugfixes from many contributors
|
13
|
-
|
14
|
-
s3cmd 1.5.0-alpha1 - 2013-02-19
|
15
|
-
==================
|
16
|
-
* Server-side copy for hardlinks/softlinks to improve performance
|
17
|
-
(Matt Domsch)
|
18
|
-
* New [signurl] command (Craig Ringer)
|
19
|
-
* Improved symlink-loop detection (Michal Ludvig)
|
20
|
-
* Add --delete-after option for sync (Matt Domsch)
|
21
|
-
* Handle empty return bodies when processing S3 errors.
|
22
|
-
(Kelly McLaughlin)
|
23
|
-
* Upload from STDIN (Eric Connell)
|
24
|
-
* Updated bucket locations (Stefhen Hovland)
|
25
|
-
* Support custom HTTP headers (Brendan O'Connor, Karl Matthias)
|
26
|
-
* Improved MIME support (Karsten Sperling, Christopher Noyes)
|
27
|
-
* Added support for --acl-grant/--acl-revoke to 'sync' command
|
28
|
-
(Michael Tyson)
|
29
|
-
* CloudFront: Support default index and default root invalidation
|
30
|
-
(Josep del Rio)
|
31
|
-
* Command line options for access/secret keys (Matt Sweeney)
|
32
|
-
* Support [setpolicy] for setting bucket policies (Joe Fiorini)
|
33
|
-
* Respect the $TZ environment variable (James Brown)
|
34
|
-
* Reduce memory consumption for [s3cmd du] (Charlie Schluting)
|
35
|
-
* Rate limit progress updates (Steven Noonan)
|
36
|
-
* Download from S3 to a temp file first (Sumit Kumar)
|
37
|
-
* Reuse a single connection when doing a bucket list (Kelly McLaughlin)
|
38
|
-
* Delete empty files if object_get() failed (Oren Held)
|
39
|
-
|
40
|
-
s3cmd 1.1.0 - (never released)
|
1
|
+
s3cmd 1.1.0 - ???
|
41
2
|
===========
|
42
3
|
* MultiPart upload enabled for both [put] and [sync]. Default chunk
|
43
4
|
size is 15MB.
|
data/resources/README
CHANGED
File without changes
|
data/resources/S3/ACL.py
CHANGED
File without changes
|
data/resources/S3/AccessLog.py
CHANGED
File without changes
|
data/resources/S3/BidirMap.py
CHANGED
File without changes
|
data/resources/S3/CloudFront.py
CHANGED
@@ -133,7 +133,7 @@ class DistributionConfig(object):
|
|
133
133
|
## </Logging>
|
134
134
|
## </DistributionConfig>
|
135
135
|
|
136
|
-
EMPTY_CONFIG = "<DistributionConfig><
|
136
|
+
EMPTY_CONFIG = "<DistributionConfig><Origin/><CallerReference/><Enabled>true</Enabled></DistributionConfig>"
|
137
137
|
xmlns = "http://cloudfront.amazonaws.com/doc/%(api_ver)s/" % { 'api_ver' : cloudfront_api_version }
|
138
138
|
def __init__(self, xml = None, tree = None):
|
139
139
|
if xml is None:
|
@@ -174,8 +174,7 @@ class DistributionConfig(object):
|
|
174
174
|
tree.attrib['xmlns'] = DistributionConfig.xmlns
|
175
175
|
|
176
176
|
## Retain the order of the following calls!
|
177
|
-
|
178
|
-
appendXmlTextNode("DNSName", self.info['S3Origin']['DNSName'], s3org)
|
177
|
+
appendXmlTextNode("Origin", self.info['Origin'], tree)
|
179
178
|
appendXmlTextNode("CallerReference", self.info['CallerReference'], tree)
|
180
179
|
for cname in self.info['CNAME']:
|
181
180
|
appendXmlTextNode("CNAME", cname.lower(), tree)
|
@@ -282,7 +281,7 @@ class InvalidationBatch(object):
|
|
282
281
|
tree = ET.Element("InvalidationBatch")
|
283
282
|
|
284
283
|
for path in self.paths:
|
285
|
-
if
|
284
|
+
if path[0] != "/":
|
286
285
|
path = "/" + path
|
287
286
|
appendXmlTextNode("Path", path, tree)
|
288
287
|
appendXmlTextNode("CallerReference", self.reference, tree)
|
@@ -323,7 +322,7 @@ class CloudFront(object):
|
|
323
322
|
def CreateDistribution(self, uri, cnames_add = [], comment = None, logging = None, default_root_object = None):
|
324
323
|
dist_config = DistributionConfig()
|
325
324
|
dist_config.info['Enabled'] = True
|
326
|
-
dist_config.info['
|
325
|
+
dist_config.info['Origin'] = uri.host_name()
|
327
326
|
dist_config.info['CallerReference'] = str(uri)
|
328
327
|
dist_config.info['DefaultRootObject'] = default_root_object
|
329
328
|
if comment == None:
|
@@ -424,23 +423,7 @@ class CloudFront(object):
|
|
424
423
|
body = request_body, headers = headers)
|
425
424
|
return response
|
426
425
|
|
427
|
-
def InvalidateObjects(self, uri, paths
|
428
|
-
# joseprio: if the user doesn't want to invalidate the default index
|
429
|
-
# path, or if the user wants to invalidate the root of the default
|
430
|
-
# index, we need to process those paths
|
431
|
-
if default_index_file is not None and (not invalidate_default_index_on_cf or invalidate_default_index_root_on_cf):
|
432
|
-
new_paths = []
|
433
|
-
default_index_suffix = '/' + default_index_file
|
434
|
-
for path in paths:
|
435
|
-
if path.endswith(default_index_suffix) or path == default_index_file:
|
436
|
-
if invalidate_default_index_on_cf:
|
437
|
-
new_paths.append(path)
|
438
|
-
if invalidate_default_index_root_on_cf:
|
439
|
-
new_paths.append(path[:-len(default_index_file)])
|
440
|
-
else:
|
441
|
-
new_paths.append(path)
|
442
|
-
paths = new_paths
|
443
|
-
|
426
|
+
def InvalidateObjects(self, uri, paths):
|
444
427
|
# uri could be either cf:// or s3:// uri
|
445
428
|
cfuri = self.get_dist_name_for_bucket(uri)
|
446
429
|
if len(paths) > 999:
|
@@ -534,10 +517,6 @@ class CloudFront(object):
|
|
534
517
|
if not headers.has_key("x-amz-date"):
|
535
518
|
headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
|
536
519
|
|
537
|
-
if len(self.config.access_token)>0:
|
538
|
-
self.config.refresh_role()
|
539
|
-
headers['x-amz-security-token']=self.config.access_token
|
540
|
-
|
541
520
|
signature = self.sign_request(headers)
|
542
521
|
headers["Authorization"] = "AWS "+self.config.access_key+":"+signature
|
543
522
|
|
@@ -576,16 +555,8 @@ class CloudFront(object):
|
|
576
555
|
for d in response['dist_list'].dist_summs:
|
577
556
|
if d.info.has_key("S3Origin"):
|
578
557
|
CloudFront.dist_list[getBucketFromHostname(d.info['S3Origin']['DNSName'])[0]] = d.uri()
|
579
|
-
elif d.info.has_key("CustomOrigin"):
|
580
|
-
# Aral: This used to skip over distributions with CustomOrigin, however, we mustn't
|
581
|
-
# do this since S3 buckets that are set up as websites use custom origins.
|
582
|
-
# Thankfully, the custom origin URLs they use start with the URL of the
|
583
|
-
# S3 bucket. Here, we make use this naming convention to support this use case.
|
584
|
-
distListIndex = getBucketFromHostname(d.info['CustomOrigin']['DNSName'])[0];
|
585
|
-
distListIndex = distListIndex[:len(uri.bucket())]
|
586
|
-
CloudFront.dist_list[distListIndex] = d.uri()
|
587
558
|
else:
|
588
|
-
#
|
559
|
+
# Skip over distributions with CustomOrigin
|
589
560
|
continue
|
590
561
|
debug("dist_list: %s" % CloudFront.dist_list)
|
591
562
|
try:
|
@@ -692,7 +663,7 @@ class Cmd(object):
|
|
692
663
|
d = response['distribution']
|
693
664
|
dc = d.info['DistributionConfig']
|
694
665
|
output("Distribution created:")
|
695
|
-
pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['
|
666
|
+
pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
|
696
667
|
pretty_output("DistId", d.uri())
|
697
668
|
pretty_output("DomainName", d.info['DomainName'])
|
698
669
|
pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
|
@@ -734,7 +705,7 @@ class Cmd(object):
|
|
734
705
|
response = cf.GetDistInfo(cfuri)
|
735
706
|
d = response['distribution']
|
736
707
|
dc = d.info['DistributionConfig']
|
737
|
-
pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['
|
708
|
+
pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
|
738
709
|
pretty_output("DistId", d.uri())
|
739
710
|
pretty_output("DomainName", d.info['DomainName'])
|
740
711
|
pretty_output("Status", d.info['Status'])
|
data/resources/S3/Config.py
CHANGED
@@ -7,11 +7,8 @@ import logging
|
|
7
7
|
from logging import debug, info, warning, error
|
8
8
|
import re
|
9
9
|
import os
|
10
|
-
import sys
|
11
10
|
import Progress
|
12
11
|
from SortedDict import SortedDict
|
13
|
-
import httplib
|
14
|
-
import json
|
15
12
|
|
16
13
|
class Config(object):
|
17
14
|
_instance = None
|
@@ -19,7 +16,6 @@ class Config(object):
|
|
19
16
|
_doc = {}
|
20
17
|
access_key = ""
|
21
18
|
secret_key = ""
|
22
|
-
access_token = ""
|
23
19
|
host_base = "s3.amazonaws.com"
|
24
20
|
host_bucket = "%(bucket)s.s3.amazonaws.com"
|
25
21
|
simpledb_host = "sdb.amazonaws.com"
|
@@ -44,7 +40,6 @@ class Config(object):
|
|
44
40
|
proxy_port = 3128
|
45
41
|
encrypt = False
|
46
42
|
dry_run = False
|
47
|
-
add_encoding_exts = ""
|
48
43
|
preserve_attrs = True
|
49
44
|
preserve_attrs_list = [
|
50
45
|
'uname', # Verbose owner Name (e.g. 'root')
|
@@ -55,14 +50,10 @@ class Config(object):
|
|
55
50
|
'mtime', # Modification timestamp
|
56
51
|
'ctime', # Creation timestamp
|
57
52
|
'mode', # File mode (e.g. rwxr-xr-x = 755)
|
58
|
-
'md5', # File MD5 (if known)
|
59
53
|
#'acl', # Full ACL (not yet supported)
|
60
54
|
]
|
61
55
|
delete_removed = False
|
62
|
-
delete_after = False
|
63
|
-
delete_after_fetch = False
|
64
56
|
_doc['delete_removed'] = "[sync] Remove remote S3 objects when local file has been deleted"
|
65
|
-
delay_updates = False
|
66
57
|
gpg_passphrase = ""
|
67
58
|
gpg_command = ""
|
68
59
|
gpg_encrypt = "%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
|
@@ -89,15 +80,9 @@ class Config(object):
|
|
89
80
|
follow_symlinks = False
|
90
81
|
socket_timeout = 300
|
91
82
|
invalidate_on_cf = False
|
92
|
-
# joseprio: new flags for default index invalidation
|
93
|
-
invalidate_default_index_on_cf = False
|
94
|
-
invalidate_default_index_root_on_cf = True
|
95
83
|
website_index = "index.html"
|
96
84
|
website_error = ""
|
97
85
|
website_endpoint = "http://%(bucket)s.s3-website-%(location)s.amazonaws.com/"
|
98
|
-
additional_destinations = []
|
99
|
-
cache_file = ""
|
100
|
-
add_headers = ""
|
101
86
|
|
102
87
|
## Creating a singleton
|
103
88
|
def __new__(self, configfile = None):
|
@@ -107,73 +92,7 @@ class Config(object):
|
|
107
92
|
|
108
93
|
def __init__(self, configfile = None):
|
109
94
|
if configfile:
|
110
|
-
|
111
|
-
self.read_config_file(configfile)
|
112
|
-
except IOError, e:
|
113
|
-
if 'AWS_CREDENTIAL_FILE' in os.environ:
|
114
|
-
self.env_config()
|
115
|
-
if len(self.access_key)==0:
|
116
|
-
self.role_config()
|
117
|
-
|
118
|
-
def role_config(self):
|
119
|
-
conn = httplib.HTTPConnection(host='169.254.169.254',timeout=0.1)
|
120
|
-
try:
|
121
|
-
conn.request('GET', "/latest/meta-data/iam/security-credentials/")
|
122
|
-
resp = conn.getresponse()
|
123
|
-
files = resp.read()
|
124
|
-
if resp.status == 200 and len(files)>1:
|
125
|
-
conn.request('GET', "/latest/meta-data/iam/security-credentials/%s"%files)
|
126
|
-
resp=conn.getresponse()
|
127
|
-
if resp.status == 200:
|
128
|
-
creds=json.load(resp)
|
129
|
-
Config().update_option('access_key', creds['AccessKeyId'].encode('ascii'))
|
130
|
-
Config().update_option('secret_key', creds['SecretAccessKey'].encode('ascii'))
|
131
|
-
Config().update_option('access_token', creds['Token'].encode('ascii'))
|
132
|
-
else:
|
133
|
-
raise IOError
|
134
|
-
else:
|
135
|
-
raise IOError
|
136
|
-
except:
|
137
|
-
raise
|
138
|
-
|
139
|
-
def role_refresh(self):
|
140
|
-
try:
|
141
|
-
self.role_config()
|
142
|
-
except:
|
143
|
-
warning("Could not refresh role")
|
144
|
-
|
145
|
-
def env_config(self):
|
146
|
-
cred_content = ""
|
147
|
-
try:
|
148
|
-
cred_file = open(os.environ['AWS_CREDENTIAL_FILE'],'r')
|
149
|
-
cred_content = cred_file.read()
|
150
|
-
except IOError, e:
|
151
|
-
debug("Error %d accessing credentials file %s" % (e.errno,os.environ['AWS_CREDENTIAL_FILE']))
|
152
|
-
r_data = re.compile("^\s*(?P<orig_key>\w+)\s*=\s*(?P<value>.*)")
|
153
|
-
r_quotes = re.compile("^\"(.*)\"\s*$")
|
154
|
-
if len(cred_content)>0:
|
155
|
-
for line in cred_content.splitlines():
|
156
|
-
is_data = r_data.match(line)
|
157
|
-
is_data = r_data.match(line)
|
158
|
-
if is_data:
|
159
|
-
data = is_data.groupdict()
|
160
|
-
if r_quotes.match(data["value"]):
|
161
|
-
data["value"] = data["value"][1:-1]
|
162
|
-
if data["orig_key"]=="AWSAccessKeyId":
|
163
|
-
data["key"] = "access_key"
|
164
|
-
elif data["orig_key"]=="AWSSecretKey":
|
165
|
-
data["key"] = "secret_key"
|
166
|
-
else:
|
167
|
-
del data["key"]
|
168
|
-
if "key" in data:
|
169
|
-
Config().update_option(data["key"], data["value"])
|
170
|
-
if data["key"] in ("access_key", "secret_key", "gpg_passphrase"):
|
171
|
-
print_value = (data["value"][:2]+"...%d_chars..."+data["value"][-1:]) % (len(data["value"]) - 3)
|
172
|
-
else:
|
173
|
-
print_value = data["value"]
|
174
|
-
debug("env_Config: %s->%s" % (data["key"], print_value))
|
175
|
-
|
176
|
-
|
95
|
+
self.read_config_file(configfile)
|
177
96
|
|
178
97
|
def option_list(self):
|
179
98
|
retval = []
|
@@ -193,12 +112,6 @@ class Config(object):
|
|
193
112
|
cp = ConfigParser(configfile)
|
194
113
|
for option in self.option_list():
|
195
114
|
self.update_option(option, cp.get(option))
|
196
|
-
|
197
|
-
if cp.get('add_headers'):
|
198
|
-
for option in cp.get('add_headers').split(","):
|
199
|
-
(key, value) = option.split(':')
|
200
|
-
self.extra_headers[key.replace('_', '-').strip()] = value.strip()
|
201
|
-
|
202
115
|
self._parsed_files.append(configfile)
|
203
116
|
|
204
117
|
def dump_config(self, stream):
|
data/resources/S3/Exceptions.py
CHANGED
@@ -44,7 +44,7 @@ class S3Error (S3Exception):
|
|
44
44
|
if response.has_key("headers"):
|
45
45
|
for header in response["headers"]:
|
46
46
|
debug("HttpHeader: %s: %s" % (header, response["headers"][header]))
|
47
|
-
if response.has_key("data")
|
47
|
+
if response.has_key("data"):
|
48
48
|
tree = getTreeFromXml(response["data"])
|
49
49
|
error_node = tree
|
50
50
|
if not error_node.tag == "Error":
|
data/resources/S3/FileLists.py
CHANGED
@@ -6,59 +6,56 @@
|
|
6
6
|
from S3 import S3
|
7
7
|
from Config import Config
|
8
8
|
from S3Uri import S3Uri
|
9
|
-
from
|
9
|
+
from SortedDict import SortedDict
|
10
10
|
from Utils import *
|
11
11
|
from Exceptions import ParameterError
|
12
|
-
from HashCache import HashCache
|
13
12
|
|
14
13
|
from logging import debug, info, warning, error
|
15
14
|
|
16
15
|
import os
|
17
16
|
import glob
|
18
|
-
import copy
|
19
17
|
|
20
|
-
__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include"
|
18
|
+
__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include"]
|
21
19
|
|
22
20
|
def _fswalk_follow_symlinks(path):
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
yield (dirpath, dirnames, filenames)
|
21
|
+
'''
|
22
|
+
Walk filesystem, following symbolic links (but without recursion), on python2.4 and later
|
23
|
+
|
24
|
+
If a recursive directory link is detected, emit a warning and skip.
|
25
|
+
'''
|
26
|
+
assert os.path.isdir(path) # only designed for directory argument
|
27
|
+
walkdirs = set([path])
|
28
|
+
targets = set()
|
29
|
+
for dirpath, dirnames, filenames in os.walk(path):
|
30
|
+
for dirname in dirnames:
|
31
|
+
current = os.path.join(dirpath, dirname)
|
32
|
+
target = os.path.realpath(current)
|
33
|
+
if os.path.islink(current):
|
34
|
+
if target in targets:
|
35
|
+
warning("Skipping recursively symlinked directory %s" % dirname)
|
36
|
+
else:
|
37
|
+
walkdirs.add(current)
|
38
|
+
targets.add(target)
|
39
|
+
for walkdir in walkdirs:
|
40
|
+
for value in os.walk(walkdir):
|
41
|
+
yield value
|
42
|
+
|
43
|
+
def _fswalk(path, follow_symlinks):
|
44
|
+
'''
|
45
|
+
Directory tree generator
|
46
|
+
|
47
|
+
path (str) is the root of the directory tree to walk
|
48
|
+
|
49
|
+
follow_symlinks (bool) indicates whether to descend into symbolically linked directories
|
50
|
+
'''
|
51
|
+
if follow_symlinks:
|
52
|
+
return _fswalk_follow_symlinks(path)
|
53
|
+
return os.walk(path)
|
57
54
|
|
58
55
|
def filter_exclude_include(src_list):
|
59
56
|
info(u"Applying --exclude/--include")
|
60
57
|
cfg = Config()
|
61
|
-
exclude_list =
|
58
|
+
exclude_list = SortedDict(ignore_case = False)
|
62
59
|
for file in src_list.keys():
|
63
60
|
debug(u"CHECK: %s" % file)
|
64
61
|
excluded = False
|
@@ -81,90 +78,23 @@ def filter_exclude_include(src_list):
|
|
81
78
|
del(src_list[file])
|
82
79
|
continue
|
83
80
|
else:
|
84
|
-
debug(u"PASS: %
|
81
|
+
debug(u"PASS: %s" % (file))
|
85
82
|
return src_list, exclude_list
|
86
83
|
|
87
|
-
def handle_exclude_include_walk(root, dirs, files):
|
88
|
-
cfg = Config()
|
89
|
-
copydirs = copy.copy(dirs)
|
90
|
-
copyfiles = copy.copy(files)
|
91
|
-
|
92
|
-
# exclude dir matches in the current directory
|
93
|
-
# this prevents us from recursing down trees we know we want to ignore
|
94
|
-
for x in copydirs:
|
95
|
-
d = os.path.join(root, x, '')
|
96
|
-
debug(u"CHECK: %r" % d)
|
97
|
-
excluded = False
|
98
|
-
for r in cfg.exclude:
|
99
|
-
if r.search(d):
|
100
|
-
excluded = True
|
101
|
-
debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
|
102
|
-
break
|
103
|
-
if excluded:
|
104
|
-
## No need to check for --include if not excluded
|
105
|
-
for r in cfg.include:
|
106
|
-
if r.search(d):
|
107
|
-
excluded = False
|
108
|
-
debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
|
109
|
-
break
|
110
|
-
if excluded:
|
111
|
-
## Still excluded - ok, action it
|
112
|
-
debug(u"EXCLUDE: %r" % d)
|
113
|
-
dirs.remove(x)
|
114
|
-
continue
|
115
|
-
else:
|
116
|
-
debug(u"PASS: %r" % (d))
|
117
|
-
|
118
|
-
# exclude file matches in the current directory
|
119
|
-
for x in copyfiles:
|
120
|
-
file = os.path.join(root, x)
|
121
|
-
debug(u"CHECK: %r" % file)
|
122
|
-
excluded = False
|
123
|
-
for r in cfg.exclude:
|
124
|
-
if r.search(file):
|
125
|
-
excluded = True
|
126
|
-
debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
|
127
|
-
break
|
128
|
-
if excluded:
|
129
|
-
## No need to check for --include if not excluded
|
130
|
-
for r in cfg.include:
|
131
|
-
if r.search(file):
|
132
|
-
excluded = False
|
133
|
-
debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
|
134
|
-
break
|
135
|
-
if excluded:
|
136
|
-
## Still excluded - ok, action it
|
137
|
-
debug(u"EXCLUDE: %s" % file)
|
138
|
-
files.remove(x)
|
139
|
-
continue
|
140
|
-
else:
|
141
|
-
debug(u"PASS: %r" % (file))
|
142
|
-
|
143
84
|
def fetch_local_list(args, recursive = None):
|
144
|
-
def _get_filelist_local(
|
85
|
+
def _get_filelist_local(local_uri):
|
145
86
|
info(u"Compiling list of local files...")
|
146
|
-
|
147
|
-
if deunicodise(local_uri.basename()) == "-":
|
148
|
-
loc_list["-"] = {
|
149
|
-
'full_name_unicode' : '-',
|
150
|
-
'full_name' : '-',
|
151
|
-
'size' : -1,
|
152
|
-
'mtime' : -1,
|
153
|
-
}
|
154
|
-
return loc_list, True
|
155
87
|
if local_uri.isdir():
|
156
88
|
local_base = deunicodise(local_uri.basename())
|
157
89
|
local_path = deunicodise(local_uri.path())
|
158
|
-
|
159
|
-
filelist = _fswalk_follow_symlinks(local_path)
|
160
|
-
else:
|
161
|
-
filelist = _fswalk_no_symlinks(local_path)
|
90
|
+
filelist = _fswalk(local_path, cfg.follow_symlinks)
|
162
91
|
single_file = False
|
163
92
|
else:
|
164
93
|
local_base = ""
|
165
94
|
local_path = deunicodise(local_uri.dirname())
|
166
95
|
filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
|
167
96
|
single_file = True
|
97
|
+
loc_list = SortedDict(ignore_case = False)
|
168
98
|
for root, dirs, files in filelist:
|
169
99
|
rel_root = root.replace(local_path, local_base, 1)
|
170
100
|
for f in files:
|
@@ -188,43 +118,13 @@ def fetch_local_list(args, recursive = None):
|
|
188
118
|
'full_name' : full_name,
|
189
119
|
'size' : sr.st_size,
|
190
120
|
'mtime' : sr.st_mtime,
|
191
|
-
'dev' : sr.st_dev,
|
192
|
-
'inode' : sr.st_ino,
|
193
|
-
'uid' : sr.st_uid,
|
194
|
-
'gid' : sr.st_gid,
|
195
|
-
'sr': sr # save it all, may need it in preserve_attrs_list
|
196
121
|
## TODO: Possibly more to save here...
|
197
122
|
}
|
198
|
-
if 'md5' in cfg.sync_checks:
|
199
|
-
md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
|
200
|
-
if md5 is None:
|
201
|
-
try:
|
202
|
-
md5 = loc_list.get_md5(relative_file) # this does the file I/O
|
203
|
-
except IOError:
|
204
|
-
continue
|
205
|
-
cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
|
206
|
-
loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
|
207
123
|
return loc_list, single_file
|
208
124
|
|
209
|
-
def _maintain_cache(cache, local_list):
|
210
|
-
if cfg.cache_file:
|
211
|
-
cache.mark_all_for_purge()
|
212
|
-
for i in local_list.keys():
|
213
|
-
cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
|
214
|
-
cache.purge()
|
215
|
-
cache.save(cfg.cache_file)
|
216
|
-
|
217
125
|
cfg = Config()
|
218
|
-
|
219
|
-
cache = HashCache()
|
220
|
-
if cfg.cache_file:
|
221
|
-
try:
|
222
|
-
cache.load(cfg.cache_file)
|
223
|
-
except IOError:
|
224
|
-
info(u"No cache file found, creating it.")
|
225
|
-
|
226
126
|
local_uris = []
|
227
|
-
local_list =
|
127
|
+
local_list = SortedDict(ignore_case = False)
|
228
128
|
single_file = False
|
229
129
|
|
230
130
|
if type(args) not in (list, tuple):
|
@@ -242,7 +142,8 @@ def fetch_local_list(args, recursive = None):
|
|
242
142
|
local_uris.append(uri)
|
243
143
|
|
244
144
|
for uri in local_uris:
|
245
|
-
list_for_uri, single_file = _get_filelist_local(
|
145
|
+
list_for_uri, single_file = _get_filelist_local(uri)
|
146
|
+
local_list.update(list_for_uri)
|
246
147
|
|
247
148
|
## Single file is True if and only if the user
|
248
149
|
## specified one local URI and that URI represents
|
@@ -252,8 +153,6 @@ def fetch_local_list(args, recursive = None):
|
|
252
153
|
if len(local_list) > 1:
|
253
154
|
single_file = False
|
254
155
|
|
255
|
-
_maintain_cache(cache, local_list)
|
256
|
-
|
257
156
|
return local_list, single_file
|
258
157
|
|
259
158
|
def fetch_remote_list(args, require_attribs = False, recursive = None):
|
@@ -284,7 +183,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
|
|
284
183
|
rem_base = rem_base[:rem_base.rfind('/')+1]
|
285
184
|
remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
|
286
185
|
rem_base_len = len(rem_base)
|
287
|
-
rem_list =
|
186
|
+
rem_list = SortedDict(ignore_case = False)
|
288
187
|
break_now = False
|
289
188
|
for object in response['list']:
|
290
189
|
if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
|
@@ -292,7 +191,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
|
|
292
191
|
key = os.path.basename(object['Key'])
|
293
192
|
object_uri_str = remote_uri_original.uri()
|
294
193
|
break_now = True
|
295
|
-
rem_list =
|
194
|
+
rem_list = {} ## Remove whatever has already been put to rem_list
|
296
195
|
else:
|
297
196
|
key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !!
|
298
197
|
object_uri_str = remote_uri.uri() + key
|
@@ -303,18 +202,14 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
|
|
303
202
|
'object_key' : object['Key'],
|
304
203
|
'object_uri_str' : object_uri_str,
|
305
204
|
'base_uri' : remote_uri,
|
306
|
-
'dev' : None,
|
307
|
-
'inode' : None,
|
308
205
|
}
|
309
|
-
md5 = object['ETag'][1:-1]
|
310
|
-
rem_list.record_md5(key, md5)
|
311
206
|
if break_now:
|
312
207
|
break
|
313
208
|
return rem_list
|
314
209
|
|
315
210
|
cfg = Config()
|
316
211
|
remote_uris = []
|
317
|
-
remote_list =
|
212
|
+
remote_list = SortedDict(ignore_case = False)
|
318
213
|
|
319
214
|
if type(args) not in (list, tuple):
|
320
215
|
args = [args]
|
@@ -333,7 +228,6 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
|
|
333
228
|
objectlist = _get_filelist_remote(uri)
|
334
229
|
for key in objectlist:
|
335
230
|
remote_list[key] = objectlist[key]
|
336
|
-
remote_list.record_md5(key, objectlist.get_md5(key))
|
337
231
|
else:
|
338
232
|
for uri in remote_uris:
|
339
233
|
uri_str = str(uri)
|
@@ -371,147 +265,81 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
|
|
371
265
|
'md5': response['headers']['etag'].strip('"\''),
|
372
266
|
'timestamp' : dateRFC822toUnix(response['headers']['date'])
|
373
267
|
})
|
374
|
-
# get md5 from header if it's present. We would have set that during upload
|
375
|
-
if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
|
376
|
-
attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
|
377
|
-
if attrs.has_key('md5'):
|
378
|
-
remote_item.update({'md5': attrs['md5']})
|
379
|
-
|
380
268
|
remote_list[key] = remote_item
|
381
269
|
return remote_list
|
382
270
|
|
383
|
-
def
|
384
|
-
attrs = {}
|
385
|
-
for attr in attrs_header.split("/"):
|
386
|
-
key, val = attr.split(":")
|
387
|
-
attrs[key] = val
|
388
|
-
return attrs
|
389
|
-
|
390
|
-
|
391
|
-
def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False):
|
271
|
+
def compare_filelists(src_list, dst_list, src_remote, dst_remote):
|
392
272
|
def __direction_str(is_remote):
|
393
273
|
return is_remote and "remote" or "local"
|
394
274
|
|
395
|
-
|
396
|
-
"""Return True if src_list[file] matches dst_list[file], else False"""
|
397
|
-
attribs_match = True
|
398
|
-
if not (src_list.has_key(file) and dst_list.has_key(file)):
|
399
|
-
info(u"%s: does not exist in one side or the other: src_list=%s, dst_list=%s" % (file, src_list.has_key(file), dst_list.has_key(file)))
|
400
|
-
return False
|
401
|
-
|
402
|
-
## check size first
|
403
|
-
if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
|
404
|
-
debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
|
405
|
-
attribs_match = False
|
406
|
-
|
407
|
-
## check md5
|
408
|
-
compare_md5 = 'md5' in cfg.sync_checks
|
409
|
-
# Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn"
|
410
|
-
if compare_md5:
|
411
|
-
if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
|
412
|
-
compare_md5 = False
|
413
|
-
info(u"disabled md5 check for %s" % file)
|
414
|
-
if attribs_match and compare_md5:
|
415
|
-
try:
|
416
|
-
src_md5 = src_list.get_md5(file)
|
417
|
-
dst_md5 = dst_list.get_md5(file)
|
418
|
-
except (IOError,OSError), e:
|
419
|
-
# md5 sum verification failed - ignore that file altogether
|
420
|
-
debug(u"IGNR: %s (disappeared)" % (file))
|
421
|
-
warning(u"%s: file disappeared, ignoring." % (file))
|
422
|
-
raise
|
423
|
-
|
424
|
-
if src_md5 != dst_md5:
|
425
|
-
## checksums are different.
|
426
|
-
attribs_match = False
|
427
|
-
debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
|
428
|
-
|
429
|
-
return attribs_match
|
430
|
-
|
431
|
-
# we don't support local->local sync, use 'rsync' or something like that instead ;-)
|
275
|
+
# We don't support local->local sync, use 'rsync' or something like that instead ;-)
|
432
276
|
assert(not(src_remote == False and dst_remote == False))
|
433
277
|
|
434
278
|
info(u"Verifying attributes...")
|
435
279
|
cfg = Config()
|
436
|
-
|
437
|
-
## Items left on update_list will be transferred after src_list
|
438
|
-
## Items left on copy_pairs will be copied from dst1 to dst2
|
439
|
-
update_list = FileDict(ignore_case = False)
|
440
|
-
## Items left on dst_list will be deleted
|
441
|
-
copy_pairs = []
|
280
|
+
exists_list = SortedDict(ignore_case = False)
|
442
281
|
|
443
282
|
debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
|
283
|
+
debug("src_list.keys: %s" % src_list.keys())
|
284
|
+
debug("dst_list.keys: %s" % dst_list.keys())
|
444
285
|
|
445
|
-
for
|
446
|
-
debug(u"CHECK: %s" %
|
447
|
-
|
448
|
-
if dst_list.has_key(relative_file):
|
286
|
+
for file in src_list.keys():
|
287
|
+
debug(u"CHECK: %s" % file)
|
288
|
+
if dst_list.has_key(file):
|
449
289
|
## Was --skip-existing requested?
|
450
290
|
if cfg.skip_existing:
|
451
|
-
debug(u"IGNR: %s (used --skip-existing)" % (
|
452
|
-
|
453
|
-
del(
|
454
|
-
|
455
|
-
|
456
|
-
try:
|
457
|
-
same_file = _compare(src_list, dst_list, src_remote, dst_remote, relative_file)
|
458
|
-
except (IOError,OSError), e:
|
459
|
-
debug(u"IGNR: %s (disappeared)" % (relative_file))
|
460
|
-
warning(u"%s: file disappeared, ignoring." % (relative_file))
|
461
|
-
del(src_list[relative_file])
|
462
|
-
del(dst_list[relative_file])
|
291
|
+
debug(u"IGNR: %s (used --skip-existing)" % (file))
|
292
|
+
exists_list[file] = src_list[file]
|
293
|
+
del(src_list[file])
|
294
|
+
## Remove from destination-list, all that is left there will be deleted
|
295
|
+
del(dst_list[file])
|
463
296
|
continue
|
464
297
|
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
298
|
+
attribs_match = True
|
299
|
+
## Check size first
|
300
|
+
if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
|
301
|
+
debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
|
302
|
+
attribs_match = False
|
469
303
|
|
470
|
-
|
471
|
-
|
304
|
+
## Check MD5
|
305
|
+
compare_md5 = 'md5' in cfg.sync_checks
|
306
|
+
# Multipart-uploaded files don't have a valid MD5 sum - it ends with "...-NN"
|
307
|
+
if compare_md5 and (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
|
308
|
+
compare_md5 = False
|
309
|
+
info(u"Disabled MD5 check for %s" % file)
|
310
|
+
if attribs_match and compare_md5:
|
472
311
|
try:
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
del dst_list[relative_file]
|
312
|
+
if src_remote == False and dst_remote == True:
|
313
|
+
src_md5 = hash_file_md5(src_list[file]['full_name'])
|
314
|
+
dst_md5 = dst_list[file]['md5']
|
315
|
+
elif src_remote == True and dst_remote == False:
|
316
|
+
src_md5 = src_list[file]['md5']
|
317
|
+
dst_md5 = hash_file_md5(dst_list[file]['full_name'])
|
318
|
+
elif src_remote == True and dst_remote == True:
|
319
|
+
src_md5 = src_list[file]['md5']
|
320
|
+
dst_md5 = dst_list[file]['md5']
|
321
|
+
except (IOError,OSError), e:
|
322
|
+
# MD5 sum verification failed - ignore that file altogether
|
323
|
+
debug(u"IGNR: %s (disappeared)" % (file))
|
324
|
+
warning(u"%s: file disappeared, ignoring." % (file))
|
325
|
+
del(src_list[file])
|
326
|
+
del(dst_list[file])
|
327
|
+
continue
|
490
328
|
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
debug(u"DST COPY dst: %s -> %s" % (dst1, relative_file))
|
502
|
-
copy_pairs.append((src_list[relative_file], dst1, relative_file))
|
503
|
-
del(src_list[relative_file])
|
504
|
-
else:
|
505
|
-
# we don't have this file, and we don't have a copy of this file elsewhere. Get it.
|
506
|
-
# record that we will get this file transferred to us (before all the copies), so if we come across it later again,
|
507
|
-
# we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
|
508
|
-
dst_list.record_md5(relative_file, md5)
|
329
|
+
if src_md5 != dst_md5:
|
330
|
+
## Checksums are different.
|
331
|
+
attribs_match = False
|
332
|
+
debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
|
333
|
+
|
334
|
+
if attribs_match:
|
335
|
+
## Remove from source-list, all that is left there will be transferred
|
336
|
+
debug(u"IGNR: %s (transfer not needed)" % file)
|
337
|
+
exists_list[file] = src_list[file]
|
338
|
+
del(src_list[file])
|
509
339
|
|
510
|
-
|
511
|
-
|
512
|
-
# leave only those not on src_list + update_list
|
513
|
-
del dst_list[f]
|
340
|
+
## Remove from destination-list, all that is left there will be deleted
|
341
|
+
del(dst_list[file])
|
514
342
|
|
515
|
-
return src_list, dst_list,
|
343
|
+
return src_list, dst_list, exists_list
|
516
344
|
|
517
345
|
# vim:et:ts=4:sts=4:ai
|