s3_cmd_bin 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/lib/s3_cmd_bin/version.rb +1 -1
- data/resources/ChangeLog +0 -0
- data/resources/INSTALL +0 -0
- data/resources/MANIFEST.in +1 -0
- data/resources/NEWS +1 -40
- data/resources/README +0 -0
- data/resources/S3/ACL.py +0 -0
- data/resources/S3/AccessLog.py +0 -0
- data/resources/S3/BidirMap.py +0 -0
- data/resources/S3/CloudFront.py +8 -37
- data/resources/S3/Config.py +1 -88
- data/resources/S3/Exceptions.py +1 -1
- data/resources/S3/FileLists.py +100 -272
- data/resources/S3/MultiPart.py +21 -45
- data/resources/S3/PkgInfo.py +1 -1
- data/resources/S3/Progress.py +0 -17
- data/resources/S3/S3.py +52 -148
- data/resources/S3/S3Uri.py +2 -3
- data/resources/S3/SimpleDB.py +0 -3
- data/resources/S3/SortedDict.py +0 -3
- data/resources/S3/Utils.py +3 -80
- data/resources/S3/__init__.py +0 -0
- data/resources/TODO +0 -0
- data/resources/artwork/AtomicClockRadio.ttf +0 -0
- data/resources/artwork/TypeRa.ttf +0 -0
- data/resources/artwork/site-top-full-size.xcf +0 -0
- data/resources/artwork/site-top-label-download.png +0 -0
- data/resources/artwork/site-top-label-s3cmd.png +0 -0
- data/resources/artwork/site-top-label-s3sync.png +0 -0
- data/resources/artwork/site-top-s3tools-logo.png +0 -0
- data/resources/artwork/site-top.jpg +0 -0
- data/resources/artwork/site-top.png +0 -0
- data/resources/artwork/site-top.xcf +0 -0
- data/resources/run-tests.py +2 -2
- data/resources/s3cmd +306 -600
- data/resources/s3cmd.1 +97 -84
- data/resources/setup.cfg +0 -0
- data/resources/setup.py +0 -0
- data/resources/testsuite.tar.gz +0 -0
- metadata +2 -26
- data/resources/LICENSE +0 -339
- data/resources/Makefile +0 -4
- data/resources/S3/ACL.pyc +0 -0
- data/resources/S3/AccessLog.pyc +0 -0
- data/resources/S3/BidirMap.pyc +0 -0
- data/resources/S3/CloudFront.pyc +0 -0
- data/resources/S3/Config.pyc +0 -0
- data/resources/S3/ConnMan.py +0 -71
- data/resources/S3/ConnMan.pyc +0 -0
- data/resources/S3/Exceptions.pyc +0 -0
- data/resources/S3/FileDict.py +0 -53
- data/resources/S3/FileDict.pyc +0 -0
- data/resources/S3/FileLists.pyc +0 -0
- data/resources/S3/HashCache.py +0 -53
- data/resources/S3/HashCache.pyc +0 -0
- data/resources/S3/MultiPart.pyc +0 -0
- data/resources/S3/PkgInfo.pyc +0 -0
- data/resources/S3/Progress.pyc +0 -0
- data/resources/S3/S3.pyc +0 -0
- data/resources/S3/S3Uri.pyc +0 -0
- data/resources/S3/SortedDict.pyc +0 -0
- data/resources/S3/Utils.pyc +0 -0
- data/resources/S3/__init__.pyc +0 -0
- data/resources/magic +0 -63
data/lib/s3_cmd_bin/version.rb
CHANGED
data/resources/ChangeLog
CHANGED
File without changes
data/resources/INSTALL
CHANGED
File without changes
data/resources/MANIFEST.in
CHANGED
data/resources/NEWS
CHANGED
@@ -1,43 +1,4 @@
-s3cmd 1.
-==================
-* Persistent HTTP/HTTPS connections for massive speedup (Michal Ludvig)
-* New switch --quiet for suppressing all output (Siddarth Prakash)
-* Honour "umask" on file downloads (Jason Dalton)
-* Various bugfixes from many contributors
-
-s3cmd 1.5.0-alpha2 - 2013-03-04
-==================
-* IAM roles support (David Kohen, Eric Dowd)
-* Manage bucket policies (Kota Uenishi)
-* Various bugfixes from many contributors
-
-s3cmd 1.5.0-alpha1 - 2013-02-19
-==================
-* Server-side copy for hardlinks/softlinks to improve performance
-  (Matt Domsch)
-* New [signurl] command (Craig Ringer)
-* Improved symlink-loop detection (Michal Ludvig)
-* Add --delete-after option for sync (Matt Domsch)
-* Handle empty return bodies when processing S3 errors.
-  (Kelly McLaughlin)
-* Upload from STDIN (Eric Connell)
-* Updated bucket locations (Stefhen Hovland)
-* Support custom HTTP headers (Brendan O'Connor, Karl Matthias)
-* Improved MIME support (Karsten Sperling, Christopher Noyes)
-* Added support for --acl-grant/--acl-revoke to 'sync' command
-  (Michael Tyson)
-* CloudFront: Support default index and default root invalidation
-  (Josep del Rio)
-* Command line options for access/secret keys (Matt Sweeney)
-* Support [setpolicy] for setting bucket policies (Joe Fiorini)
-* Respect the $TZ environment variable (James Brown)
-* Reduce memory consumption for [s3cmd du] (Charlie Schluting)
-* Rate limit progress updates (Steven Noonan)
-* Download from S3 to a temp file first (Sumit Kumar)
-* Reuse a single connection when doing a bucket list (Kelly McLaughlin)
-* Delete empty files if object_get() failed (Oren Held)
-
-s3cmd 1.1.0 - (never released)
+s3cmd 1.1.0 - ???
 ===========
 * MultiPart upload enabled for both [put] and [sync]. Default chunk
   size is 15MB.
data/resources/README
CHANGED
File without changes
data/resources/S3/ACL.py
CHANGED
File without changes
data/resources/S3/AccessLog.py
CHANGED
File without changes
data/resources/S3/BidirMap.py
CHANGED
File without changes
data/resources/S3/CloudFront.py
CHANGED
@@ -133,7 +133,7 @@ class DistributionConfig(object):
     ## </Logging>
     ## </DistributionConfig>
 
-    EMPTY_CONFIG = "<DistributionConfig><
+    EMPTY_CONFIG = "<DistributionConfig><Origin/><CallerReference/><Enabled>true</Enabled></DistributionConfig>"
     xmlns = "http://cloudfront.amazonaws.com/doc/%(api_ver)s/" % { 'api_ver' : cloudfront_api_version }
     def __init__(self, xml = None, tree = None):
         if xml is None:
@@ -174,8 +174,7 @@ class DistributionConfig(object):
         tree.attrib['xmlns'] = DistributionConfig.xmlns
 
         ## Retain the order of the following calls!
-
-        appendXmlTextNode("DNSName", self.info['S3Origin']['DNSName'], s3org)
+        appendXmlTextNode("Origin", self.info['Origin'], tree)
         appendXmlTextNode("CallerReference", self.info['CallerReference'], tree)
         for cname in self.info['CNAME']:
             appendXmlTextNode("CNAME", cname.lower(), tree)
@@ -282,7 +281,7 @@ class InvalidationBatch(object):
         tree = ET.Element("InvalidationBatch")
 
         for path in self.paths:
-            if
+            if path[0] != "/":
                 path = "/" + path
             appendXmlTextNode("Path", path, tree)
         appendXmlTextNode("CallerReference", self.reference, tree)
@@ -323,7 +322,7 @@ class CloudFront(object):
     def CreateDistribution(self, uri, cnames_add = [], comment = None, logging = None, default_root_object = None):
         dist_config = DistributionConfig()
         dist_config.info['Enabled'] = True
-        dist_config.info['
+        dist_config.info['Origin'] = uri.host_name()
         dist_config.info['CallerReference'] = str(uri)
         dist_config.info['DefaultRootObject'] = default_root_object
         if comment == None:
@@ -424,23 +423,7 @@ class CloudFront(object):
                            body = request_body, headers = headers)
         return response
 
-    def InvalidateObjects(self, uri, paths
-        # joseprio: if the user doesn't want to invalidate the default index
-        # path, or if the user wants to invalidate the root of the default
-        # index, we need to process those paths
-        if default_index_file is not None and (not invalidate_default_index_on_cf or invalidate_default_index_root_on_cf):
-            new_paths = []
-            default_index_suffix = '/' + default_index_file
-            for path in paths:
-                if path.endswith(default_index_suffix) or path == default_index_file:
-                    if invalidate_default_index_on_cf:
-                        new_paths.append(path)
-                    if invalidate_default_index_root_on_cf:
-                        new_paths.append(path[:-len(default_index_file)])
-                else:
-                    new_paths.append(path)
-            paths = new_paths
-
+    def InvalidateObjects(self, uri, paths):
         # uri could be either cf:// or s3:// uri
         cfuri = self.get_dist_name_for_bucket(uri)
         if len(paths) > 999:
@@ -534,10 +517,6 @@ class CloudFront(object):
         if not headers.has_key("x-amz-date"):
             headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
 
-        if len(self.config.access_token)>0:
-            self.config.refresh_role()
-            headers['x-amz-security-token']=self.config.access_token
-
         signature = self.sign_request(headers)
         headers["Authorization"] = "AWS "+self.config.access_key+":"+signature
 
@@ -576,16 +555,8 @@ class CloudFront(object):
             for d in response['dist_list'].dist_summs:
                 if d.info.has_key("S3Origin"):
                     CloudFront.dist_list[getBucketFromHostname(d.info['S3Origin']['DNSName'])[0]] = d.uri()
-                elif d.info.has_key("CustomOrigin"):
-                    # Aral: This used to skip over distributions with CustomOrigin, however, we mustn't
-                    # do this since S3 buckets that are set up as websites use custom origins.
-                    # Thankfully, the custom origin URLs they use start with the URL of the
-                    # S3 bucket. Here, we make use this naming convention to support this use case.
-                    distListIndex = getBucketFromHostname(d.info['CustomOrigin']['DNSName'])[0];
-                    distListIndex = distListIndex[:len(uri.bucket())]
-                    CloudFront.dist_list[distListIndex] = d.uri()
                 else:
-                    #
+                    # Skip over distributions with CustomOrigin
                     continue
             debug("dist_list: %s" % CloudFront.dist_list)
             try:
@@ -692,7 +663,7 @@ class Cmd(object):
             d = response['distribution']
             dc = d.info['DistributionConfig']
             output("Distribution created:")
-            pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['
+            pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
             pretty_output("DistId", d.uri())
             pretty_output("DomainName", d.info['DomainName'])
             pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
@@ -734,7 +705,7 @@ class Cmd(object):
         response = cf.GetDistInfo(cfuri)
         d = response['distribution']
         dc = d.info['DistributionConfig']
-        pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['
+        pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
         pretty_output("DistId", d.uri())
         pretty_output("DomainName", d.info['DomainName'])
         pretty_output("Status", d.info['Status'])
data/resources/S3/Config.py
CHANGED
@@ -7,11 +7,8 @@ import logging
 from logging import debug, info, warning, error
 import re
 import os
-import sys
 import Progress
 from SortedDict import SortedDict
-import httplib
-import json
 
 class Config(object):
     _instance = None
@@ -19,7 +16,6 @@ class Config(object):
     _doc = {}
     access_key = ""
     secret_key = ""
-    access_token = ""
     host_base = "s3.amazonaws.com"
     host_bucket = "%(bucket)s.s3.amazonaws.com"
     simpledb_host = "sdb.amazonaws.com"
@@ -44,7 +40,6 @@ class Config(object):
     proxy_port = 3128
     encrypt = False
     dry_run = False
-    add_encoding_exts = ""
     preserve_attrs = True
     preserve_attrs_list = [
         'uname', # Verbose owner Name (e.g. 'root')
@@ -55,14 +50,10 @@ class Config(object):
         'mtime', # Modification timestamp
         'ctime', # Creation timestamp
         'mode', # File mode (e.g. rwxr-xr-x = 755)
-        'md5', # File MD5 (if known)
         #'acl', # Full ACL (not yet supported)
     ]
     delete_removed = False
-    delete_after = False
-    delete_after_fetch = False
     _doc['delete_removed'] = "[sync] Remove remote S3 objects when local file has been deleted"
-    delay_updates = False
     gpg_passphrase = ""
     gpg_command = ""
     gpg_encrypt = "%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
@@ -89,15 +80,9 @@ class Config(object):
     follow_symlinks = False
     socket_timeout = 300
     invalidate_on_cf = False
-    # joseprio: new flags for default index invalidation
-    invalidate_default_index_on_cf = False
-    invalidate_default_index_root_on_cf = True
     website_index = "index.html"
     website_error = ""
     website_endpoint = "http://%(bucket)s.s3-website-%(location)s.amazonaws.com/"
-    additional_destinations = []
-    cache_file = ""
-    add_headers = ""
 
     ## Creating a singleton
     def __new__(self, configfile = None):
@@ -107,73 +92,7 @@ class Config(object):
 
     def __init__(self, configfile = None):
         if configfile:
-
-                self.read_config_file(configfile)
-            except IOError, e:
-                if 'AWS_CREDENTIAL_FILE' in os.environ:
-                    self.env_config()
-            if len(self.access_key)==0:
-                self.role_config()
-
-    def role_config(self):
-        conn = httplib.HTTPConnection(host='169.254.169.254',timeout=0.1)
-        try:
-            conn.request('GET', "/latest/meta-data/iam/security-credentials/")
-            resp = conn.getresponse()
-            files = resp.read()
-            if resp.status == 200 and len(files)>1:
-                conn.request('GET', "/latest/meta-data/iam/security-credentials/%s"%files)
-                resp=conn.getresponse()
-                if resp.status == 200:
-                    creds=json.load(resp)
-                    Config().update_option('access_key', creds['AccessKeyId'].encode('ascii'))
-                    Config().update_option('secret_key', creds['SecretAccessKey'].encode('ascii'))
-                    Config().update_option('access_token', creds['Token'].encode('ascii'))
-                else:
-                    raise IOError
-            else:
-                raise IOError
-        except:
-            raise
-
-    def role_refresh(self):
-        try:
-            self.role_config()
-        except:
-            warning("Could not refresh role")
-
-    def env_config(self):
-        cred_content = ""
-        try:
-            cred_file = open(os.environ['AWS_CREDENTIAL_FILE'],'r')
-            cred_content = cred_file.read()
-        except IOError, e:
-            debug("Error %d accessing credentials file %s" % (e.errno,os.environ['AWS_CREDENTIAL_FILE']))
-        r_data = re.compile("^\s*(?P<orig_key>\w+)\s*=\s*(?P<value>.*)")
-        r_quotes = re.compile("^\"(.*)\"\s*$")
-        if len(cred_content)>0:
-            for line in cred_content.splitlines():
-                is_data = r_data.match(line)
-                is_data = r_data.match(line)
-                if is_data:
-                    data = is_data.groupdict()
-                    if r_quotes.match(data["value"]):
-                        data["value"] = data["value"][1:-1]
-                    if data["orig_key"]=="AWSAccessKeyId":
-                        data["key"] = "access_key"
-                    elif data["orig_key"]=="AWSSecretKey":
-                        data["key"] = "secret_key"
-                    else:
-                        del data["key"]
-                    if "key" in data:
-                        Config().update_option(data["key"], data["value"])
-                        if data["key"] in ("access_key", "secret_key", "gpg_passphrase"):
-                            print_value = (data["value"][:2]+"...%d_chars..."+data["value"][-1:]) % (len(data["value"]) - 3)
-                        else:
-                            print_value = data["value"]
-                        debug("env_Config: %s->%s" % (data["key"], print_value))
-
-
+            self.read_config_file(configfile)
 
     def option_list(self):
         retval = []
@@ -193,12 +112,6 @@ class Config(object):
         cp = ConfigParser(configfile)
         for option in self.option_list():
             self.update_option(option, cp.get(option))
-
-        if cp.get('add_headers'):
-            for option in cp.get('add_headers').split(","):
-                (key, value) = option.split(':')
-                self.extra_headers[key.replace('_', '-').strip()] = value.strip()
-
         self._parsed_files.append(configfile)
 
     def dump_config(self, stream):
data/resources/S3/Exceptions.py
CHANGED
@@ -44,7 +44,7 @@ class S3Error (S3Exception):
         if response.has_key("headers"):
             for header in response["headers"]:
                 debug("HttpHeader: %s: %s" % (header, response["headers"][header]))
-        if response.has_key("data")
+        if response.has_key("data"):
             tree = getTreeFromXml(response["data"])
             error_node = tree
             if not error_node.tag == "Error":
data/resources/S3/FileLists.py
CHANGED
@@ -6,59 +6,56 @@
 from S3 import S3
 from Config import Config
 from S3Uri import S3Uri
-from
+from SortedDict import SortedDict
 from Utils import *
 from Exceptions import ParameterError
-from HashCache import HashCache
 
 from logging import debug, info, warning, error
 
 import os
 import glob
-import copy
 
-__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include"
+__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include"]
 
 def _fswalk_follow_symlinks(path):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        yield (dirpath, dirnames, filenames)
+    '''
+    Walk filesystem, following symbolic links (but without recursion), on python2.4 and later
+
+    If a recursive directory link is detected, emit a warning and skip.
+    '''
+    assert os.path.isdir(path) # only designed for directory argument
+    walkdirs = set([path])
+    targets = set()
+    for dirpath, dirnames, filenames in os.walk(path):
+        for dirname in dirnames:
+            current = os.path.join(dirpath, dirname)
+            target = os.path.realpath(current)
+            if os.path.islink(current):
+                if target in targets:
+                    warning("Skipping recursively symlinked directory %s" % dirname)
+                else:
+                    walkdirs.add(current)
+            targets.add(target)
+    for walkdir in walkdirs:
+        for value in os.walk(walkdir):
+            yield value
+
+def _fswalk(path, follow_symlinks):
+    '''
+    Directory tree generator
+
+    path (str) is the root of the directory tree to walk
+
+    follow_symlinks (bool) indicates whether to descend into symbolically linked directories
+    '''
+    if follow_symlinks:
+        return _fswalk_follow_symlinks(path)
+    return os.walk(path)
 
 def filter_exclude_include(src_list):
     info(u"Applying --exclude/--include")
     cfg = Config()
-    exclude_list =
+    exclude_list = SortedDict(ignore_case = False)
     for file in src_list.keys():
         debug(u"CHECK: %s" % file)
         excluded = False
@@ -81,90 +78,23 @@ def filter_exclude_include(src_list):
             del(src_list[file])
             continue
         else:
-            debug(u"PASS: %
+            debug(u"PASS: %s" % (file))
     return src_list, exclude_list
 
-def handle_exclude_include_walk(root, dirs, files):
-    cfg = Config()
-    copydirs = copy.copy(dirs)
-    copyfiles = copy.copy(files)
-
-    # exclude dir matches in the current directory
-    # this prevents us from recursing down trees we know we want to ignore
-    for x in copydirs:
-        d = os.path.join(root, x, '')
-        debug(u"CHECK: %r" % d)
-        excluded = False
-        for r in cfg.exclude:
-            if r.search(d):
-                excluded = True
-                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
-                break
-        if excluded:
-            ## No need to check for --include if not excluded
-            for r in cfg.include:
-                if r.search(d):
-                    excluded = False
-                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
-                    break
-        if excluded:
-            ## Still excluded - ok, action it
-            debug(u"EXCLUDE: %r" % d)
-            dirs.remove(x)
-            continue
-        else:
-            debug(u"PASS: %r" % (d))
-
-    # exclude file matches in the current directory
-    for x in copyfiles:
-        file = os.path.join(root, x)
-        debug(u"CHECK: %r" % file)
-        excluded = False
-        for r in cfg.exclude:
-            if r.search(file):
-                excluded = True
-                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
-                break
-        if excluded:
-            ## No need to check for --include if not excluded
-            for r in cfg.include:
-                if r.search(file):
-                    excluded = False
-                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
-                    break
-        if excluded:
-            ## Still excluded - ok, action it
-            debug(u"EXCLUDE: %s" % file)
-            files.remove(x)
-            continue
-        else:
-            debug(u"PASS: %r" % (file))
-
 def fetch_local_list(args, recursive = None):
-    def _get_filelist_local(
+    def _get_filelist_local(local_uri):
         info(u"Compiling list of local files...")
-
-        if deunicodise(local_uri.basename()) == "-":
-            loc_list["-"] = {
-                'full_name_unicode' : '-',
-                'full_name' : '-',
-                'size' : -1,
-                'mtime' : -1,
-            }
-            return loc_list, True
         if local_uri.isdir():
             local_base = deunicodise(local_uri.basename())
             local_path = deunicodise(local_uri.path())
-
-                filelist = _fswalk_follow_symlinks(local_path)
-            else:
-                filelist = _fswalk_no_symlinks(local_path)
+            filelist = _fswalk(local_path, cfg.follow_symlinks)
             single_file = False
         else:
             local_base = ""
             local_path = deunicodise(local_uri.dirname())
             filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
             single_file = True
+        loc_list = SortedDict(ignore_case = False)
         for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
             for f in files:
@@ -188,43 +118,13 @@ def fetch_local_list(args, recursive = None):
                     'full_name' : full_name,
                     'size' : sr.st_size,
                     'mtime' : sr.st_mtime,
-                    'dev' : sr.st_dev,
-                    'inode' : sr.st_ino,
-                    'uid' : sr.st_uid,
-                    'gid' : sr.st_gid,
-                    'sr': sr # save it all, may need it in preserve_attrs_list
                     ## TODO: Possibly more to save here...
                 }
-                if 'md5' in cfg.sync_checks:
-                    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
-                    if md5 is None:
-                        try:
-                            md5 = loc_list.get_md5(relative_file) # this does the file I/O
-                        except IOError:
-                            continue
-                        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
-                    loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
         return loc_list, single_file
 
-    def _maintain_cache(cache, local_list):
-        if cfg.cache_file:
-            cache.mark_all_for_purge()
-            for i in local_list.keys():
-                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
-            cache.purge()
-            cache.save(cfg.cache_file)
-
     cfg = Config()
-
-    cache = HashCache()
-    if cfg.cache_file:
-        try:
-            cache.load(cfg.cache_file)
-        except IOError:
-            info(u"No cache file found, creating it.")
-
     local_uris = []
-    local_list =
+    local_list = SortedDict(ignore_case = False)
     single_file = False
 
     if type(args) not in (list, tuple):
@@ -242,7 +142,8 @@ def fetch_local_list(args, recursive = None):
             local_uris.append(uri)
 
     for uri in local_uris:
-        list_for_uri, single_file = _get_filelist_local(
+        list_for_uri, single_file = _get_filelist_local(uri)
+        local_list.update(list_for_uri)
 
     ## Single file is True if and only if the user
     ## specified one local URI and that URI represents
@@ -252,8 +153,6 @@ def fetch_local_list(args, recursive = None):
     if len(local_list) > 1:
         single_file = False
 
-    _maintain_cache(cache, local_list)
-
     return local_list, single_file
 
 def fetch_remote_list(args, require_attribs = False, recursive = None):
@@ -284,7 +183,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
             rem_base = rem_base[:rem_base.rfind('/')+1]
             remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
         rem_base_len = len(rem_base)
-        rem_list =
+        rem_list = SortedDict(ignore_case = False)
         break_now = False
         for object in response['list']:
             if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
@@ -292,7 +191,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
                 key = os.path.basename(object['Key'])
                 object_uri_str = remote_uri_original.uri()
                 break_now = True
-                rem_list =
+                rem_list = {} ## Remove whatever has already been put to rem_list
             else:
                 key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !!
                 object_uri_str = remote_uri.uri() + key
@@ -303,18 +202,14 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
                 'object_key' : object['Key'],
                 'object_uri_str' : object_uri_str,
                 'base_uri' : remote_uri,
-                'dev' : None,
-                'inode' : None,
             }
-            md5 = object['ETag'][1:-1]
-            rem_list.record_md5(key, md5)
             if break_now:
                 break
         return rem_list
 
     cfg = Config()
     remote_uris = []
-    remote_list =
+    remote_list = SortedDict(ignore_case = False)
 
     if type(args) not in (list, tuple):
         args = [args]
@@ -333,7 +228,6 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
             objectlist = _get_filelist_remote(uri)
             for key in objectlist:
                 remote_list[key] = objectlist[key]
-                remote_list.record_md5(key, objectlist.get_md5(key))
     else:
         for uri in remote_uris:
             uri_str = str(uri)
@@ -371,147 +265,81 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
                     'md5': response['headers']['etag'].strip('"\''),
                     'timestamp' : dateRFC822toUnix(response['headers']['date'])
                 })
-                # get md5 from header if it's present. We would have set that during upload
-                if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
-                    attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
-                    if attrs.has_key('md5'):
-                        remote_item.update({'md5': attrs['md5']})
-
             remote_list[key] = remote_item
     return remote_list
 
-def
-    attrs = {}
-    for attr in attrs_header.split("/"):
-        key, val = attr.split(":")
-        attrs[key] = val
-    return attrs
-
-
-def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False):
+def compare_filelists(src_list, dst_list, src_remote, dst_remote):
     def __direction_str(is_remote):
         return is_remote and "remote" or "local"
 
-
-        """Return True if src_list[file] matches dst_list[file], else False"""
-        attribs_match = True
-        if not (src_list.has_key(file) and dst_list.has_key(file)):
-            info(u"%s: does not exist in one side or the other: src_list=%s, dst_list=%s" % (file, src_list.has_key(file), dst_list.has_key(file)))
-            return False
-
-        ## check size first
-        if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
-            debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
-            attribs_match = False
-
-        ## check md5
-        compare_md5 = 'md5' in cfg.sync_checks
-        # Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn"
-        if compare_md5:
-            if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
-                compare_md5 = False
-                info(u"disabled md5 check for %s" % file)
-        if attribs_match and compare_md5:
-            try:
-                src_md5 = src_list.get_md5(file)
-                dst_md5 = dst_list.get_md5(file)
-            except (IOError,OSError), e:
-                # md5 sum verification failed - ignore that file altogether
-                debug(u"IGNR: %s (disappeared)" % (file))
-                warning(u"%s: file disappeared, ignoring." % (file))
-                raise
-
-            if src_md5 != dst_md5:
-                ## checksums are different.
-                attribs_match = False
-                debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
-
-        return attribs_match
-
-    # we don't support local->local sync, use 'rsync' or something like that instead ;-)
+    # We don't support local->local sync, use 'rsync' or something like that instead ;-)
     assert(not(src_remote == False and dst_remote == False))
 
     info(u"Verifying attributes...")
     cfg = Config()
-
-    ## Items left on update_list will be transferred after src_list
-    ## Items left on copy_pairs will be copied from dst1 to dst2
-    update_list = FileDict(ignore_case = False)
-    ## Items left on dst_list will be deleted
-    copy_pairs = []
+    exists_list = SortedDict(ignore_case = False)
 
     debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
+    debug("src_list.keys: %s" % src_list.keys())
+    debug("dst_list.keys: %s" % dst_list.keys())
 
-    for
-        debug(u"CHECK: %s" %
-
-        if dst_list.has_key(relative_file):
+    for file in src_list.keys():
+        debug(u"CHECK: %s" % file)
+        if dst_list.has_key(file):
             ## Was --skip-existing requested?
             if cfg.skip_existing:
-                debug(u"IGNR: %s (used --skip-existing)" % (
-
-                del(
-
-
-            try:
-                same_file = _compare(src_list, dst_list, src_remote, dst_remote, relative_file)
-            except (IOError,OSError), e:
-                debug(u"IGNR: %s (disappeared)" % (relative_file))
-                warning(u"%s: file disappeared, ignoring." % (relative_file))
-                del(src_list[relative_file])
-                del(dst_list[relative_file])
+                debug(u"IGNR: %s (used --skip-existing)" % (file))
+                exists_list[file] = src_list[file]
+                del(src_list[file])
+                ## Remove from destination-list, all that is left there will be deleted
+                del(dst_list[file])
                 continue
 
-
-
-
-
+            attribs_match = True
+            ## Check size first
+            if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
+                debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
+                attribs_match = False
 
-
-
+            ## Check MD5
+            compare_md5 = 'md5' in cfg.sync_checks
+            # Multipart-uploaded files don't have a valid MD5 sum - it ends with "...-NN"
+            if compare_md5 and (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
+                compare_md5 = False
+                info(u"Disabled MD5 check for %s" % file)
+            if attribs_match and compare_md5:
                 try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    del dst_list[relative_file]
+                    if src_remote == False and dst_remote == True:
+                        src_md5 = hash_file_md5(src_list[file]['full_name'])
+                        dst_md5 = dst_list[file]['md5']
+                    elif src_remote == True and dst_remote == False:
+                        src_md5 = src_list[file]['md5']
+                        dst_md5 = hash_file_md5(dst_list[file]['full_name'])
+                    elif src_remote == True and dst_remote == True:
+                        src_md5 = src_list[file]['md5']
+                        dst_md5 = dst_list[file]['md5']
+                except (IOError,OSError), e:
+                    # MD5 sum verification failed - ignore that file altogether
+                    debug(u"IGNR: %s (disappeared)" % (file))
+                    warning(u"%s: file disappeared, ignoring." % (file))
+                    del(src_list[file])
+                    del(dst_list[file])
+                    continue
 
-
-
-
-
-
-
-
-
-
-
-                debug(u"DST COPY dst: %s -> %s" % (dst1, relative_file))
-                copy_pairs.append((src_list[relative_file], dst1, relative_file))
-                del(src_list[relative_file])
-            else:
-                # we don't have this file, and we don't have a copy of this file elsewhere. Get it.
-                # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
-                # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
-                dst_list.record_md5(relative_file, md5)
+                if src_md5 != dst_md5:
+                    ## Checksums are different.
+                    attribs_match = False
+                    debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
+
+            if attribs_match:
+                ## Remove from source-list, all that is left there will be transferred
+                debug(u"IGNR: %s (transfer not needed)" % file)
+                exists_list[file] = src_list[file]
+                del(src_list[file])
 
-
-
-            # leave only those not on src_list + update_list
-            del dst_list[f]
+            ## Remove from destination-list, all that is left there will be deleted
+            del(dst_list[file])
 
-    return src_list, dst_list,
+    return src_list, dst_list, exists_list
 
 # vim:et:ts=4:sts=4:ai
|