s3_cmd_bin 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +28 -0
- data/Rakefile +1 -0
- data/lib/s3_cmd_bin/version.rb +3 -0
- data/lib/s3_cmd_bin.rb +15 -0
- data/resources/ChangeLog +1462 -0
- data/resources/INSTALL +97 -0
- data/resources/LICENSE +339 -0
- data/resources/MANIFEST.in +2 -0
- data/resources/Makefile +4 -0
- data/resources/NEWS +234 -0
- data/resources/README +342 -0
- data/resources/S3/ACL.py +224 -0
- data/resources/S3/ACL.pyc +0 -0
- data/resources/S3/AccessLog.py +92 -0
- data/resources/S3/AccessLog.pyc +0 -0
- data/resources/S3/BidirMap.py +42 -0
- data/resources/S3/BidirMap.pyc +0 -0
- data/resources/S3/CloudFront.py +773 -0
- data/resources/S3/CloudFront.pyc +0 -0
- data/resources/S3/Config.py +294 -0
- data/resources/S3/Config.pyc +0 -0
- data/resources/S3/ConnMan.py +71 -0
- data/resources/S3/ConnMan.pyc +0 -0
- data/resources/S3/Exceptions.py +88 -0
- data/resources/S3/Exceptions.pyc +0 -0
- data/resources/S3/FileDict.py +53 -0
- data/resources/S3/FileDict.pyc +0 -0
- data/resources/S3/FileLists.py +517 -0
- data/resources/S3/FileLists.pyc +0 -0
- data/resources/S3/HashCache.py +53 -0
- data/resources/S3/HashCache.pyc +0 -0
- data/resources/S3/MultiPart.py +137 -0
- data/resources/S3/MultiPart.pyc +0 -0
- data/resources/S3/PkgInfo.py +14 -0
- data/resources/S3/PkgInfo.pyc +0 -0
- data/resources/S3/Progress.py +173 -0
- data/resources/S3/Progress.pyc +0 -0
- data/resources/S3/S3.py +979 -0
- data/resources/S3/S3.pyc +0 -0
- data/resources/S3/S3Uri.py +223 -0
- data/resources/S3/S3Uri.pyc +0 -0
- data/resources/S3/SimpleDB.py +178 -0
- data/resources/S3/SortedDict.py +66 -0
- data/resources/S3/SortedDict.pyc +0 -0
- data/resources/S3/Utils.py +462 -0
- data/resources/S3/Utils.pyc +0 -0
- data/resources/S3/__init__.py +0 -0
- data/resources/S3/__init__.pyc +0 -0
- data/resources/TODO +52 -0
- data/resources/artwork/AtomicClockRadio.ttf +0 -0
- data/resources/artwork/TypeRa.ttf +0 -0
- data/resources/artwork/site-top-full-size.xcf +0 -0
- data/resources/artwork/site-top-label-download.png +0 -0
- data/resources/artwork/site-top-label-s3cmd.png +0 -0
- data/resources/artwork/site-top-label-s3sync.png +0 -0
- data/resources/artwork/site-top-s3tools-logo.png +0 -0
- data/resources/artwork/site-top.jpg +0 -0
- data/resources/artwork/site-top.png +0 -0
- data/resources/artwork/site-top.xcf +0 -0
- data/resources/format-manpage.pl +196 -0
- data/resources/magic +63 -0
- data/resources/run-tests.py +537 -0
- data/resources/s3cmd +2116 -0
- data/resources/s3cmd.1 +435 -0
- data/resources/s3db +55 -0
- data/resources/setup.cfg +2 -0
- data/resources/setup.py +80 -0
- data/resources/testsuite.tar.gz +0 -0
- data/resources/upload-to-sf.sh +7 -0
- data/s3_cmd_bin.gemspec +23 -0
- metadata +152 -0
@@ -0,0 +1,517 @@
|
|
1
|
+
## Create and compare lists of files/objects
|
2
|
+
## Author: Michal Ludvig <michal@logix.cz>
|
3
|
+
## http://www.logix.cz/michal
|
4
|
+
## License: GPL Version 2
|
5
|
+
|
6
|
+
from S3 import S3
|
7
|
+
from Config import Config
|
8
|
+
from S3Uri import S3Uri
|
9
|
+
from FileDict import FileDict
|
10
|
+
from Utils import *
|
11
|
+
from Exceptions import ParameterError
|
12
|
+
from HashCache import HashCache
|
13
|
+
|
14
|
+
from logging import debug, info, warning, error
|
15
|
+
|
16
|
+
import os
|
17
|
+
import glob
|
18
|
+
import copy
|
19
|
+
|
20
|
+
__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include", "parse_attrs_header"]
|
21
|
+
|
22
|
+
def _fswalk_follow_symlinks(path):
    '''
    Walk filesystem, following symbolic links (but without recursion), on python2.4 and later

    If a symlink directory loop is detected, emit a warning and skip.
    E.g.: dir1/dir2/sym-dir -> ../dir2

    Yields (dirpath, dirnames, filenames) tuples like os.walk().
    '''
    assert os.path.isdir(path) # only designed for directory argument
    walkdirs = set([path])
    # First pass: collect every non-looping symlinked directory as an extra
    # walk root.  Exclude/include rules prune dirnames in place so excluded
    # subtrees are never scanned.
    for dirpath, dirnames, filenames in os.walk(path):
        handle_exclude_include_walk(dirpath, dirnames, [])
        real_dirpath = os.path.realpath(dirpath)
        for dirname in dirnames:
            current = os.path.join(dirpath, dirname)
            real_current = os.path.realpath(current)
            if os.path.islink(current):
                # A symlink that resolves to the current directory or to one
                # of its ancestors would recurse forever - warn and skip it.
                if (real_dirpath == real_current or
                    real_dirpath.startswith(real_current + os.path.sep)):
                    warning("Skipping recursively symlinked directory %s" % dirname)
                else:
                    walkdirs.add(current)
    # Second pass: walk the original root plus each accepted symlink root.
    # NOTE(review): symlinks found inside the extra roots are not followed
    # further (the "without recursion" above) - confirm this is intended.
    for walkdir in walkdirs:
        for dirpath, dirnames, filenames in os.walk(walkdir):
            handle_exclude_include_walk(dirpath, dirnames, [])
            yield (dirpath, dirnames, filenames)
|
47
|
+
|
48
|
+
def _fswalk_no_symlinks(path):
    '''
    Generate (dirpath, dirnames, filenames) tuples for the tree rooted at
    *path*, without following symbolic links.

    Exclude/include rules are applied to every directory level before it is
    yielded, so pruned subtrees are never descended into by os.walk().
    '''
    for current_dir, subdirs, entries in os.walk(path):
        handle_exclude_include_walk(current_dir, subdirs, entries)
        yield (current_dir, subdirs, entries)
|
57
|
+
|
58
|
+
def filter_exclude_include(src_list):
    """
    Split src_list according to the --exclude/--include rules.

    Entries matching an exclude pattern (and not rescued by an include
    pattern) are moved out of src_list into a separate FileDict.
    Returns the (possibly shrunken) src_list and the exclude_list.
    """
    info(u"Applying --exclude/--include")
    cfg = Config()
    exclude_list = FileDict(ignore_case = False)
    for file in src_list.keys():
        debug(u"CHECK: %s" % file)
        is_excluded = False
        for pattern in cfg.exclude:
            if pattern.search(file):
                is_excluded = True
                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[pattern]))
                break
        # An --include rule can override a prior --exclude match.
        if is_excluded:
            for pattern in cfg.include:
                if pattern.search(file):
                    is_excluded = False
                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[pattern]))
                    break
        if not is_excluded:
            debug(u"PASS: %r" % (file))
            continue
        # Still excluded - move the entry over to exclude_list.
        debug(u"EXCLUDE: %s" % file)
        exclude_list[file] = src_list[file]
        del(src_list[file])
    return src_list, exclude_list
|
86
|
+
|
87
|
+
def handle_exclude_include_walk(root, dirs, files):
    """
    Prune the 'dirs' and 'files' lists of an os.walk() step in place,
    removing entries that match --exclude and are not rescued by --include.

    Removing directories here prevents os.walk() from ever descending into
    subtrees we know we want to ignore.
    """
    cfg = Config()

    def _is_excluded(subject):
        # Emits the same CHECK / EXCL-MATCH / INCL-MATCH debug trace as the
        # historical inline version.
        debug(u"CHECK: %r" % subject)
        hit = False
        for pattern in cfg.exclude:
            if pattern.search(subject):
                hit = True
                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[pattern]))
                break
        if hit:
            ## No need to check for --include if not excluded
            for pattern in cfg.include:
                if pattern.search(subject):
                    hit = False
                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[pattern]))
                    break
        return hit

    # Directories are matched with a trailing separator (os.path.join with
    # '' appends it), iterating over a snapshot since we mutate 'dirs'.
    for entry in list(dirs):
        d = os.path.join(root, entry, '')
        if _is_excluded(d):
            debug(u"EXCLUDE: %r" % d)
            dirs.remove(entry)
        else:
            debug(u"PASS: %r" % (d))

    # Same treatment for plain files in the current directory.
    for entry in list(files):
        file = os.path.join(root, entry)
        if _is_excluded(file):
            debug(u"EXCLUDE: %s" % file)
            files.remove(entry)
        else:
            debug(u"PASS: %r" % (file))
|
142
|
+
|
143
|
+
def fetch_local_list(args, recursive = None):
    """
    Build a FileDict describing the local files referenced by 'args'.

    args      -- one local URI/path or a list/tuple of them
    recursive -- descend into directories; falls back to cfg.recursive
                 when None

    Returns (local_list, single_file) where single_file is True only when
    the user named exactly one URI and that URI is a plain file.
    Raises ParameterError for non-file URIs, or for a directory argument
    without recursive mode.
    """
    def _get_filelist_local(loc_list, local_uri, cache):
        # Populate loc_list with stat info (and cached md5s) for one URI.
        info(u"Compiling list of local files...")

        # "-" stands for stdin; size/mtime are unknowable, recorded as -1.
        if deunicodise(local_uri.basename()) == "-":
            loc_list["-"] = {
                'full_name_unicode' : '-',
                'full_name' : '-',
                'size' : -1,
                'mtime' : -1,
            }
            return loc_list, True
        if local_uri.isdir():
            local_base = deunicodise(local_uri.basename())
            local_path = deunicodise(local_uri.path())
            if cfg.follow_symlinks:
                filelist = _fswalk_follow_symlinks(local_path)
            else:
                filelist = _fswalk_no_symlinks(local_path)
            single_file = False
        else:
            # A single file: fake a one-entry os.walk()-style listing.
            local_base = ""
            local_path = deunicodise(local_uri.dirname())
            filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
            single_file = True
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(full_name):
                    continue
                if os.path.islink(full_name):
                    if not cfg.follow_symlinks:
                        continue
                relative_file = unicodise(os.path.join(rel_root, f))
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                # lstat: examine the link itself, not its target.
                sr = os.stat_result(os.lstat(full_name))
                loc_list[relative_file] = {
                    'full_name_unicode' : unicodise(full_name),
                    'full_name' : full_name,
                    'size' : sr.st_size,
                    'mtime' : sr.st_mtime,
                    'dev' : sr.st_dev,
                    'inode' : sr.st_ino,
                    'uid' : sr.st_uid,
                    'gid' : sr.st_gid,
                    'sr': sr # save it all, may need it in preserve_attrs_list
                    ## TODO: Possibly more to save here...
                }
                if 'md5' in cfg.sync_checks:
                    # Try the (dev, inode, mtime, size) cache before reading
                    # the file; skip files that vanish mid-scan.
                    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
                    if md5 is None:
                        try:
                            md5 = loc_list.get_md5(relative_file) # this does the file I/O
                        except IOError:
                            continue
                        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
                    loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
        return loc_list, single_file

    def _maintain_cache(cache, local_list):
        # Drop cache entries for files no longer present, then persist.
        if cfg.cache_file:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()

    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = FileDict(ignore_case = False)
    single_file = False

    if type(args) not in (list, tuple):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)

    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False

    _maintain_cache(cache, local_list)

    return local_list, single_file
|
258
|
+
|
259
|
+
def fetch_remote_list(args, require_attribs = False, recursive = None):
    """
    Build a FileDict describing the remote S3 objects referenced by 'args'.

    args            -- one S3 URI or a list/tuple of them; non-recursive
                       URIs may contain '*' / '?' wildcards
    require_attribs -- when True, issue a HEAD per single object to fill
                       in size/md5/timestamp
    recursive       -- falls back to cfg.recursive when None

    Returns a FileDict keyed by relative object key.
    Raises ParameterError for non-S3 URIs or a bare bucket/prefix URI
    without --recursive.
    """
    def _get_filelist_remote(remote_uri, recursive = True):
        ## If remote_uri ends with '/' then all remote files will have
        ## the remote_uri prefix removed in the relative path.
        ## If, on the other hand, the remote_uri ends with something else
        ## (probably alphanumeric symbol) we'll use the last path part
        ## in the relative path.
        ##
        ## Complicated, eh? See an example:
        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
        ## Furthermore a prefix-magic can restrict the return list:
        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
        ## { 'xyz/blah.txt' : {} }

        info(u"Retrieving list of remote files for %s ..." % remote_uri)

        s3 = S3(Config())
        response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)

        # Compute the common prefix that gets stripped from each key.
        rem_base_original = rem_base = remote_uri.object()
        remote_uri_original = remote_uri
        if rem_base != '' and rem_base[-1] != '/':
            rem_base = rem_base[:rem_base.rfind('/')+1]
            remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
        rem_base_len = len(rem_base)
        rem_list = FileDict(ignore_case = False)
        break_now = False
        for object in response['list']:
            if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
                ## We asked for one file and we got that file :-)
                key = os.path.basename(object['Key'])
                object_uri_str = remote_uri_original.uri()
                break_now = True
                rem_list = FileDict(ignore_case = False) ## Remove whatever has already been put to rem_list
            else:
                key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !!
                object_uri_str = remote_uri.uri() + key
            rem_list[key] = {
                'size' : int(object['Size']),
                'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
                'md5' : object['ETag'][1:-1],
                'object_key' : object['Key'],
                'object_uri_str' : object_uri_str,
                'base_uri' : remote_uri,
                'dev' : None,
                'inode' : None,
            }
            # ETag doubles as md5 here; NOTE(review): multipart uploads have
            # a non-md5 ETag ("...-nn") - downstream compare handles that.
            md5 = object['ETag'][1:-1]
            rem_list.record_md5(key, md5)
            if break_now:
                break
        return rem_list

    cfg = Config()
    remote_uris = []
    remote_list = FileDict(ignore_case = False)

    if type(args) not in (list, tuple):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 's3':
            raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
        remote_uris.append(uri)

    if recursive:
        for uri in remote_uris:
            objectlist = _get_filelist_remote(uri)
            for key in objectlist:
                remote_list[key] = objectlist[key]
                remote_list.record_md5(key, objectlist.get_md5(key))
    else:
        for uri in remote_uris:
            uri_str = str(uri)
            ## Wildcards used in remote URI?
            ## If yes we'll need a bucket listing...
            if uri_str.find('*') > -1 or uri_str.find('?') > -1:
                first_wildcard = uri_str.find('*')
                first_questionmark = uri_str.find('?')
                if first_questionmark > -1 and first_questionmark < first_wildcard:
                    first_wildcard = first_questionmark
                prefix = uri_str[:first_wildcard]
                rest = uri_str[first_wildcard+1:]
                ## Only request recursive listing if the 'rest' of the URI,
                ## i.e. the part after first wildcard, contains '/'
                need_recursion = rest.find('/') > -1
                objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
                for key in objectlist:
                    ## Check whether the 'key' matches the requested wildcards
                    if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
                        remote_list[key] = objectlist[key]
            else:
                ## No wildcards - simply append the given URI to the list
                key = os.path.basename(uri.object())
                if not key:
                    raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
                remote_item = {
                    'base_uri': uri,
                    'object_uri_str': unicode(uri),
                    'object_key': uri.object()
                }
                if require_attribs:
                    response = S3(cfg).object_info(uri)
                    remote_item.update({
                        'size': int(response['headers']['content-length']),
                        'md5': response['headers']['etag'].strip('"\''),
                        'timestamp' : dateRFC822toUnix(response['headers']['date'])
                    })
                    # get md5 from header if it's present. We would have set that during upload
                    if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
                        attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
                        if attrs.has_key('md5'):
                            remote_item.update({'md5': attrs['md5']})

                remote_list[key] = remote_item
    return remote_list
|
382
|
+
|
383
|
+
def parse_attrs_header(attrs_header):
    """
    Parse an 'x-amz-meta-s3cmd-attrs' header value into a dict.

    The header is a '/'-separated list of 'key:value' pairs, e.g.
    "uid:0/gid:0/mtime:1234567890/md5:d41d8cd98f00b204e9800998ecf8427e".

    Splitting on the first ':' only, so values that themselves contain a
    colon no longer raise ValueError (the old `attr.split(":")` unpacking
    blew up on any value with an embedded ':').

    Raises ValueError if a segment contains no ':' at all.
    """
    attrs = {}
    for attr in attrs_header.split("/"):
        key, val = attr.split(":", 1)
        attrs[key] = val
    return attrs
|
389
|
+
|
390
|
+
|
391
|
+
def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False):
    """
    Compare source and destination FileDicts for a sync operation.

    Returns (src_list, dst_list, update_list, copy_pairs):
      - src_list:    new items remaining to transfer
      - dst_list:    items present only on the destination (delete candidates)
      - update_list: items present on both sides whose attributes differ
      - copy_pairs:  (src_item, dst1, relative_file) triples that can be
        satisfied by a destination-side copy instead of a fresh transfer

    NOTE(review): 'delay_updates' is accepted but never read here - confirm
    whether any caller relies on it.
    """
    def __direction_str(is_remote):
        # For the debug message only.
        return is_remote and "remote" or "local"

    def _compare(src_list, dst_lst, src_remote, dst_remote, file):
        """Return True if src_list[file] matches dst_list[file], else False"""
        attribs_match = True
        if not (src_list.has_key(file) and dst_list.has_key(file)):
            info(u"%s: does not exist in one side or the other: src_list=%s, dst_list=%s" % (file, src_list.has_key(file), dst_list.has_key(file)))
            return False

        ## check size first
        if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
            debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
            attribs_match = False

        ## check md5
        compare_md5 = 'md5' in cfg.sync_checks
        # Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn"
        if compare_md5:
            if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
                compare_md5 = False
                info(u"disabled md5 check for %s" % file)
        if attribs_match and compare_md5:
            try:
                src_md5 = src_list.get_md5(file)
                dst_md5 = dst_list.get_md5(file)
            except (IOError,OSError), e:
                # md5 sum verification failed - ignore that file altogether
                debug(u"IGNR: %s (disappeared)" % (file))
                warning(u"%s: file disappeared, ignoring." % (file))
                # Re-raised so the caller can drop the file from both lists.
                raise

            if src_md5 != dst_md5:
                ## checksums are different.
                attribs_match = False
                debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))

        return attribs_match

    # we don't support local->local sync, use 'rsync' or something like that instead ;-)
    assert(not(src_remote == False and dst_remote == False))

    info(u"Verifying attributes...")
    cfg = Config()
    ## Items left on src_list will be transferred
    ## Items left on update_list will be transferred after src_list
    ## Items left on copy_pairs will be copied from dst1 to dst2
    update_list = FileDict(ignore_case = False)
    ## Items left on dst_list will be deleted
    copy_pairs = []

    debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))

    for relative_file in src_list.keys():
        debug(u"CHECK: %s" % (relative_file))

        if dst_list.has_key(relative_file):
            ## Was --skip-existing requested?
            if cfg.skip_existing:
                debug(u"IGNR: %s (used --skip-existing)" % (relative_file))
                del(src_list[relative_file])
                del(dst_list[relative_file])
                continue

            try:
                same_file = _compare(src_list, dst_list, src_remote, dst_remote, relative_file)
            except (IOError,OSError), e:
                # File vanished between listing and comparison - drop it.
                debug(u"IGNR: %s (disappeared)" % (relative_file))
                warning(u"%s: file disappeared, ignoring." % (relative_file))
                del(src_list[relative_file])
                del(dst_list[relative_file])
                continue

            if same_file:
                debug(u"IGNR: %s (transfer not needed)" % relative_file)
                del(src_list[relative_file])
                del(dst_list[relative_file])

            else:
                # look for matching file in src
                try:
                    md5 = src_list.get_md5(relative_file)
                except IOError:
                    md5 = None
                if md5 is not None and dst_list.by_md5.has_key(md5):
                    # Found one, we want to copy
                    dst1 = list(dst_list.by_md5[md5])[0]
                    debug(u"DST COPY src: %s -> %s" % (dst1, relative_file))
                    copy_pairs.append((src_list[relative_file], dst1, relative_file))
                    del(src_list[relative_file])
                    del(dst_list[relative_file])
                else:
                    # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
                    # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
                    dst_list.record_md5(relative_file, md5)
                    update_list[relative_file] = src_list[relative_file]
                    del src_list[relative_file]
                    del dst_list[relative_file]

        else:
            # dst doesn't have this file
            # look for matching file elsewhere in dst
            try:
                md5 = src_list.get_md5(relative_file)
            except IOError:
                md5 = None
            dst1 = dst_list.find_md5_one(md5)
            if dst1 is not None:
                # Found one, we want to copy
                debug(u"DST COPY dst: %s -> %s" % (dst1, relative_file))
                copy_pairs.append((src_list[relative_file], dst1, relative_file))
                del(src_list[relative_file])
            else:
                # we don't have this file, and we don't have a copy of this file elsewhere. Get it.
                # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
                # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
                dst_list.record_md5(relative_file, md5)

    for f in dst_list.keys():
        if src_list.has_key(f) or update_list.has_key(f):
            # leave only those not on src_list + update_list
            del dst_list[f]

    return src_list, dst_list, update_list, copy_pairs
|
516
|
+
|
517
|
+
# vim:et:ts=4:sts=4:ai
|
Binary file
|
@@ -0,0 +1,53 @@
|
|
1
|
+
import cPickle as pickle
|
2
|
+
|
3
|
+
class HashCache(object):
    """
    In-memory cache of file md5 sums keyed by (dev, inode, mtime), with a
    stored size for invalidation and pickle-based persistence.

    Layout: self.inodes[dev][inode][mtime] -> {'md5': ..., 'size': ...,
    optionally 'purge': True}.
    """
    def __init__(self):
        self.inodes = dict()

    def add(self, dev, inode, mtime, size, md5):
        """Record the md5 and size for a (dev, inode, mtime) triple."""
        self.inodes.setdefault(dev, dict()).setdefault(inode, dict())[mtime] = dict(md5=md5, size=size)

    def md5(self, dev, inode, mtime, size):
        """Return the cached md5, or None when absent or the size changed."""
        try:
            d = self.inodes[dev][inode][mtime]
        except KeyError:
            # Narrowed from a bare 'except:' which also swallowed e.g.
            # KeyboardInterrupt and genuine programming errors.
            return None
        if d['size'] != size:
            return None
        return d['md5']

    def mark_all_for_purge(self):
        """Flag every entry; unmark_for_purge() then rescues live files."""
        # Pure reads here, so plain iteration is safe.
        for dev in self.inodes:
            for inode in self.inodes[dev]:
                for mtime in self.inodes[dev][inode]:
                    self.inodes[dev][inode][mtime]['purge'] = True

    def unmark_for_purge(self, dev, inode, mtime, size):
        """Clear the purge flag for a still-existing, unchanged file."""
        d = self.inodes[dev][inode][mtime]
        if d['size'] == size and 'purge' in d:
            del d['purge']

    def purge(self):
        """Drop every inode that still has a purge-flagged mtime entry."""
        # Iterate over snapshots: we delete from the dicts being walked,
        # which raises RuntimeError under Python 3 without list().
        for dev in list(self.inodes.keys()):
            for inode in list(self.inodes[dev].keys()):
                for mtime in list(self.inodes[dev][inode].keys()):
                    if 'purge' in self.inodes[dev][inode][mtime]:
                        del self.inodes[dev][inode]
                        break

    def save(self, f):
        """Pickle the cache (with a version tag) to file path 'f'."""
        d = dict(inodes=self.inodes, version=1)
        # Binary mode: pickle output is bytes, and text mode corrupts it on
        # Windows and with any binary pickle protocol.
        fh = open(f, 'wb')
        try:
            pickle.dump(d, fh)
        finally:
            fh.close()

    def load(self, f):
        """Load a previously saved cache from file path 'f'.

        Silently ignores payloads without the expected version/inodes keys.
        May raise IOError when the file is missing (callers handle this).
        """
        fh = open(f, 'rb')
        try:
            d = pickle.load(fh)
        finally:
            fh.close()
        if d.get('version') == 1 and 'inodes' in d:
            self.inodes = d['inodes']
|
Binary file
|