s3_cmd_bin 0.0.1

Files changed (74)
  1. data/.gitignore +17 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +28 -0
  5. data/Rakefile +1 -0
  6. data/lib/s3_cmd_bin/version.rb +3 -0
  7. data/lib/s3_cmd_bin.rb +15 -0
  8. data/resources/ChangeLog +1462 -0
  9. data/resources/INSTALL +97 -0
  10. data/resources/LICENSE +339 -0
  11. data/resources/MANIFEST.in +2 -0
  12. data/resources/Makefile +4 -0
  13. data/resources/NEWS +234 -0
  14. data/resources/README +342 -0
  15. data/resources/S3/ACL.py +224 -0
  16. data/resources/S3/ACL.pyc +0 -0
  17. data/resources/S3/AccessLog.py +92 -0
  18. data/resources/S3/AccessLog.pyc +0 -0
  19. data/resources/S3/BidirMap.py +42 -0
  20. data/resources/S3/BidirMap.pyc +0 -0
  21. data/resources/S3/CloudFront.py +773 -0
  22. data/resources/S3/CloudFront.pyc +0 -0
  23. data/resources/S3/Config.py +294 -0
  24. data/resources/S3/Config.pyc +0 -0
  25. data/resources/S3/ConnMan.py +71 -0
  26. data/resources/S3/ConnMan.pyc +0 -0
  27. data/resources/S3/Exceptions.py +88 -0
  28. data/resources/S3/Exceptions.pyc +0 -0
  29. data/resources/S3/FileDict.py +53 -0
  30. data/resources/S3/FileDict.pyc +0 -0
  31. data/resources/S3/FileLists.py +517 -0
  32. data/resources/S3/FileLists.pyc +0 -0
  33. data/resources/S3/HashCache.py +53 -0
  34. data/resources/S3/HashCache.pyc +0 -0
  35. data/resources/S3/MultiPart.py +137 -0
  36. data/resources/S3/MultiPart.pyc +0 -0
  37. data/resources/S3/PkgInfo.py +14 -0
  38. data/resources/S3/PkgInfo.pyc +0 -0
  39. data/resources/S3/Progress.py +173 -0
  40. data/resources/S3/Progress.pyc +0 -0
  41. data/resources/S3/S3.py +979 -0
  42. data/resources/S3/S3.pyc +0 -0
  43. data/resources/S3/S3Uri.py +223 -0
  44. data/resources/S3/S3Uri.pyc +0 -0
  45. data/resources/S3/SimpleDB.py +178 -0
  46. data/resources/S3/SortedDict.py +66 -0
  47. data/resources/S3/SortedDict.pyc +0 -0
  48. data/resources/S3/Utils.py +462 -0
  49. data/resources/S3/Utils.pyc +0 -0
  50. data/resources/S3/__init__.py +0 -0
  51. data/resources/S3/__init__.pyc +0 -0
  52. data/resources/TODO +52 -0
  53. data/resources/artwork/AtomicClockRadio.ttf +0 -0
  54. data/resources/artwork/TypeRa.ttf +0 -0
  55. data/resources/artwork/site-top-full-size.xcf +0 -0
  56. data/resources/artwork/site-top-label-download.png +0 -0
  57. data/resources/artwork/site-top-label-s3cmd.png +0 -0
  58. data/resources/artwork/site-top-label-s3sync.png +0 -0
  59. data/resources/artwork/site-top-s3tools-logo.png +0 -0
  60. data/resources/artwork/site-top.jpg +0 -0
  61. data/resources/artwork/site-top.png +0 -0
  62. data/resources/artwork/site-top.xcf +0 -0
  63. data/resources/format-manpage.pl +196 -0
  64. data/resources/magic +63 -0
  65. data/resources/run-tests.py +537 -0
  66. data/resources/s3cmd +2116 -0
  67. data/resources/s3cmd.1 +435 -0
  68. data/resources/s3db +55 -0
  69. data/resources/setup.cfg +2 -0
  70. data/resources/setup.py +80 -0
  71. data/resources/testsuite.tar.gz +0 -0
  72. data/resources/upload-to-sf.sh +7 -0
  73. data/s3_cmd_bin.gemspec +23 -0
  74. metadata +152 -0
@@ -0,0 +1,517 @@
+## Create and compare lists of files/objects
+## Author: Michal Ludvig <michal@logix.cz>
+##         http://www.logix.cz/michal
+## License: GPL Version 2
+
+from S3 import S3
+from Config import Config
+from S3Uri import S3Uri
+from FileDict import FileDict
+from Utils import *
+from Exceptions import ParameterError
+from HashCache import HashCache
+
+from logging import debug, info, warning, error
+
+import os
+import glob
+import copy
+
+__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include", "parse_attrs_header"]
+
+def _fswalk_follow_symlinks(path):
+    '''
+    Walk filesystem, following symbolic links (but without recursion), on python2.4 and later
+
+    If a symlink directory loop is detected, emit a warning and skip.
+    E.g.: dir1/dir2/sym-dir -> ../dir2
+    '''
+    assert os.path.isdir(path) # only designed for directory argument
+    walkdirs = set([path])
+    for dirpath, dirnames, filenames in os.walk(path):
+        handle_exclude_include_walk(dirpath, dirnames, [])
+        real_dirpath = os.path.realpath(dirpath)
+        for dirname in dirnames:
+            current = os.path.join(dirpath, dirname)
+            real_current = os.path.realpath(current)
+            if os.path.islink(current):
+                if (real_dirpath == real_current or
+                    real_dirpath.startswith(real_current + os.path.sep)):
+                    warning("Skipping recursively symlinked directory %s" % dirname)
+                else:
+                    walkdirs.add(current)
+    for walkdir in walkdirs:
+        for dirpath, dirnames, filenames in os.walk(walkdir):
+            handle_exclude_include_walk(dirpath, dirnames, [])
+            yield (dirpath, dirnames, filenames)
+
+def _fswalk_no_symlinks(path):
+    '''
+    Directory tree generator
+
+    path (str) is the root of the directory tree to walk
+    '''
+    for dirpath, dirnames, filenames in os.walk(path):
+        handle_exclude_include_walk(dirpath, dirnames, filenames)
+        yield (dirpath, dirnames, filenames)
+
+def filter_exclude_include(src_list):
+    info(u"Applying --exclude/--include")
+    cfg = Config()
+    exclude_list = FileDict(ignore_case = False)
+    for file in src_list.keys():
+        debug(u"CHECK: %s" % file)
+        excluded = False
+        for r in cfg.exclude:
+            if r.search(file):
+                excluded = True
+                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
+                break
+        if excluded:
+            ## No need to check for --include if not excluded
+            for r in cfg.include:
+                if r.search(file):
+                    excluded = False
+                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
+                    break
+        if excluded:
+            ## Still excluded - ok, action it
+            debug(u"EXCLUDE: %s" % file)
+            exclude_list[file] = src_list[file]
+            del(src_list[file])
+            continue
+        else:
+            debug(u"PASS: %r" % (file))
+    return src_list, exclude_list
+
+def handle_exclude_include_walk(root, dirs, files):
+    cfg = Config()
+    copydirs = copy.copy(dirs)
+    copyfiles = copy.copy(files)
+
+    # exclude dir matches in the current directory
+    # this prevents us from recursing down trees we know we want to ignore
+    for x in copydirs:
+        d = os.path.join(root, x, '')
+        debug(u"CHECK: %r" % d)
+        excluded = False
+        for r in cfg.exclude:
+            if r.search(d):
+                excluded = True
+                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
+                break
+        if excluded:
+            ## No need to check for --include if not excluded
+            for r in cfg.include:
+                if r.search(d):
+                    excluded = False
+                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
+                    break
+        if excluded:
+            ## Still excluded - ok, action it
+            debug(u"EXCLUDE: %r" % d)
+            dirs.remove(x)
+            continue
+        else:
+            debug(u"PASS: %r" % (d))
+
+    # exclude file matches in the current directory
+    for x in copyfiles:
+        file = os.path.join(root, x)
+        debug(u"CHECK: %r" % file)
+        excluded = False
+        for r in cfg.exclude:
+            if r.search(file):
+                excluded = True
+                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
+                break
+        if excluded:
+            ## No need to check for --include if not excluded
+            for r in cfg.include:
+                if r.search(file):
+                    excluded = False
+                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
+                    break
+        if excluded:
+            ## Still excluded - ok, action it
+            debug(u"EXCLUDE: %s" % file)
+            files.remove(x)
+            continue
+        else:
+            debug(u"PASS: %r" % (file))
+
+def fetch_local_list(args, recursive = None):
+    def _get_filelist_local(loc_list, local_uri, cache):
+        info(u"Compiling list of local files...")
+
+        if deunicodise(local_uri.basename()) == "-":
+            loc_list["-"] = {
+                'full_name_unicode' : '-',
+                'full_name' : '-',
+                'size' : -1,
+                'mtime' : -1,
+            }
+            return loc_list, True
+        if local_uri.isdir():
+            local_base = deunicodise(local_uri.basename())
+            local_path = deunicodise(local_uri.path())
+            if cfg.follow_symlinks:
+                filelist = _fswalk_follow_symlinks(local_path)
+            else:
+                filelist = _fswalk_no_symlinks(local_path)
+            single_file = False
+        else:
+            local_base = ""
+            local_path = deunicodise(local_uri.dirname())
+            filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
+            single_file = True
+        for root, dirs, files in filelist:
+            rel_root = root.replace(local_path, local_base, 1)
+            for f in files:
+                full_name = os.path.join(root, f)
+                if not os.path.isfile(full_name):
+                    continue
+                if os.path.islink(full_name):
+                    if not cfg.follow_symlinks:
+                        continue
+                relative_file = unicodise(os.path.join(rel_root, f))
+                if os.path.sep != "/":
+                    # Convert non-unix dir separators to '/'
+                    relative_file = "/".join(relative_file.split(os.path.sep))
+                if cfg.urlencoding_mode == "normal":
+                    relative_file = replace_nonprintables(relative_file)
+                if relative_file.startswith('./'):
+                    relative_file = relative_file[2:]
+                sr = os.stat_result(os.lstat(full_name))
+                loc_list[relative_file] = {
+                    'full_name_unicode' : unicodise(full_name),
+                    'full_name' : full_name,
+                    'size' : sr.st_size,
+                    'mtime' : sr.st_mtime,
+                    'dev' : sr.st_dev,
+                    'inode' : sr.st_ino,
+                    'uid' : sr.st_uid,
+                    'gid' : sr.st_gid,
+                    'sr': sr # save it all, may need it in preserve_attrs_list
+                    ## TODO: Possibly more to save here...
+                }
+                if 'md5' in cfg.sync_checks:
+                    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
+                    if md5 is None:
+                        try:
+                            md5 = loc_list.get_md5(relative_file) # this does the file I/O
+                        except IOError:
+                            continue
+                        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
+                    loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
+        return loc_list, single_file
+
+    def _maintain_cache(cache, local_list):
+        if cfg.cache_file:
+            cache.mark_all_for_purge()
+            for i in local_list.keys():
+                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
+            cache.purge()
+            cache.save(cfg.cache_file)
+
+    cfg = Config()
+
+    cache = HashCache()
+    if cfg.cache_file:
+        try:
+            cache.load(cfg.cache_file)
+        except IOError:
+            info(u"No cache file found, creating it.")
+
+    local_uris = []
+    local_list = FileDict(ignore_case = False)
+    single_file = False
+
+    if type(args) not in (list, tuple):
+        args = [args]
+
+    if recursive == None:
+        recursive = cfg.recursive
+
+    for arg in args:
+        uri = S3Uri(arg)
+        if not uri.type == 'file':
+            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
+        if uri.isdir() and not recursive:
+            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
+        local_uris.append(uri)
+
+    for uri in local_uris:
+        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)
+
+    ## Single file is True if and only if the user
+    ## specified one local URI and that URI represents
+    ## a FILE. Ie it is False if the URI was of a DIR
+    ## and that dir contained only one FILE. That's not
+    ## a case of single_file==True.
+    if len(local_list) > 1:
+        single_file = False
+
+    _maintain_cache(cache, local_list)
+
+    return local_list, single_file
+
+def fetch_remote_list(args, require_attribs = False, recursive = None):
+    def _get_filelist_remote(remote_uri, recursive = True):
+        ## If remote_uri ends with '/' then all remote files will have
+        ## the remote_uri prefix removed in the relative path.
+        ## If, on the other hand, the remote_uri ends with something else
+        ## (probably alphanumeric symbol) we'll use the last path part
+        ## in the relative path.
+        ##
+        ## Complicated, eh? See an example:
+        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
+        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
+        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
+        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
+        ## Furthermore a prefix-magic can restrict the return list:
+        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
+        ## { 'xyz/blah.txt' : {} }
+
+        info(u"Retrieving list of remote files for %s ..." % remote_uri)
+
+        s3 = S3(Config())
+        response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)
+
+        rem_base_original = rem_base = remote_uri.object()
+        remote_uri_original = remote_uri
+        if rem_base != '' and rem_base[-1] != '/':
+            rem_base = rem_base[:rem_base.rfind('/')+1]
+            remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
+        rem_base_len = len(rem_base)
+        rem_list = FileDict(ignore_case = False)
+        break_now = False
+        for object in response['list']:
+            if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
+                ## We asked for one file and we got that file :-)
+                key = os.path.basename(object['Key'])
+                object_uri_str = remote_uri_original.uri()
+                break_now = True
+                rem_list = FileDict(ignore_case = False) ## Remove whatever has already been put to rem_list
+            else:
+                key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !!
+                object_uri_str = remote_uri.uri() + key
+            rem_list[key] = {
+                'size' : int(object['Size']),
+                'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
+                'md5' : object['ETag'][1:-1],
+                'object_key' : object['Key'],
+                'object_uri_str' : object_uri_str,
+                'base_uri' : remote_uri,
+                'dev' : None,
+                'inode' : None,
+            }
+            md5 = object['ETag'][1:-1]
+            rem_list.record_md5(key, md5)
+            if break_now:
+                break
+        return rem_list
+
+    cfg = Config()
+    remote_uris = []
+    remote_list = FileDict(ignore_case = False)
+
+    if type(args) not in (list, tuple):
+        args = [args]
+
+    if recursive == None:
+        recursive = cfg.recursive
+
+    for arg in args:
+        uri = S3Uri(arg)
+        if not uri.type == 's3':
+            raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
+        remote_uris.append(uri)
+
+    if recursive:
+        for uri in remote_uris:
+            objectlist = _get_filelist_remote(uri)
+            for key in objectlist:
+                remote_list[key] = objectlist[key]
+                remote_list.record_md5(key, objectlist.get_md5(key))
+    else:
+        for uri in remote_uris:
+            uri_str = str(uri)
+            ## Wildcards used in remote URI?
+            ## If yes we'll need a bucket listing...
+            if uri_str.find('*') > -1 or uri_str.find('?') > -1:
+                first_wildcard = uri_str.find('*')
+                first_questionmark = uri_str.find('?')
+                if first_questionmark > -1 and first_questionmark < first_wildcard:
+                    first_wildcard = first_questionmark
+                prefix = uri_str[:first_wildcard]
+                rest = uri_str[first_wildcard+1:]
+                ## Only request recursive listing if the 'rest' of the URI,
+                ## i.e. the part after first wildcard, contains '/'
+                need_recursion = rest.find('/') > -1
+                objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
+                for key in objectlist:
+                    ## Check whether the 'key' matches the requested wildcards
+                    if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
+                        remote_list[key] = objectlist[key]
+            else:
+                ## No wildcards - simply append the given URI to the list
+                key = os.path.basename(uri.object())
+                if not key:
+                    raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
+                remote_item = {
+                    'base_uri': uri,
+                    'object_uri_str': unicode(uri),
+                    'object_key': uri.object()
+                }
+                if require_attribs:
+                    response = S3(cfg).object_info(uri)
+                    remote_item.update({
+                        'size': int(response['headers']['content-length']),
+                        'md5': response['headers']['etag'].strip('"\''),
+                        'timestamp' : dateRFC822toUnix(response['headers']['date'])
+                    })
+                    # get md5 from header if it's present. We would have set that during upload
+                    if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
+                        attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
+                        if attrs.has_key('md5'):
+                            remote_item.update({'md5': attrs['md5']})
+
+                remote_list[key] = remote_item
+    return remote_list
+
+def parse_attrs_header(attrs_header):
+    attrs = {}
+    for attr in attrs_header.split("/"):
+        key, val = attr.split(":")
+        attrs[key] = val
+    return attrs
+
+
+def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False):
+    def __direction_str(is_remote):
+        return is_remote and "remote" or "local"
+
+    def _compare(src_list, dst_lst, src_remote, dst_remote, file):
+        """Return True if src_list[file] matches dst_list[file], else False"""
+        attribs_match = True
+        if not (src_list.has_key(file) and dst_list.has_key(file)):
+            info(u"%s: does not exist in one side or the other: src_list=%s, dst_list=%s" % (file, src_list.has_key(file), dst_list.has_key(file)))
+            return False
+
+        ## check size first
+        if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
+            debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
+            attribs_match = False
+
+        ## check md5
+        compare_md5 = 'md5' in cfg.sync_checks
+        # Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn"
+        if compare_md5:
+            if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
+                compare_md5 = False
+                info(u"disabled md5 check for %s" % file)
+        if attribs_match and compare_md5:
+            try:
+                src_md5 = src_list.get_md5(file)
+                dst_md5 = dst_list.get_md5(file)
+            except (IOError,OSError), e:
+                # md5 sum verification failed - ignore that file altogether
+                debug(u"IGNR: %s (disappeared)" % (file))
+                warning(u"%s: file disappeared, ignoring." % (file))
+                raise
+
+            if src_md5 != dst_md5:
+                ## checksums are different.
+                attribs_match = False
+                debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
+
+        return attribs_match
+
+    # we don't support local->local sync, use 'rsync' or something like that instead ;-)
+    assert(not(src_remote == False and dst_remote == False))
+
+    info(u"Verifying attributes...")
+    cfg = Config()
+    ## Items left on src_list will be transferred
+    ## Items left on update_list will be transferred after src_list
+    ## Items left on copy_pairs will be copied from dst1 to dst2
+    update_list = FileDict(ignore_case = False)
+    ## Items left on dst_list will be deleted
+    copy_pairs = []
+
+    debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
+
+    for relative_file in src_list.keys():
+        debug(u"CHECK: %s" % (relative_file))
+
+        if dst_list.has_key(relative_file):
+            ## Was --skip-existing requested?
+            if cfg.skip_existing:
+                debug(u"IGNR: %s (used --skip-existing)" % (relative_file))
+                del(src_list[relative_file])
+                del(dst_list[relative_file])
+                continue
+
+            try:
+                same_file = _compare(src_list, dst_list, src_remote, dst_remote, relative_file)
+            except (IOError,OSError), e:
+                debug(u"IGNR: %s (disappeared)" % (relative_file))
+                warning(u"%s: file disappeared, ignoring." % (relative_file))
+                del(src_list[relative_file])
+                del(dst_list[relative_file])
+                continue
+
+            if same_file:
+                debug(u"IGNR: %s (transfer not needed)" % relative_file)
+                del(src_list[relative_file])
+                del(dst_list[relative_file])
+
+            else:
+                # look for matching file in src
+                try:
+                    md5 = src_list.get_md5(relative_file)
+                except IOError:
+                    md5 = None
+                if md5 is not None and dst_list.by_md5.has_key(md5):
+                    # Found one, we want to copy
+                    dst1 = list(dst_list.by_md5[md5])[0]
+                    debug(u"DST COPY src: %s -> %s" % (dst1, relative_file))
+                    copy_pairs.append((src_list[relative_file], dst1, relative_file))
+                    del(src_list[relative_file])
+                    del(dst_list[relative_file])
+                else:
+                    # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
+                    # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
+                    dst_list.record_md5(relative_file, md5)
+                    update_list[relative_file] = src_list[relative_file]
+                    del src_list[relative_file]
+                    del dst_list[relative_file]
+
+        else:
+            # dst doesn't have this file
+            # look for matching file elsewhere in dst
+            try:
+                md5 = src_list.get_md5(relative_file)
+            except IOError:
+                md5 = None
+            dst1 = dst_list.find_md5_one(md5)
+            if dst1 is not None:
+                # Found one, we want to copy
+                debug(u"DST COPY dst: %s -> %s" % (dst1, relative_file))
+                copy_pairs.append((src_list[relative_file], dst1, relative_file))
+                del(src_list[relative_file])
+            else:
+                # we don't have this file, and we don't have a copy of this file elsewhere. Get it.
+                # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
+                # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
+                dst_list.record_md5(relative_file, md5)
+
+    for f in dst_list.keys():
+        if src_list.has_key(f) or update_list.has_key(f):
+            # leave only those not on src_list + update_list
+            del dst_list[f]
+
+    return src_list, dst_list, update_list, copy_pairs
+
+# vim:et:ts=4:sts=4:ai
Binary file (data/resources/S3/FileLists.pyc) not shown
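
For orientation, here is a minimal sketch of how the FileLists.py functions above fit together during a sync. All paths, the bucket name, and the config location are illustrative, and it assumes the vendored S3/ package under data/resources/ is importable the way the bundled s3cmd script imports it (Python 2, like the vendored code):

    # Hypothetical driver, not part of the gem; names are illustrative.
    from S3.Config import Config
    from S3.FileLists import fetch_local_list, fetch_remote_list, \
        filter_exclude_include, compare_filelists

    cfg = Config("/home/user/.s3cfg")   # illustrative s3cmd-style config path

    # Build the two FileDict listings to be compared.
    local_list, single_file = fetch_local_list("/data/photos", recursive = True)
    remote_list = fetch_remote_list("s3://example-bucket/photos/", recursive = True)

    # Honour --exclude/--include before comparing.
    local_list, excluded = filter_exclude_include(local_list)

    # After the compare: src_list holds new uploads, dst_list holds
    # remote-only objects (deletion candidates), update_list holds changed
    # files, and copy_pairs holds remote-to-remote copy opportunities.
    to_upload, only_remote, to_update, copy_pairs = compare_filelists(
        local_list, remote_list, src_remote = False, dst_remote = True)
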
@@ -0,0 +1,53 @@
+import cPickle as pickle
+
+class HashCache(object):
+    def __init__(self):
+        self.inodes = dict()
+
+    def add(self, dev, inode, mtime, size, md5):
+        if dev not in self.inodes:
+            self.inodes[dev] = dict()
+        if inode not in self.inodes[dev]:
+            self.inodes[dev][inode] = dict()
+        self.inodes[dev][inode][mtime] = dict(md5=md5, size=size)
+
+    def md5(self, dev, inode, mtime, size):
+        try:
+            d = self.inodes[dev][inode][mtime]
+            if d['size'] != size:
+                return None
+        except:
+            return None
+        return d['md5']
+
+    def mark_all_for_purge(self):
+        for d in self.inodes.keys():
+            for i in self.inodes[d].keys():
+                for c in self.inodes[d][i].keys():
+                    self.inodes[d][i][c]['purge'] = True
+
+    def unmark_for_purge(self, dev, inode, mtime, size):
+        d = self.inodes[dev][inode][mtime]
+        if d['size'] == size and 'purge' in d:
+            del self.inodes[dev][inode][mtime]['purge']
+
+    def purge(self):
+        for d in self.inodes.keys():
+            for i in self.inodes[d].keys():
+                for m in self.inodes[d][i].keys():
+                    if 'purge' in self.inodes[d][i][m]:
+                        del self.inodes[d][i]
+                        break
+
+    def save(self, f):
+        d = dict(inodes=self.inodes, version=1)
+        f = open(f, 'w')
+        p = pickle.dump(d, f)
+        f.close()
+
+    def load(self, f):
+        f = open(f, 'r')
+        d = pickle.load(f)
+        f.close()
+        if d.get('version') == 1 and 'inodes' in d:
+            self.inodes = d['inodes']
Binary file (data/resources/S3/HashCache.pyc) not shown
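
Similarly, a minimal sketch of the HashCache lifecycle, mirroring what fetch_local_list and its _maintain_cache helper above do with it. The cache path and file name are illustrative, and the md5 value is a placeholder that would really be computed from the file's contents:

    import os
    from S3.HashCache import HashCache

    cache = HashCache()
    try:
        cache.load(os.path.expanduser("~/.s3cmd.cache"))   # illustrative path
    except IOError:
        pass   # first run: start with an empty cache

    st = os.lstat("/data/photos/img.jpg")   # illustrative file
    md5 = cache.md5(st.st_dev, st.st_ino, st.st_mtime, st.st_size)
    if md5 is None:
        md5 = "0123456789abcdef0123456789abcdef"   # placeholder checksum
        cache.add(st.st_dev, st.st_ino, st.st_mtime, st.st_size, md5)

    # Purge entries for files not seen in this run, then persist to disk.
    cache.mark_all_for_purge()
    cache.unmark_for_purge(st.st_dev, st.st_ino, st.st_mtime, st.st_size)
    cache.purge()
    cache.save(os.path.expanduser("~/.s3cmd.cache"))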