s3_cmd_bin 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. data/.gitignore +17 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +28 -0
  5. data/Rakefile +1 -0
  6. data/lib/s3_cmd_bin/version.rb +3 -0
  7. data/lib/s3_cmd_bin.rb +15 -0
  8. data/resources/ChangeLog +1462 -0
  9. data/resources/INSTALL +97 -0
  10. data/resources/LICENSE +339 -0
  11. data/resources/MANIFEST.in +2 -0
  12. data/resources/Makefile +4 -0
  13. data/resources/NEWS +234 -0
  14. data/resources/README +342 -0
  15. data/resources/S3/ACL.py +224 -0
  16. data/resources/S3/ACL.pyc +0 -0
  17. data/resources/S3/AccessLog.py +92 -0
  18. data/resources/S3/AccessLog.pyc +0 -0
  19. data/resources/S3/BidirMap.py +42 -0
  20. data/resources/S3/BidirMap.pyc +0 -0
  21. data/resources/S3/CloudFront.py +773 -0
  22. data/resources/S3/CloudFront.pyc +0 -0
  23. data/resources/S3/Config.py +294 -0
  24. data/resources/S3/Config.pyc +0 -0
  25. data/resources/S3/ConnMan.py +71 -0
  26. data/resources/S3/ConnMan.pyc +0 -0
  27. data/resources/S3/Exceptions.py +88 -0
  28. data/resources/S3/Exceptions.pyc +0 -0
  29. data/resources/S3/FileDict.py +53 -0
  30. data/resources/S3/FileDict.pyc +0 -0
  31. data/resources/S3/FileLists.py +517 -0
  32. data/resources/S3/FileLists.pyc +0 -0
  33. data/resources/S3/HashCache.py +53 -0
  34. data/resources/S3/HashCache.pyc +0 -0
  35. data/resources/S3/MultiPart.py +137 -0
  36. data/resources/S3/MultiPart.pyc +0 -0
  37. data/resources/S3/PkgInfo.py +14 -0
  38. data/resources/S3/PkgInfo.pyc +0 -0
  39. data/resources/S3/Progress.py +173 -0
  40. data/resources/S3/Progress.pyc +0 -0
  41. data/resources/S3/S3.py +979 -0
  42. data/resources/S3/S3.pyc +0 -0
  43. data/resources/S3/S3Uri.py +223 -0
  44. data/resources/S3/S3Uri.pyc +0 -0
  45. data/resources/S3/SimpleDB.py +178 -0
  46. data/resources/S3/SortedDict.py +66 -0
  47. data/resources/S3/SortedDict.pyc +0 -0
  48. data/resources/S3/Utils.py +462 -0
  49. data/resources/S3/Utils.pyc +0 -0
  50. data/resources/S3/__init__.py +0 -0
  51. data/resources/S3/__init__.pyc +0 -0
  52. data/resources/TODO +52 -0
  53. data/resources/artwork/AtomicClockRadio.ttf +0 -0
  54. data/resources/artwork/TypeRa.ttf +0 -0
  55. data/resources/artwork/site-top-full-size.xcf +0 -0
  56. data/resources/artwork/site-top-label-download.png +0 -0
  57. data/resources/artwork/site-top-label-s3cmd.png +0 -0
  58. data/resources/artwork/site-top-label-s3sync.png +0 -0
  59. data/resources/artwork/site-top-s3tools-logo.png +0 -0
  60. data/resources/artwork/site-top.jpg +0 -0
  61. data/resources/artwork/site-top.png +0 -0
  62. data/resources/artwork/site-top.xcf +0 -0
  63. data/resources/format-manpage.pl +196 -0
  64. data/resources/magic +63 -0
  65. data/resources/run-tests.py +537 -0
  66. data/resources/s3cmd +2116 -0
  67. data/resources/s3cmd.1 +435 -0
  68. data/resources/s3db +55 -0
  69. data/resources/setup.cfg +2 -0
  70. data/resources/setup.py +80 -0
  71. data/resources/testsuite.tar.gz +0 -0
  72. data/resources/upload-to-sf.sh +7 -0
  73. data/s3_cmd_bin.gemspec +23 -0
  74. metadata +152 -0
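Everything under data/resources/ is the vendored upstream s3cmd distribution: the s3cmd command-line script, its S3/ support package, the man page, and the test suite. A minimal sketch of invoking the bundled script directly, assuming the gem's data/resources/ tree is unpacked at ./resources and a python2 interpreter is on PATH (s3cmd of this era is Python 2):

    import subprocess

    # Assumes ./resources holds the gem's data/resources/ tree and that
    # a Python 2 interpreter is available as 'python2'.
    subprocess.call(['python2', 'resources/s3cmd', '--help'])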
data/resources/S3/FileLists.py
@@ -0,0 +1,517 @@
+ ## Create and compare lists of files/objects
+ ## Author: Michal Ludvig <michal@logix.cz>
+ ## http://www.logix.cz/michal
+ ## License: GPL Version 2
+
+ from S3 import S3
+ from Config import Config
+ from S3Uri import S3Uri
+ from FileDict import FileDict
+ from Utils import *
+ from Exceptions import ParameterError
+ from HashCache import HashCache
+
+ from logging import debug, info, warning, error
+
+ import os
+ import glob
+ import copy
+
+ __all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include", "parse_attrs_header"]
+
+ def _fswalk_follow_symlinks(path):
+     '''
+     Walk filesystem, following symbolic links (but without recursion), on python2.4 and later
+
+     If a symlink directory loop is detected, emit a warning and skip.
+     E.g.: dir1/dir2/sym-dir -> ../dir2
+     '''
+     assert os.path.isdir(path) # only designed for directory argument
+     walkdirs = set([path])
+     for dirpath, dirnames, filenames in os.walk(path):
+         handle_exclude_include_walk(dirpath, dirnames, [])
+         real_dirpath = os.path.realpath(dirpath)
+         for dirname in dirnames:
+             current = os.path.join(dirpath, dirname)
+             real_current = os.path.realpath(current)
+             if os.path.islink(current):
+                 if (real_dirpath == real_current or
+                     real_dirpath.startswith(real_current + os.path.sep)):
+                     warning("Skipping recursively symlinked directory %s" % dirname)
+                 else:
+                     walkdirs.add(current)
+     for walkdir in walkdirs:
+         for dirpath, dirnames, filenames in os.walk(walkdir):
+             handle_exclude_include_walk(dirpath, dirnames, [])
+             yield (dirpath, dirnames, filenames)
+
+ def _fswalk_no_symlinks(path):
+     '''
+     Directory tree generator
+
+     path (str) is the root of the directory tree to walk
+     '''
+     for dirpath, dirnames, filenames in os.walk(path):
+         handle_exclude_include_walk(dirpath, dirnames, filenames)
+         yield (dirpath, dirnames, filenames)
+
+ def filter_exclude_include(src_list):
+     info(u"Applying --exclude/--include")
+     cfg = Config()
+     exclude_list = FileDict(ignore_case = False)
+     for file in src_list.keys():
+         debug(u"CHECK: %s" % file)
+         excluded = False
+         for r in cfg.exclude:
+             if r.search(file):
+                 excluded = True
+                 debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
+                 break
+         if excluded:
+             ## No need to check for --include if not excluded
+             for r in cfg.include:
+                 if r.search(file):
+                     excluded = False
+                     debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
+                     break
+         if excluded:
+             ## Still excluded - ok, action it
+             debug(u"EXCLUDE: %s" % file)
+             exclude_list[file] = src_list[file]
+             del(src_list[file])
+             continue
+         else:
+             debug(u"PASS: %r" % (file))
+     return src_list, exclude_list
+
+ def handle_exclude_include_walk(root, dirs, files):
+     cfg = Config()
+     copydirs = copy.copy(dirs)
+     copyfiles = copy.copy(files)
+
+     # exclude dir matches in the current directory
+     # this prevents us from recursing down trees we know we want to ignore
+     for x in copydirs:
+         d = os.path.join(root, x, '')
+         debug(u"CHECK: %r" % d)
+         excluded = False
+         for r in cfg.exclude:
+             if r.search(d):
+                 excluded = True
+                 debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
+                 break
+         if excluded:
+             ## No need to check for --include if not excluded
+             for r in cfg.include:
+                 if r.search(d):
+                     excluded = False
+                     debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
+                     break
+         if excluded:
+             ## Still excluded - ok, action it
+             debug(u"EXCLUDE: %r" % d)
+             dirs.remove(x)
+             continue
+         else:
+             debug(u"PASS: %r" % (d))
+
+     # exclude file matches in the current directory
+     for x in copyfiles:
+         file = os.path.join(root, x)
+         debug(u"CHECK: %r" % file)
+         excluded = False
+         for r in cfg.exclude:
+             if r.search(file):
+                 excluded = True
+                 debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
+                 break
+         if excluded:
+             ## No need to check for --include if not excluded
+             for r in cfg.include:
+                 if r.search(file):
+                     excluded = False
+                     debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
+                     break
+         if excluded:
+             ## Still excluded - ok, action it
+             debug(u"EXCLUDE: %s" % file)
+             files.remove(x)
+             continue
+         else:
+             debug(u"PASS: %r" % (file))
+
+ def fetch_local_list(args, recursive = None):
+     def _get_filelist_local(loc_list, local_uri, cache):
+         info(u"Compiling list of local files...")
+
+         if deunicodise(local_uri.basename()) == "-":
+             loc_list["-"] = {
+                 'full_name_unicode' : '-',
+                 'full_name' : '-',
+                 'size' : -1,
+                 'mtime' : -1,
+             }
+             return loc_list, True
+         if local_uri.isdir():
+             local_base = deunicodise(local_uri.basename())
+             local_path = deunicodise(local_uri.path())
+             if cfg.follow_symlinks:
+                 filelist = _fswalk_follow_symlinks(local_path)
+             else:
+                 filelist = _fswalk_no_symlinks(local_path)
+             single_file = False
+         else:
+             local_base = ""
+             local_path = deunicodise(local_uri.dirname())
+             filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
+             single_file = True
+         for root, dirs, files in filelist:
+             rel_root = root.replace(local_path, local_base, 1)
+             for f in files:
+                 full_name = os.path.join(root, f)
+                 if not os.path.isfile(full_name):
+                     continue
+                 if os.path.islink(full_name):
+                     if not cfg.follow_symlinks:
+                         continue
+                 relative_file = unicodise(os.path.join(rel_root, f))
+                 if os.path.sep != "/":
+                     # Convert non-unix dir separators to '/'
+                     relative_file = "/".join(relative_file.split(os.path.sep))
+                 if cfg.urlencoding_mode == "normal":
+                     relative_file = replace_nonprintables(relative_file)
+                 if relative_file.startswith('./'):
+                     relative_file = relative_file[2:]
+                 sr = os.stat_result(os.lstat(full_name))
+                 loc_list[relative_file] = {
+                     'full_name_unicode' : unicodise(full_name),
+                     'full_name' : full_name,
+                     'size' : sr.st_size,
+                     'mtime' : sr.st_mtime,
+                     'dev' : sr.st_dev,
+                     'inode' : sr.st_ino,
+                     'uid' : sr.st_uid,
+                     'gid' : sr.st_gid,
+                     'sr': sr # save it all, may need it in preserve_attrs_list
+                     ## TODO: Possibly more to save here...
+                 }
+                 if 'md5' in cfg.sync_checks:
+                     md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
+                     if md5 is None:
+                         try:
+                             md5 = loc_list.get_md5(relative_file) # this does the file I/O
+                         except IOError:
+                             continue
+                         cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
+                     loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
+         return loc_list, single_file
+
+     def _maintain_cache(cache, local_list):
+         if cfg.cache_file:
+             cache.mark_all_for_purge()
+             for i in local_list.keys():
+                 cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
+             cache.purge()
+             cache.save(cfg.cache_file)
+
+     cfg = Config()
+
+     cache = HashCache()
+     if cfg.cache_file:
+         try:
+             cache.load(cfg.cache_file)
+         except IOError:
+             info(u"No cache file found, creating it.")
+
+     local_uris = []
+     local_list = FileDict(ignore_case = False)
+     single_file = False
+
+     if type(args) not in (list, tuple):
+         args = [args]
+
+     if recursive == None:
+         recursive = cfg.recursive
+
+     for arg in args:
+         uri = S3Uri(arg)
+         if not uri.type == 'file':
+             raise ParameterError("Expecting filename or directory instead of: %s" % arg)
+         if uri.isdir() and not recursive:
+             raise ParameterError("Use --recursive to upload a directory: %s" % arg)
+         local_uris.append(uri)
+
+     for uri in local_uris:
+         list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)
+
+     ## Single file is True if and only if the user
+     ## specified one local URI and that URI represents
+     ## a FILE. Ie it is False if the URI was of a DIR
+     ## and that dir contained only one FILE. That's not
+     ## a case of single_file==True.
+     if len(local_list) > 1:
+         single_file = False
+
+     _maintain_cache(cache, local_list)
+
+     return local_list, single_file
+
+ def fetch_remote_list(args, require_attribs = False, recursive = None):
+     def _get_filelist_remote(remote_uri, recursive = True):
+         ## If remote_uri ends with '/' then all remote files will have
+         ## the remote_uri prefix removed in the relative path.
+         ## If, on the other hand, the remote_uri ends with something else
+         ## (probably alphanumeric symbol) we'll use the last path part
+         ## in the relative path.
+         ##
+         ## Complicated, eh? See an example:
+         ## _get_filelist_remote("s3://bckt/abc/def") may yield:
+         ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
+         ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
+         ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
+         ## Furthermore a prefix-magic can restrict the return list:
+         ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
+         ## { 'xyz/blah.txt' : {} }
+
+         info(u"Retrieving list of remote files for %s ..." % remote_uri)
+
+         s3 = S3(Config())
+         response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)
+
+         rem_base_original = rem_base = remote_uri.object()
+         remote_uri_original = remote_uri
+         if rem_base != '' and rem_base[-1] != '/':
+             rem_base = rem_base[:rem_base.rfind('/')+1]
+             remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
+         rem_base_len = len(rem_base)
+         rem_list = FileDict(ignore_case = False)
+         break_now = False
+         for object in response['list']:
+             if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
+                 ## We asked for one file and we got that file :-)
+                 key = os.path.basename(object['Key'])
+                 object_uri_str = remote_uri_original.uri()
+                 break_now = True
+                 rem_list = FileDict(ignore_case = False) ## Remove whatever has already been put to rem_list
+             else:
+                 key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !!
+                 object_uri_str = remote_uri.uri() + key
+             rem_list[key] = {
+                 'size' : int(object['Size']),
+                 'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
+                 'md5' : object['ETag'][1:-1],
+                 'object_key' : object['Key'],
+                 'object_uri_str' : object_uri_str,
+                 'base_uri' : remote_uri,
+                 'dev' : None,
+                 'inode' : None,
+             }
+             md5 = object['ETag'][1:-1]
+             rem_list.record_md5(key, md5)
+             if break_now:
+                 break
+         return rem_list
+
+     cfg = Config()
+     remote_uris = []
+     remote_list = FileDict(ignore_case = False)
+
+     if type(args) not in (list, tuple):
+         args = [args]
+
+     if recursive == None:
+         recursive = cfg.recursive
+
+     for arg in args:
+         uri = S3Uri(arg)
+         if not uri.type == 's3':
+             raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
+         remote_uris.append(uri)
+
+     if recursive:
+         for uri in remote_uris:
+             objectlist = _get_filelist_remote(uri)
+             for key in objectlist:
+                 remote_list[key] = objectlist[key]
+                 remote_list.record_md5(key, objectlist.get_md5(key))
+     else:
+         for uri in remote_uris:
+             uri_str = str(uri)
+             ## Wildcards used in remote URI?
+             ## If yes we'll need a bucket listing...
+             if uri_str.find('*') > -1 or uri_str.find('?') > -1:
+                 first_wildcard = uri_str.find('*')
+                 first_questionmark = uri_str.find('?')
+                 if first_questionmark > -1 and first_questionmark < first_wildcard:
+                     first_wildcard = first_questionmark
+                 prefix = uri_str[:first_wildcard]
+                 rest = uri_str[first_wildcard+1:]
+                 ## Only request recursive listing if the 'rest' of the URI,
+                 ## i.e. the part after first wildcard, contains '/'
+                 need_recursion = rest.find('/') > -1
+                 objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
+                 for key in objectlist:
+                     ## Check whether the 'key' matches the requested wildcards
+                     if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
+                         remote_list[key] = objectlist[key]
+             else:
+                 ## No wildcards - simply append the given URI to the list
+                 key = os.path.basename(uri.object())
+                 if not key:
+                     raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
+                 remote_item = {
+                     'base_uri': uri,
+                     'object_uri_str': unicode(uri),
+                     'object_key': uri.object()
+                 }
+                 if require_attribs:
+                     response = S3(cfg).object_info(uri)
+                     remote_item.update({
+                         'size': int(response['headers']['content-length']),
+                         'md5': response['headers']['etag'].strip('"\''),
+                         'timestamp' : dateRFC822toUnix(response['headers']['date'])
+                     })
+                     # get md5 from header if it's present. We would have set that during upload
+                     if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
+                         attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
+                         if attrs.has_key('md5'):
+                             remote_item.update({'md5': attrs['md5']})
+
+                 remote_list[key] = remote_item
+     return remote_list
+
+ def parse_attrs_header(attrs_header):
+     attrs = {}
+     for attr in attrs_header.split("/"):
+         key, val = attr.split(":")
+         attrs[key] = val
+     return attrs
+
+
+ def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False):
+     def __direction_str(is_remote):
+         return is_remote and "remote" or "local"
+
+     def _compare(src_list, dst_lst, src_remote, dst_remote, file):
+         """Return True if src_list[file] matches dst_list[file], else False"""
+         attribs_match = True
+         if not (src_list.has_key(file) and dst_list.has_key(file)):
+             info(u"%s: does not exist in one side or the other: src_list=%s, dst_list=%s" % (file, src_list.has_key(file), dst_list.has_key(file)))
+             return False
+
+         ## check size first
+         if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
+             debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
+             attribs_match = False
+
+         ## check md5
+         compare_md5 = 'md5' in cfg.sync_checks
+         # Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn"
+         if compare_md5:
+             if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
+                 compare_md5 = False
+                 info(u"disabled md5 check for %s" % file)
+         if attribs_match and compare_md5:
+             try:
+                 src_md5 = src_list.get_md5(file)
+                 dst_md5 = dst_list.get_md5(file)
+             except (IOError,OSError), e:
+                 # md5 sum verification failed - ignore that file altogether
+                 debug(u"IGNR: %s (disappeared)" % (file))
+                 warning(u"%s: file disappeared, ignoring." % (file))
+                 raise
+
+             if src_md5 != dst_md5:
+                 ## checksums are different.
+                 attribs_match = False
+                 debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
+
+         return attribs_match
+
+     # we don't support local->local sync, use 'rsync' or something like that instead ;-)
+     assert(not(src_remote == False and dst_remote == False))
+
+     info(u"Verifying attributes...")
+     cfg = Config()
+     ## Items left on src_list will be transferred
+     ## Items left on update_list will be transferred after src_list
+     ## Items left on copy_pairs will be copied from dst1 to dst2
+     update_list = FileDict(ignore_case = False)
+     ## Items left on dst_list will be deleted
+     copy_pairs = []
+
+     debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
+
+     for relative_file in src_list.keys():
+         debug(u"CHECK: %s" % (relative_file))
+
+         if dst_list.has_key(relative_file):
+             ## Was --skip-existing requested?
+             if cfg.skip_existing:
+                 debug(u"IGNR: %s (used --skip-existing)" % (relative_file))
+                 del(src_list[relative_file])
+                 del(dst_list[relative_file])
+                 continue
+
+             try:
+                 same_file = _compare(src_list, dst_list, src_remote, dst_remote, relative_file)
+             except (IOError,OSError), e:
+                 debug(u"IGNR: %s (disappeared)" % (relative_file))
+                 warning(u"%s: file disappeared, ignoring." % (relative_file))
+                 del(src_list[relative_file])
+                 del(dst_list[relative_file])
+                 continue
+
+             if same_file:
+                 debug(u"IGNR: %s (transfer not needed)" % relative_file)
+                 del(src_list[relative_file])
+                 del(dst_list[relative_file])
+
+             else:
+                 # look for matching file in src
+                 try:
+                     md5 = src_list.get_md5(relative_file)
+                 except IOError:
+                     md5 = None
+                 if md5 is not None and dst_list.by_md5.has_key(md5):
+                     # Found one, we want to copy
+                     dst1 = list(dst_list.by_md5[md5])[0]
+                     debug(u"DST COPY src: %s -> %s" % (dst1, relative_file))
+                     copy_pairs.append((src_list[relative_file], dst1, relative_file))
+                     del(src_list[relative_file])
+                     del(dst_list[relative_file])
+                 else:
+                     # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
+                     # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
+                     dst_list.record_md5(relative_file, md5)
+                     update_list[relative_file] = src_list[relative_file]
+                     del src_list[relative_file]
+                     del dst_list[relative_file]
+
+         else:
+             # dst doesn't have this file
+             # look for matching file elsewhere in dst
+             try:
+                 md5 = src_list.get_md5(relative_file)
+             except IOError:
+                 md5 = None
+             dst1 = dst_list.find_md5_one(md5)
+             if dst1 is not None:
+                 # Found one, we want to copy
+                 debug(u"DST COPY dst: %s -> %s" % (dst1, relative_file))
+                 copy_pairs.append((src_list[relative_file], dst1, relative_file))
+                 del(src_list[relative_file])
+             else:
+                 # we don't have this file, and we don't have a copy of this file elsewhere. Get it.
+                 # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
+                 # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
+                 dst_list.record_md5(relative_file, md5)
+
+     for f in dst_list.keys():
+         if src_list.has_key(f) or update_list.has_key(f):
+             # leave only those not on src_list + update_list
+             del dst_list[f]
+
+     return src_list, dst_list, update_list, copy_pairs
+
+ # vim:et:ts=4:sts=4:ai
Binary file (data/resources/S3/FileLists.pyc)
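The trickiest behaviour in FileLists.py above is the --exclude/--include precedence: a path is excluded when it matches any exclude pattern, unless an include pattern rescues it. Below is a minimal standalone sketch of that rule (Python 2, like the vendored code); the plain dicts and compiled regexes stand in for the package's FileDict and Config objects, and the sample patterns are illustrative only.

    import re

    def filter_exclude_include_sketch(src_list, exclude, include):
        # Same two-pass test as filter_exclude_include() above:
        # exclude first, then let an --include match rescue the path.
        exclude_list = {}
        for name in list(src_list.keys()):
            excluded = any(r.search(name) for r in exclude)
            if excluded and any(r.search(name) for r in include):
                excluded = False
            if excluded:
                exclude_list[name] = src_list.pop(name)
        return src_list, exclude_list

    files = {'a.py': {}, 'b.pyc': {}, 'keep.pyc': {}}
    kept, dropped = filter_exclude_include_sketch(
        files,
        exclude = [re.compile(r'\.pyc$')],
        include = [re.compile(r'^keep')])
    print sorted(kept), sorted(dropped)   # ['a.py', 'keep.pyc'] ['b.pyc']

handle_exclude_include_walk() applies the same test during the walk itself, pruning matched entries out of dirnames in place so os.walk() never descends into excluded trees.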
data/resources/S3/HashCache.py
@@ -0,0 +1,53 @@
+ import cPickle as pickle
+
+ class HashCache(object):
+     def __init__(self):
+         self.inodes = dict()
+
+     def add(self, dev, inode, mtime, size, md5):
+         if dev not in self.inodes:
+             self.inodes[dev] = dict()
+         if inode not in self.inodes[dev]:
+             self.inodes[dev][inode] = dict()
+         self.inodes[dev][inode][mtime] = dict(md5=md5, size=size)
+
+     def md5(self, dev, inode, mtime, size):
+         try:
+             d = self.inodes[dev][inode][mtime]
+             if d['size'] != size:
+                 return None
+         except:
+             return None
+         return d['md5']
+
+     def mark_all_for_purge(self):
+         for d in self.inodes.keys():
+             for i in self.inodes[d].keys():
+                 for c in self.inodes[d][i].keys():
+                     self.inodes[d][i][c]['purge'] = True
+
+     def unmark_for_purge(self, dev, inode, mtime, size):
+         d = self.inodes[dev][inode][mtime]
+         if d['size'] == size and 'purge' in d:
+             del self.inodes[dev][inode][mtime]['purge']
+
+     def purge(self):
+         for d in self.inodes.keys():
+             for i in self.inodes[d].keys():
+                 for m in self.inodes[d][i].keys():
+                     if 'purge' in self.inodes[d][i][m]:
+                         del self.inodes[d][i]
+                         break
+
+     def save(self, f):
+         d = dict(inodes=self.inodes, version=1)
+         f = open(f, 'w')
+         p = pickle.dump(d, f)
+         f.close()
+
+     def load(self, f):
+         f = open(f, 'r')
+         d = pickle.load(f)
+         f.close()
+         if d.get('version') == 1 and 'inodes' in d:
+             self.inodes = d['inodes']
Binary file (data/resources/S3/HashCache.pyc)
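HashCache above keys each entry by (device, inode, mtime) and returns a hit only when the recorded size also matches, so a rewritten or touched file falls through to a fresh md5 computation. A rough usage sketch mirroring the _maintain_cache() helper in FileLists.py (Python 2, like the module; the path 'demo.cache' and the placeholder digest are illustrative, and the import assumes the vendored S3 package directory is importable):

    import os
    from S3.HashCache import HashCache

    cache = HashCache()
    st = os.lstat('setup.py')            # any file that exists locally
    md5 = cache.md5(st.st_dev, st.st_ino, st.st_mtime, st.st_size)
    if md5 is None:
        # Cache miss: a real caller hashes the file here (placeholder below).
        md5 = 'd41d8cd98f00b204e9800998ecf8427e'
        cache.add(st.st_dev, st.st_ino, st.st_mtime, st.st_size, md5)

    # After a run, drop entries for files that were not seen this time:
    cache.mark_all_for_purge()
    cache.unmark_for_purge(st.st_dev, st.st_ino, st.st_mtime, st.st_size)
    cache.purge()
    cache.save('demo.cache')

save() and load() round-trip the whole inode map through cPickle, which is why fetch_local_list() can tolerate a missing cache file and simply start a new one.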