s3_cmd_bin 0.0.1 → 0.0.2

Files changed (64)
  1. data/lib/s3_cmd_bin/version.rb +1 -1
  2. data/resources/ChangeLog +0 -0
  3. data/resources/INSTALL +0 -0
  4. data/resources/MANIFEST.in +1 -0
  5. data/resources/NEWS +1 -40
  6. data/resources/README +0 -0
  7. data/resources/S3/ACL.py +0 -0
  8. data/resources/S3/AccessLog.py +0 -0
  9. data/resources/S3/BidirMap.py +0 -0
  10. data/resources/S3/CloudFront.py +8 -37
  11. data/resources/S3/Config.py +1 -88
  12. data/resources/S3/Exceptions.py +1 -1
  13. data/resources/S3/FileLists.py +100 -272
  14. data/resources/S3/MultiPart.py +21 -45
  15. data/resources/S3/PkgInfo.py +1 -1
  16. data/resources/S3/Progress.py +0 -17
  17. data/resources/S3/S3.py +52 -148
  18. data/resources/S3/S3Uri.py +2 -3
  19. data/resources/S3/SimpleDB.py +0 -3
  20. data/resources/S3/SortedDict.py +0 -3
  21. data/resources/S3/Utils.py +3 -80
  22. data/resources/S3/__init__.py +0 -0
  23. data/resources/TODO +0 -0
  24. data/resources/artwork/AtomicClockRadio.ttf +0 -0
  25. data/resources/artwork/TypeRa.ttf +0 -0
  26. data/resources/artwork/site-top-full-size.xcf +0 -0
  27. data/resources/artwork/site-top-label-download.png +0 -0
  28. data/resources/artwork/site-top-label-s3cmd.png +0 -0
  29. data/resources/artwork/site-top-label-s3sync.png +0 -0
  30. data/resources/artwork/site-top-s3tools-logo.png +0 -0
  31. data/resources/artwork/site-top.jpg +0 -0
  32. data/resources/artwork/site-top.png +0 -0
  33. data/resources/artwork/site-top.xcf +0 -0
  34. data/resources/run-tests.py +2 -2
  35. data/resources/s3cmd +306 -600
  36. data/resources/s3cmd.1 +97 -84
  37. data/resources/setup.cfg +0 -0
  38. data/resources/setup.py +0 -0
  39. data/resources/testsuite.tar.gz +0 -0
  40. metadata +2 -26
  41. data/resources/LICENSE +0 -339
  42. data/resources/Makefile +0 -4
  43. data/resources/S3/ACL.pyc +0 -0
  44. data/resources/S3/AccessLog.pyc +0 -0
  45. data/resources/S3/BidirMap.pyc +0 -0
  46. data/resources/S3/CloudFront.pyc +0 -0
  47. data/resources/S3/Config.pyc +0 -0
  48. data/resources/S3/ConnMan.py +0 -71
  49. data/resources/S3/ConnMan.pyc +0 -0
  50. data/resources/S3/Exceptions.pyc +0 -0
  51. data/resources/S3/FileDict.py +0 -53
  52. data/resources/S3/FileDict.pyc +0 -0
  53. data/resources/S3/FileLists.pyc +0 -0
  54. data/resources/S3/HashCache.py +0 -53
  55. data/resources/S3/HashCache.pyc +0 -0
  56. data/resources/S3/MultiPart.pyc +0 -0
  57. data/resources/S3/PkgInfo.pyc +0 -0
  58. data/resources/S3/Progress.pyc +0 -0
  59. data/resources/S3/S3.pyc +0 -0
  60. data/resources/S3/S3Uri.pyc +0 -0
  61. data/resources/S3/SortedDict.pyc +0 -0
  62. data/resources/S3/Utils.pyc +0 -0
  63. data/resources/S3/__init__.pyc +0 -0
  64. data/resources/magic +0 -63
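
The per-file diffs follow, in the order listed above. As an aside, a listing like this can be reproduced locally; a minimal sketch (assuming the `gem` and `diff` command-line tools are installed; the helper name is illustrative, not part of the gem):

    # Fetch and unpack both gem versions, then diff the unpacked trees.
    import subprocess

    def diff_gem_versions(name, old, new):
        for version in (old, new):
            subprocess.run(["gem", "fetch", name, "-v", version], check=True)
            subprocess.run(["gem", "unpack", "%s-%s.gem" % (name, version)], check=True)
        # `diff` exits 1 when the trees differ, so don't pass check=True here.
        result = subprocess.run(
            ["diff", "-ru", "%s-%s" % (name, old), "%s-%s" % (name, new)],
            capture_output=True, text=True)
        return result.stdout

    print(diff_gem_versions("s3_cmd_bin", "0.0.1", "0.0.2"))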
data/lib/s3_cmd_bin/version.rb

@@ -1,3 +1,3 @@
 module S3CmdBin
-  VERSION = "0.0.1"
+  VERSION = "0.0.2"
 end
data/resources/ChangeLog (file without changes)
data/resources/INSTALL (file without changes)
data/resources/MANIFEST.in

@@ -1,2 +1,3 @@
 include INSTALL README NEWS
 include s3cmd.1
+include S3/*.py
data/resources/NEWS

@@ -1,43 +1,4 @@
-s3cmd 1.5.0-alpha3 - 2013-03-11
-==================
-* Persistent HTTP/HTTPS connections for massive speedup (Michal Ludvig)
-* New switch --quiet for suppressing all output (Siddarth Prakash)
-* Honour "umask" on file downloads (Jason Dalton)
-* Various bugfixes from many contributors
-
-s3cmd 1.5.0-alpha2 - 2013-03-04
-==================
-* IAM roles support (David Kohen, Eric Dowd)
-* Manage bucket policies (Kota Uenishi)
-* Various bugfixes from many contributors
-
-s3cmd 1.5.0-alpha1 - 2013-02-19
-==================
-* Server-side copy for hardlinks/softlinks to improve performance
-  (Matt Domsch)
-* New [signurl] command (Craig Ringer)
-* Improved symlink-loop detection (Michal Ludvig)
-* Add --delete-after option for sync (Matt Domsch)
-* Handle empty return bodies when processing S3 errors.
-  (Kelly McLaughlin)
-* Upload from STDIN (Eric Connell)
-* Updated bucket locations (Stefhen Hovland)
-* Support custom HTTP headers (Brendan O'Connor, Karl Matthias)
-* Improved MIME support (Karsten Sperling, Christopher Noyes)
-* Added support for --acl-grant/--acl-revoke to 'sync' command
-  (Michael Tyson)
-* CloudFront: Support default index and default root invalidation
-  (Josep del Rio)
-* Command line options for access/secret keys (Matt Sweeney)
-* Support [setpolicy] for setting bucket policies (Joe Fiorini)
-* Respect the $TZ environment variable (James Brown)
-* Reduce memory consumption for [s3cmd du] (Charlie Schluting)
-* Rate limit progress updates (Steven Noonan)
-* Download from S3 to a temp file first (Sumit Kumar)
-* Reuse a single connection when doing a bucket list (Kelly McLaughlin)
-* Delete empty files if object_get() failed (Oren Held)
-
-s3cmd 1.1.0 - (never released)
+s3cmd 1.1.0 - ???
 ===========
 * MultiPart upload enabled for both [put] and [sync]. Default chunk
   size is 15MB.
data/resources/README (file without changes)
data/resources/S3/ACL.py (file without changes)
data/resources/S3/AccessLog.py (file without changes)
data/resources/S3/BidirMap.py (file without changes)
data/resources/S3/CloudFront.py

@@ -133,7 +133,7 @@ class DistributionConfig(object):
     ## </Logging>
     ## </DistributionConfig>

-    EMPTY_CONFIG = "<DistributionConfig><S3Origin><DNSName/></S3Origin><CallerReference/><Enabled>true</Enabled></DistributionConfig>"
+    EMPTY_CONFIG = "<DistributionConfig><Origin/><CallerReference/><Enabled>true</Enabled></DistributionConfig>"
     xmlns = "http://cloudfront.amazonaws.com/doc/%(api_ver)s/" % { 'api_ver' : cloudfront_api_version }
     def __init__(self, xml = None, tree = None):
         if xml is None:
@@ -174,8 +174,7 @@ class DistributionConfig(object):
        tree.attrib['xmlns'] = DistributionConfig.xmlns

        ## Retain the order of the following calls!
-       s3org = appendXmlTextNode("S3Origin", '', tree)
-       appendXmlTextNode("DNSName", self.info['S3Origin']['DNSName'], s3org)
+       appendXmlTextNode("Origin", self.info['Origin'], tree)
        appendXmlTextNode("CallerReference", self.info['CallerReference'], tree)
        for cname in self.info['CNAME']:
            appendXmlTextNode("CNAME", cname.lower(), tree)
@@ -282,7 +281,7 @@ class InvalidationBatch(object):
        tree = ET.Element("InvalidationBatch")

        for path in self.paths:
-           if len(path) < 1 or path[0] != "/":
+           if path[0] != "/":
                path = "/" + path
            appendXmlTextNode("Path", path, tree)
        appendXmlTextNode("CallerReference", self.reference, tree)
@@ -323,7 +322,7 @@ class CloudFront(object):
    def CreateDistribution(self, uri, cnames_add = [], comment = None, logging = None, default_root_object = None):
        dist_config = DistributionConfig()
        dist_config.info['Enabled'] = True
-       dist_config.info['S3Origin']['DNSName'] = uri.host_name()
+       dist_config.info['Origin'] = uri.host_name()
        dist_config.info['CallerReference'] = str(uri)
        dist_config.info['DefaultRootObject'] = default_root_object
        if comment == None:
@@ -424,23 +423,7 @@ class CloudFront(object):
                           body = request_body, headers = headers)
        return response

-   def InvalidateObjects(self, uri, paths, default_index_file, invalidate_default_index_on_cf, invalidate_default_index_root_on_cf):
-       # joseprio: if the user doesn't want to invalidate the default index
-       # path, or if the user wants to invalidate the root of the default
-       # index, we need to process those paths
-       if default_index_file is not None and (not invalidate_default_index_on_cf or invalidate_default_index_root_on_cf):
-           new_paths = []
-           default_index_suffix = '/' + default_index_file
-           for path in paths:
-               if path.endswith(default_index_suffix) or path == default_index_file:
-                   if invalidate_default_index_on_cf:
-                       new_paths.append(path)
-                   if invalidate_default_index_root_on_cf:
-                       new_paths.append(path[:-len(default_index_file)])
-               else:
-                   new_paths.append(path)
-           paths = new_paths
-
+   def InvalidateObjects(self, uri, paths):
        # uri could be either cf:// or s3:// uri
        cfuri = self.get_dist_name_for_bucket(uri)
        if len(paths) > 999:
@@ -534,10 +517,6 @@ class CloudFront(object):
        if not headers.has_key("x-amz-date"):
            headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

-       if len(self.config.access_token)>0:
-           self.config.refresh_role()
-           headers['x-amz-security-token']=self.config.access_token
-
        signature = self.sign_request(headers)
        headers["Authorization"] = "AWS "+self.config.access_key+":"+signature

@@ -576,16 +555,8 @@ class CloudFront(object):
        for d in response['dist_list'].dist_summs:
            if d.info.has_key("S3Origin"):
                CloudFront.dist_list[getBucketFromHostname(d.info['S3Origin']['DNSName'])[0]] = d.uri()
-           elif d.info.has_key("CustomOrigin"):
-               # Aral: This used to skip over distributions with CustomOrigin, however, we mustn't
-               # do this since S3 buckets that are set up as websites use custom origins.
-               # Thankfully, the custom origin URLs they use start with the URL of the
-               # S3 bucket. Here, we make use this naming convention to support this use case.
-               distListIndex = getBucketFromHostname(d.info['CustomOrigin']['DNSName'])[0];
-               distListIndex = distListIndex[:len(uri.bucket())]
-               CloudFront.dist_list[distListIndex] = d.uri()
            else:
-               # Aral: I'm not sure when this condition will be reached, but keeping it in there.
+               # Skip over distributions with CustomOrigin
                continue
        debug("dist_list: %s" % CloudFront.dist_list)
        try:
@@ -692,7 +663,7 @@ class Cmd(object):
        d = response['distribution']
        dc = d.info['DistributionConfig']
        output("Distribution created:")
-       pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']))
+       pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
        pretty_output("DistId", d.uri())
        pretty_output("DomainName", d.info['DomainName'])
        pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
@@ -734,7 +705,7 @@ class Cmd(object):
        response = cf.GetDistInfo(cfuri)
        d = response['distribution']
        dc = d.info['DistributionConfig']
-       pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']))
+       pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
        pretty_output("DistId", d.uri())
        pretty_output("DomainName", d.info['DomainName'])
        pretty_output("Status", d.info['Status'])
data/resources/S3/Config.py

@@ -7,11 +7,8 @@ import logging
 from logging import debug, info, warning, error
 import re
 import os
-import sys
 import Progress
 from SortedDict import SortedDict
-import httplib
-import json

 class Config(object):
     _instance = None
@@ -19,7 +16,6 @@ class Config(object):
     _doc = {}
     access_key = ""
     secret_key = ""
-    access_token = ""
     host_base = "s3.amazonaws.com"
     host_bucket = "%(bucket)s.s3.amazonaws.com"
     simpledb_host = "sdb.amazonaws.com"
@@ -44,7 +40,6 @@ class Config(object):
     proxy_port = 3128
     encrypt = False
     dry_run = False
-    add_encoding_exts = ""
     preserve_attrs = True
     preserve_attrs_list = [
         'uname', # Verbose owner Name (e.g. 'root')
@@ -55,14 +50,10 @@ class Config(object):
         'mtime', # Modification timestamp
         'ctime', # Creation timestamp
         'mode', # File mode (e.g. rwxr-xr-x = 755)
-        'md5', # File MD5 (if known)
         #'acl', # Full ACL (not yet supported)
     ]
     delete_removed = False
-    delete_after = False
-    delete_after_fetch = False
     _doc['delete_removed'] = "[sync] Remove remote S3 objects when local file has been deleted"
-    delay_updates = False
     gpg_passphrase = ""
     gpg_command = ""
     gpg_encrypt = "%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
@@ -89,15 +80,9 @@ class Config(object):
     follow_symlinks = False
     socket_timeout = 300
     invalidate_on_cf = False
-    # joseprio: new flags for default index invalidation
-    invalidate_default_index_on_cf = False
-    invalidate_default_index_root_on_cf = True
     website_index = "index.html"
     website_error = ""
     website_endpoint = "http://%(bucket)s.s3-website-%(location)s.amazonaws.com/"
-    additional_destinations = []
-    cache_file = ""
-    add_headers = ""

     ## Creating a singleton
     def __new__(self, configfile = None):
@@ -107,73 +92,7 @@ class Config(object):

     def __init__(self, configfile = None):
         if configfile:
-            try:
-                self.read_config_file(configfile)
-            except IOError, e:
-                if 'AWS_CREDENTIAL_FILE' in os.environ:
-                    self.env_config()
-            if len(self.access_key)==0:
-                self.role_config()
-
-    def role_config(self):
-        conn = httplib.HTTPConnection(host='169.254.169.254',timeout=0.1)
-        try:
-            conn.request('GET', "/latest/meta-data/iam/security-credentials/")
-            resp = conn.getresponse()
-            files = resp.read()
-            if resp.status == 200 and len(files)>1:
-                conn.request('GET', "/latest/meta-data/iam/security-credentials/%s"%files)
-                resp=conn.getresponse()
-                if resp.status == 200:
-                    creds=json.load(resp)
-                    Config().update_option('access_key', creds['AccessKeyId'].encode('ascii'))
-                    Config().update_option('secret_key', creds['SecretAccessKey'].encode('ascii'))
-                    Config().update_option('access_token', creds['Token'].encode('ascii'))
-                else:
-                    raise IOError
-            else:
-                raise IOError
-        except:
-            raise
-
-    def role_refresh(self):
-        try:
-            self.role_config()
-        except:
-            warning("Could not refresh role")
-
-    def env_config(self):
-        cred_content = ""
-        try:
-            cred_file = open(os.environ['AWS_CREDENTIAL_FILE'],'r')
-            cred_content = cred_file.read()
-        except IOError, e:
-            debug("Error %d accessing credentials file %s" % (e.errno,os.environ['AWS_CREDENTIAL_FILE']))
-        r_data = re.compile("^\s*(?P<orig_key>\w+)\s*=\s*(?P<value>.*)")
-        r_quotes = re.compile("^\"(.*)\"\s*$")
-        if len(cred_content)>0:
-            for line in cred_content.splitlines():
-                is_data = r_data.match(line)
-                is_data = r_data.match(line)
-                if is_data:
-                    data = is_data.groupdict()
-                    if r_quotes.match(data["value"]):
-                        data["value"] = data["value"][1:-1]
-                    if data["orig_key"]=="AWSAccessKeyId":
-                        data["key"] = "access_key"
-                    elif data["orig_key"]=="AWSSecretKey":
-                        data["key"] = "secret_key"
-                    else:
-                        del data["key"]
-                    if "key" in data:
-                        Config().update_option(data["key"], data["value"])
-                        if data["key"] in ("access_key", "secret_key", "gpg_passphrase"):
-                            print_value = (data["value"][:2]+"...%d_chars..."+data["value"][-1:]) % (len(data["value"]) - 3)
-                        else:
-                            print_value = data["value"]
-                        debug("env_Config: %s->%s" % (data["key"], print_value))
-
-
+            self.read_config_file(configfile)

     def option_list(self):
         retval = []
@@ -193,12 +112,6 @@ class Config(object):
         cp = ConfigParser(configfile)
         for option in self.option_list():
             self.update_option(option, cp.get(option))
-
-        if cp.get('add_headers'):
-            for option in cp.get('add_headers').split(","):
-                (key, value) = option.split(':')
-                self.extra_headers[key.replace('_', '-').strip()] = value.strip()
-
         self._parsed_files.append(configfile)

     def dump_config(self, stream):
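
The Config.py hunks above back out the IAM-role support that s3cmd 1.5.0-alpha2 added: the removed role_config() pulls temporary credentials from the EC2 instance-metadata service, and the removed env_config() reads them from $AWS_CREDENTIAL_FILE. A minimal standalone sketch of the metadata lookup, for reference (Python 3 for readability; the bundled code uses Python 2's httplib, and the function name here is illustrative only):

    import json
    import urllib.request

    METADATA = "http://169.254.169.254/latest/meta-data/iam/security-credentials/"

    def fetch_role_credentials(timeout=0.1):
        # The first request returns the instance's role name; the second
        # returns a JSON document with temporary keys and a session token.
        role = urllib.request.urlopen(METADATA, timeout=timeout).read().decode("ascii")
        with urllib.request.urlopen(METADATA + role, timeout=timeout) as resp:
            creds = json.load(resp)
        return creds["AccessKeyId"], creds["SecretAccessKey"], creds["Token"]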
data/resources/S3/Exceptions.py

@@ -44,7 +44,7 @@ class S3Error (S3Exception):
         if response.has_key("headers"):
             for header in response["headers"]:
                 debug("HttpHeader: %s: %s" % (header, response["headers"][header]))
-        if response.has_key("data") and response["data"]:
+        if response.has_key("data"):
             tree = getTreeFromXml(response["data"])
             error_node = tree
             if not error_node.tag == "Error":
data/resources/S3/FileLists.py

@@ -6,59 +6,56 @@
 from S3 import S3
 from Config import Config
 from S3Uri import S3Uri
-from FileDict import FileDict
+from SortedDict import SortedDict
 from Utils import *
 from Exceptions import ParameterError
-from HashCache import HashCache

 from logging import debug, info, warning, error

 import os
 import glob
-import copy

-__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include", "parse_attrs_header"]
+__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include"]

 def _fswalk_follow_symlinks(path):
-    '''
-    Walk filesystem, following symbolic links (but without recursion), on python2.4 and later
-
-    If a symlink directory loop is detected, emit a warning and skip.
-    E.g.: dir1/dir2/sym-dir -> ../dir2
-    '''
-    assert os.path.isdir(path) # only designed for directory argument
-    walkdirs = set([path])
-    for dirpath, dirnames, filenames in os.walk(path):
-        handle_exclude_include_walk(dirpath, dirnames, [])
-        real_dirpath = os.path.realpath(dirpath)
-        for dirname in dirnames:
-            current = os.path.join(dirpath, dirname)
-            real_current = os.path.realpath(current)
-            if os.path.islink(current):
-                if (real_dirpath == real_current or
-                    real_dirpath.startswith(real_current + os.path.sep)):
-                    warning("Skipping recursively symlinked directory %s" % dirname)
-                else:
-                    walkdirs.add(current)
-    for walkdir in walkdirs:
-        for dirpath, dirnames, filenames in os.walk(walkdir):
-            handle_exclude_include_walk(dirpath, dirnames, [])
-            yield (dirpath, dirnames, filenames)
-
-def _fswalk_no_symlinks(path):
-    '''
-    Directory tree generator
-
-    path (str) is the root of the directory tree to walk
-    '''
-    for dirpath, dirnames, filenames in os.walk(path):
-        handle_exclude_include_walk(dirpath, dirnames, filenames)
-        yield (dirpath, dirnames, filenames)
+    '''
+    Walk filesystem, following symbolic links (but without recursion), on python2.4 and later
+
+    If a recursive directory link is detected, emit a warning and skip.
+    '''
+    assert os.path.isdir(path) # only designed for directory argument
+    walkdirs = set([path])
+    targets = set()
+    for dirpath, dirnames, filenames in os.walk(path):
+        for dirname in dirnames:
+            current = os.path.join(dirpath, dirname)
+            target = os.path.realpath(current)
+            if os.path.islink(current):
+                if target in targets:
+                    warning("Skipping recursively symlinked directory %s" % dirname)
+                else:
+                    walkdirs.add(current)
+            targets.add(target)
+    for walkdir in walkdirs:
+        for value in os.walk(walkdir):
+            yield value
+
+def _fswalk(path, follow_symlinks):
+    '''
+    Directory tree generator
+
+    path (str) is the root of the directory tree to walk
+
+    follow_symlinks (bool) indicates whether to descend into symbolically linked directories
+    '''
+    if follow_symlinks:
+        return _fswalk_follow_symlinks(path)
+    return os.walk(path)

 def filter_exclude_include(src_list):
     info(u"Applying --exclude/--include")
     cfg = Config()
-    exclude_list = FileDict(ignore_case = False)
+    exclude_list = SortedDict(ignore_case = False)
     for file in src_list.keys():
         debug(u"CHECK: %s" % file)
         excluded = False
@@ -81,90 +78,23 @@ def filter_exclude_include(src_list):
             del(src_list[file])
             continue
         else:
-            debug(u"PASS: %r" % (file))
+            debug(u"PASS: %s" % (file))
     return src_list, exclude_list

-def handle_exclude_include_walk(root, dirs, files):
-    cfg = Config()
-    copydirs = copy.copy(dirs)
-    copyfiles = copy.copy(files)
-
-    # exclude dir matches in the current directory
-    # this prevents us from recursing down trees we know we want to ignore
-    for x in copydirs:
-        d = os.path.join(root, x, '')
-        debug(u"CHECK: %r" % d)
-        excluded = False
-        for r in cfg.exclude:
-            if r.search(d):
-                excluded = True
-                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
-                break
-        if excluded:
-            ## No need to check for --include if not excluded
-            for r in cfg.include:
-                if r.search(d):
-                    excluded = False
-                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
-                    break
-        if excluded:
-            ## Still excluded - ok, action it
-            debug(u"EXCLUDE: %r" % d)
-            dirs.remove(x)
-            continue
-        else:
-            debug(u"PASS: %r" % (d))
-
-    # exclude file matches in the current directory
-    for x in copyfiles:
-        file = os.path.join(root, x)
-        debug(u"CHECK: %r" % file)
-        excluded = False
-        for r in cfg.exclude:
-            if r.search(file):
-                excluded = True
-                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
-                break
-        if excluded:
-            ## No need to check for --include if not excluded
-            for r in cfg.include:
-                if r.search(file):
-                    excluded = False
-                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
-                    break
-        if excluded:
-            ## Still excluded - ok, action it
-            debug(u"EXCLUDE: %s" % file)
-            files.remove(x)
-            continue
-        else:
-            debug(u"PASS: %r" % (file))
-
 def fetch_local_list(args, recursive = None):
-    def _get_filelist_local(loc_list, local_uri, cache):
+    def _get_filelist_local(local_uri):
         info(u"Compiling list of local files...")
-
-        if deunicodise(local_uri.basename()) == "-":
-            loc_list["-"] = {
-                'full_name_unicode' : '-',
-                'full_name' : '-',
-                'size' : -1,
-                'mtime' : -1,
-            }
-            return loc_list, True
         if local_uri.isdir():
             local_base = deunicodise(local_uri.basename())
             local_path = deunicodise(local_uri.path())
-            if cfg.follow_symlinks:
-                filelist = _fswalk_follow_symlinks(local_path)
-            else:
-                filelist = _fswalk_no_symlinks(local_path)
+            filelist = _fswalk(local_path, cfg.follow_symlinks)
             single_file = False
         else:
             local_base = ""
             local_path = deunicodise(local_uri.dirname())
             filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
             single_file = True
+        loc_list = SortedDict(ignore_case = False)
         for root, dirs, files in filelist:
             rel_root = root.replace(local_path, local_base, 1)
             for f in files:
@@ -188,43 +118,13 @@ def fetch_local_list(args, recursive = None):
                     'full_name' : full_name,
                     'size' : sr.st_size,
                     'mtime' : sr.st_mtime,
-                    'dev' : sr.st_dev,
-                    'inode' : sr.st_ino,
-                    'uid' : sr.st_uid,
-                    'gid' : sr.st_gid,
-                    'sr': sr # save it all, may need it in preserve_attrs_list
                     ## TODO: Possibly more to save here...
                 }
-                if 'md5' in cfg.sync_checks:
-                    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
-                    if md5 is None:
-                        try:
-                            md5 = loc_list.get_md5(relative_file) # this does the file I/O
-                        except IOError:
-                            continue
-                        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
-                    loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
         return loc_list, single_file

-    def _maintain_cache(cache, local_list):
-        if cfg.cache_file:
-            cache.mark_all_for_purge()
-            for i in local_list.keys():
-                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
-            cache.purge()
-            cache.save(cfg.cache_file)
-
     cfg = Config()
-
-    cache = HashCache()
-    if cfg.cache_file:
-        try:
-            cache.load(cfg.cache_file)
-        except IOError:
-            info(u"No cache file found, creating it.")
-
     local_uris = []
-    local_list = FileDict(ignore_case = False)
+    local_list = SortedDict(ignore_case = False)
     single_file = False

     if type(args) not in (list, tuple):
@@ -242,7 +142,8 @@ def fetch_local_list(args, recursive = None):
         local_uris.append(uri)

     for uri in local_uris:
-        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)
+        list_for_uri, single_file = _get_filelist_local(uri)
+        local_list.update(list_for_uri)

     ## Single file is True if and only if the user
     ## specified one local URI and that URI represents
@@ -252,8 +153,6 @@ def fetch_local_list(args, recursive = None):
     if len(local_list) > 1:
         single_file = False

-    _maintain_cache(cache, local_list)
-
     return local_list, single_file

 def fetch_remote_list(args, require_attribs = False, recursive = None):
@@ -284,7 +183,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
             rem_base = rem_base[:rem_base.rfind('/')+1]
             remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
         rem_base_len = len(rem_base)
-        rem_list = FileDict(ignore_case = False)
+        rem_list = SortedDict(ignore_case = False)
         break_now = False
         for object in response['list']:
             if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
@@ -292,7 +191,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
                 key = os.path.basename(object['Key'])
                 object_uri_str = remote_uri_original.uri()
                 break_now = True
-                rem_list = FileDict(ignore_case = False) ## Remove whatever has already been put to rem_list
+                rem_list = {} ## Remove whatever has already been put to rem_list
             else:
                 key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !!
                 object_uri_str = remote_uri.uri() + key
@@ -303,18 +202,14 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
                 'object_key' : object['Key'],
                 'object_uri_str' : object_uri_str,
                 'base_uri' : remote_uri,
-                'dev' : None,
-                'inode' : None,
             }
-            md5 = object['ETag'][1:-1]
-            rem_list.record_md5(key, md5)
             if break_now:
                 break
         return rem_list

     cfg = Config()
     remote_uris = []
-    remote_list = FileDict(ignore_case = False)
+    remote_list = SortedDict(ignore_case = False)

     if type(args) not in (list, tuple):
         args = [args]
@@ -333,7 +228,6 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
             objectlist = _get_filelist_remote(uri)
             for key in objectlist:
                 remote_list[key] = objectlist[key]
-                remote_list.record_md5(key, objectlist.get_md5(key))
         else:
             for uri in remote_uris:
                 uri_str = str(uri)
@@ -371,147 +265,81 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
                     'md5': response['headers']['etag'].strip('"\''),
                     'timestamp' : dateRFC822toUnix(response['headers']['date'])
                 })
-                # get md5 from header if it's present. We would have set that during upload
-                if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
-                    attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
-                    if attrs.has_key('md5'):
-                        remote_item.update({'md5': attrs['md5']})
-
                 remote_list[key] = remote_item
     return remote_list

-def parse_attrs_header(attrs_header):
-    attrs = {}
-    for attr in attrs_header.split("/"):
-        key, val = attr.split(":")
-        attrs[key] = val
-    return attrs
-
-
-def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False):
+def compare_filelists(src_list, dst_list, src_remote, dst_remote):
     def __direction_str(is_remote):
         return is_remote and "remote" or "local"

-    def _compare(src_list, dst_lst, src_remote, dst_remote, file):
-        """Return True if src_list[file] matches dst_list[file], else False"""
-        attribs_match = True
-        if not (src_list.has_key(file) and dst_list.has_key(file)):
-            info(u"%s: does not exist in one side or the other: src_list=%s, dst_list=%s" % (file, src_list.has_key(file), dst_list.has_key(file)))
-            return False
-
-        ## check size first
-        if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
-            debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
-            attribs_match = False
-
-        ## check md5
-        compare_md5 = 'md5' in cfg.sync_checks
-        # Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn"
-        if compare_md5:
-            if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
-                compare_md5 = False
-                info(u"disabled md5 check for %s" % file)
-        if attribs_match and compare_md5:
-            try:
-                src_md5 = src_list.get_md5(file)
-                dst_md5 = dst_list.get_md5(file)
-            except (IOError,OSError), e:
-                # md5 sum verification failed - ignore that file altogether
-                debug(u"IGNR: %s (disappeared)" % (file))
-                warning(u"%s: file disappeared, ignoring." % (file))
-                raise
-
-            if src_md5 != dst_md5:
-                ## checksums are different.
-                attribs_match = False
-                debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
-
-        return attribs_match
-
-    # we don't support local->local sync, use 'rsync' or something like that instead ;-)
+    # We don't support local->local sync, use 'rsync' or something like that instead ;-)
     assert(not(src_remote == False and dst_remote == False))

     info(u"Verifying attributes...")
     cfg = Config()
-    ## Items left on src_list will be transferred
-    ## Items left on update_list will be transferred after src_list
-    ## Items left on copy_pairs will be copied from dst1 to dst2
-    update_list = FileDict(ignore_case = False)
-    ## Items left on dst_list will be deleted
-    copy_pairs = []
+    exists_list = SortedDict(ignore_case = False)

     debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
+    debug("src_list.keys: %s" % src_list.keys())
+    debug("dst_list.keys: %s" % dst_list.keys())

-    for relative_file in src_list.keys():
-        debug(u"CHECK: %s" % (relative_file))
-
-        if dst_list.has_key(relative_file):
+    for file in src_list.keys():
+        debug(u"CHECK: %s" % file)
+        if dst_list.has_key(file):
             ## Was --skip-existing requested?
             if cfg.skip_existing:
-                debug(u"IGNR: %s (used --skip-existing)" % (relative_file))
-                del(src_list[relative_file])
-                del(dst_list[relative_file])
-                continue
-
-            try:
-                same_file = _compare(src_list, dst_list, src_remote, dst_remote, relative_file)
-            except (IOError,OSError), e:
-                debug(u"IGNR: %s (disappeared)" % (relative_file))
-                warning(u"%s: file disappeared, ignoring." % (relative_file))
-                del(src_list[relative_file])
-                del(dst_list[relative_file])
+                debug(u"IGNR: %s (used --skip-existing)" % (file))
+                exists_list[file] = src_list[file]
+                del(src_list[file])
+                ## Remove from destination-list, all that is left there will be deleted
+                del(dst_list[file])
                 continue

-            if same_file:
-                debug(u"IGNR: %s (transfer not needed)" % relative_file)
-                del(src_list[relative_file])
-                del(dst_list[relative_file])
+            attribs_match = True
+            ## Check size first
+            if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
+                debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
+                attribs_match = False

-            else:
-                # look for matching file in src
+            ## Check MD5
+            compare_md5 = 'md5' in cfg.sync_checks
+            # Multipart-uploaded files don't have a valid MD5 sum - it ends with "...-NN"
+            if compare_md5 and (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
+                compare_md5 = False
+                info(u"Disabled MD5 check for %s" % file)
+            if attribs_match and compare_md5:
                 try:
-                    md5 = src_list.get_md5(relative_file)
-                except IOError:
-                    md5 = None
-                if md5 is not None and dst_list.by_md5.has_key(md5):
-                    # Found one, we want to copy
-                    dst1 = list(dst_list.by_md5[md5])[0]
-                    debug(u"DST COPY src: %s -> %s" % (dst1, relative_file))
-                    copy_pairs.append((src_list[relative_file], dst1, relative_file))
-                    del(src_list[relative_file])
-                    del(dst_list[relative_file])
-                else:
-                    # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
-                    # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
-                    dst_list.record_md5(relative_file, md5)
-                    update_list[relative_file] = src_list[relative_file]
-                    del src_list[relative_file]
-                    del dst_list[relative_file]
+                    if src_remote == False and dst_remote == True:
+                        src_md5 = hash_file_md5(src_list[file]['full_name'])
+                        dst_md5 = dst_list[file]['md5']
+                    elif src_remote == True and dst_remote == False:
+                        src_md5 = src_list[file]['md5']
+                        dst_md5 = hash_file_md5(dst_list[file]['full_name'])
+                    elif src_remote == True and dst_remote == True:
+                        src_md5 = src_list[file]['md5']
+                        dst_md5 = dst_list[file]['md5']
+                except (IOError,OSError), e:
+                    # MD5 sum verification failed - ignore that file altogether
+                    debug(u"IGNR: %s (disappeared)" % (file))
+                    warning(u"%s: file disappeared, ignoring." % (file))
+                    del(src_list[file])
+                    del(dst_list[file])
+                    continue

-        else:
-            # dst doesn't have this file
-            # look for matching file elsewhere in dst
-            try:
-                md5 = src_list.get_md5(relative_file)
-            except IOError:
-                md5 = None
-            dst1 = dst_list.find_md5_one(md5)
-            if dst1 is not None:
-                # Found one, we want to copy
-                debug(u"DST COPY dst: %s -> %s" % (dst1, relative_file))
-                copy_pairs.append((src_list[relative_file], dst1, relative_file))
-                del(src_list[relative_file])
-            else:
-                # we don't have this file, and we don't have a copy of this file elsewhere. Get it.
-                # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
-                # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
-                dst_list.record_md5(relative_file, md5)
+                if src_md5 != dst_md5:
+                    ## Checksums are different.
+                    attribs_match = False
+                    debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
+
+            if attribs_match:
+                ## Remove from source-list, all that is left there will be transferred
+                debug(u"IGNR: %s (transfer not needed)" % file)
+                exists_list[file] = src_list[file]
+                del(src_list[file])

-    for f in dst_list.keys():
-        if src_list.has_key(f) or update_list.has_key(f):
-            # leave only those not on src_list + update_list
-            del dst_list[f]
+            ## Remove from destination-list, all that is left there will be deleted
+            del(dst_list[file])

-    return src_list, dst_list, update_list, copy_pairs
+    return src_list, dst_list, exists_list

 # vim:et:ts=4:sts=4:ai
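
One detail in the compare_filelists hunk above is worth spelling out: both the removed and the restored code skip the MD5 check whenever an ETag contains a "-", because multipart-uploaded objects report an ETag that is a hash of part hashes rather than the object's MD5. A standalone sketch of that comparison (hypothetical helper names, not s3cmd's own API):

    import hashlib

    def is_multipart_etag(etag):
        # e.g. "9b2cf535f27731c974343645a3985328-3": the "-3" suffix means the
        # value hashes three part checksums, so it is not a plain MD5.
        return "-" in etag

    def md5_of_file(path):
        md5 = hashlib.md5()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                md5.update(chunk)
        return md5.hexdigest()

    def matches_remote_md5(local_path, remote_etag):
        etag = remote_etag.strip('"\'')
        if is_multipart_etag(etag):
            return None  # MD5 comparison impossible; fall back to size checks
        return md5_of_file(local_path) == etag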