s3_cmd_bin 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. data/lib/s3_cmd_bin/version.rb +1 -1
  2. data/resources/ChangeLog +0 -0
  3. data/resources/INSTALL +0 -0
  4. data/resources/MANIFEST.in +1 -0
  5. data/resources/NEWS +1 -40
  6. data/resources/README +0 -0
  7. data/resources/S3/ACL.py +0 -0
  8. data/resources/S3/AccessLog.py +0 -0
  9. data/resources/S3/BidirMap.py +0 -0
  10. data/resources/S3/CloudFront.py +8 -37
  11. data/resources/S3/Config.py +1 -88
  12. data/resources/S3/Exceptions.py +1 -1
  13. data/resources/S3/FileLists.py +100 -272
  14. data/resources/S3/MultiPart.py +21 -45
  15. data/resources/S3/PkgInfo.py +1 -1
  16. data/resources/S3/Progress.py +0 -17
  17. data/resources/S3/S3.py +52 -148
  18. data/resources/S3/S3Uri.py +2 -3
  19. data/resources/S3/SimpleDB.py +0 -3
  20. data/resources/S3/SortedDict.py +0 -3
  21. data/resources/S3/Utils.py +3 -80
  22. data/resources/S3/__init__.py +0 -0
  23. data/resources/TODO +0 -0
  24. data/resources/artwork/AtomicClockRadio.ttf +0 -0
  25. data/resources/artwork/TypeRa.ttf +0 -0
  26. data/resources/artwork/site-top-full-size.xcf +0 -0
  27. data/resources/artwork/site-top-label-download.png +0 -0
  28. data/resources/artwork/site-top-label-s3cmd.png +0 -0
  29. data/resources/artwork/site-top-label-s3sync.png +0 -0
  30. data/resources/artwork/site-top-s3tools-logo.png +0 -0
  31. data/resources/artwork/site-top.jpg +0 -0
  32. data/resources/artwork/site-top.png +0 -0
  33. data/resources/artwork/site-top.xcf +0 -0
  34. data/resources/run-tests.py +2 -2
  35. data/resources/s3cmd +306 -600
  36. data/resources/s3cmd.1 +97 -84
  37. data/resources/setup.cfg +0 -0
  38. data/resources/setup.py +0 -0
  39. data/resources/testsuite.tar.gz +0 -0
  40. metadata +2 -26
  41. data/resources/LICENSE +0 -339
  42. data/resources/Makefile +0 -4
  43. data/resources/S3/ACL.pyc +0 -0
  44. data/resources/S3/AccessLog.pyc +0 -0
  45. data/resources/S3/BidirMap.pyc +0 -0
  46. data/resources/S3/CloudFront.pyc +0 -0
  47. data/resources/S3/Config.pyc +0 -0
  48. data/resources/S3/ConnMan.py +0 -71
  49. data/resources/S3/ConnMan.pyc +0 -0
  50. data/resources/S3/Exceptions.pyc +0 -0
  51. data/resources/S3/FileDict.py +0 -53
  52. data/resources/S3/FileDict.pyc +0 -0
  53. data/resources/S3/FileLists.pyc +0 -0
  54. data/resources/S3/HashCache.py +0 -53
  55. data/resources/S3/HashCache.pyc +0 -0
  56. data/resources/S3/MultiPart.pyc +0 -0
  57. data/resources/S3/PkgInfo.pyc +0 -0
  58. data/resources/S3/Progress.pyc +0 -0
  59. data/resources/S3/S3.pyc +0 -0
  60. data/resources/S3/S3Uri.pyc +0 -0
  61. data/resources/S3/SortedDict.pyc +0 -0
  62. data/resources/S3/Utils.pyc +0 -0
  63. data/resources/S3/__init__.pyc +0 -0
  64. data/resources/magic +0 -63

data/lib/s3_cmd_bin/version.rb
@@ -1,3 +1,3 @@
  module S3CmdBin
- VERSION = "0.0.1"
+ VERSION = "0.0.2"
  end

data/resources/ChangeLog: file without changes
data/resources/INSTALL: file without changes

data/resources/MANIFEST.in
@@ -1,2 +1,3 @@
  include INSTALL README NEWS
  include s3cmd.1
+ include S3/*.py

data/resources/NEWS
@@ -1,43 +1,4 @@
- s3cmd 1.5.0-alpha3 - 2013-03-11
- ==================
- * Persistent HTTP/HTTPS connections for massive speedup (Michal Ludvig)
- * New switch --quiet for suppressing all output (Siddarth Prakash)
- * Honour "umask" on file downloads (Jason Dalton)
- * Various bugfixes from many contributors
-
- s3cmd 1.5.0-alpha2 - 2013-03-04
- ==================
- * IAM roles support (David Kohen, Eric Dowd)
- * Manage bucket policies (Kota Uenishi)
- * Various bugfixes from many contributors
-
- s3cmd 1.5.0-alpha1 - 2013-02-19
- ==================
- * Server-side copy for hardlinks/softlinks to improve performance
- (Matt Domsch)
- * New [signurl] command (Craig Ringer)
- * Improved symlink-loop detection (Michal Ludvig)
- * Add --delete-after option for sync (Matt Domsch)
- * Handle empty return bodies when processing S3 errors.
- (Kelly McLaughlin)
- * Upload from STDIN (Eric Connell)
- * Updated bucket locations (Stefhen Hovland)
- * Support custom HTTP headers (Brendan O'Connor, Karl Matthias)
- * Improved MIME support (Karsten Sperling, Christopher Noyes)
- * Added support for --acl-grant/--acl-revoke to 'sync' command
- (Michael Tyson)
- * CloudFront: Support default index and default root invalidation
- (Josep del Rio)
- * Command line options for access/secret keys (Matt Sweeney)
- * Support [setpolicy] for setting bucket policies (Joe Fiorini)
- * Respect the $TZ environment variable (James Brown)
- * Reduce memory consumption for [s3cmd du] (Charlie Schluting)
- * Rate limit progress updates (Steven Noonan)
- * Download from S3 to a temp file first (Sumit Kumar)
- * Reuse a single connection when doing a bucket list (Kelly McLaughlin)
- * Delete empty files if object_get() failed (Oren Held)
-
- s3cmd 1.1.0 - (never released)
+ s3cmd 1.1.0 - ???
  ===========
  * MultiPart upload enabled for both [put] and [sync]. Default chunk
  size is 15MB.

data/resources/README: file without changes
data/resources/S3/ACL.py: file without changes
data/resources/S3/AccessLog.py: file without changes
data/resources/S3/BidirMap.py: file without changes

data/resources/S3/CloudFront.py
@@ -133,7 +133,7 @@ class DistributionConfig(object):
  ## </Logging>
  ## </DistributionConfig>

- EMPTY_CONFIG = "<DistributionConfig><S3Origin><DNSName/></S3Origin><CallerReference/><Enabled>true</Enabled></DistributionConfig>"
+ EMPTY_CONFIG = "<DistributionConfig><Origin/><CallerReference/><Enabled>true</Enabled></DistributionConfig>"
  xmlns = "http://cloudfront.amazonaws.com/doc/%(api_ver)s/" % { 'api_ver' : cloudfront_api_version }
  def __init__(self, xml = None, tree = None):
  if xml is None:
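
Context for the EMPTY_CONFIG change: 0.0.2 vendors an older s3cmd snapshot (the 1.1.0 line, per the NEWS diff above), whose CloudFront code predates the nested S3Origin element. A minimal sketch of the two payload shapes, with illustrative helper names that are not part of the package:

import xml.etree.ElementTree as ET

def origin_config_nested(dns_name):
    # Shape built by 0.0.1 (newer s3cmd): <S3Origin><DNSName>...</DNSName></S3Origin>
    root = ET.Element("DistributionConfig")
    s3origin = ET.SubElement(root, "S3Origin")
    ET.SubElement(s3origin, "DNSName").text = dns_name
    return ET.tostring(root)

def origin_config_flat(dns_name):
    # Shape built by 0.0.2 (older s3cmd): a single flat <Origin> element
    root = ET.Element("DistributionConfig")
    ET.SubElement(root, "Origin").text = dns_name
    return ET.tostring(root)

print(origin_config_nested("bucket.s3.amazonaws.com"))
print(origin_config_flat("bucket.s3.amazonaws.com"))
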
@@ -174,8 +174,7 @@ class DistributionConfig(object):
  tree.attrib['xmlns'] = DistributionConfig.xmlns

  ## Retain the order of the following calls!
- s3org = appendXmlTextNode("S3Origin", '', tree)
- appendXmlTextNode("DNSName", self.info['S3Origin']['DNSName'], s3org)
+ appendXmlTextNode("Origin", self.info['Origin'], tree)
  appendXmlTextNode("CallerReference", self.info['CallerReference'], tree)
  for cname in self.info['CNAME']:
  appendXmlTextNode("CNAME", cname.lower(), tree)
@@ -282,7 +281,7 @@ class InvalidationBatch(object):
  tree = ET.Element("InvalidationBatch")

  for path in self.paths:
- if len(path) < 1 or path[0] != "/":
+ if path[0] != "/":
  path = "/" + path
  appendXmlTextNode("Path", path, tree)
  appendXmlTextNode("CallerReference", self.reference, tree)
@@ -323,7 +322,7 @@ class CloudFront(object):
  def CreateDistribution(self, uri, cnames_add = [], comment = None, logging = None, default_root_object = None):
  dist_config = DistributionConfig()
  dist_config.info['Enabled'] = True
- dist_config.info['S3Origin']['DNSName'] = uri.host_name()
+ dist_config.info['Origin'] = uri.host_name()
  dist_config.info['CallerReference'] = str(uri)
  dist_config.info['DefaultRootObject'] = default_root_object
  if comment == None:
@@ -424,23 +423,7 @@ class CloudFront(object):
  body = request_body, headers = headers)
  return response

- def InvalidateObjects(self, uri, paths, default_index_file, invalidate_default_index_on_cf, invalidate_default_index_root_on_cf):
- # joseprio: if the user doesn't want to invalidate the default index
- # path, or if the user wants to invalidate the root of the default
- # index, we need to process those paths
- if default_index_file is not None and (not invalidate_default_index_on_cf or invalidate_default_index_root_on_cf):
- new_paths = []
- default_index_suffix = '/' + default_index_file
- for path in paths:
- if path.endswith(default_index_suffix) or path == default_index_file:
- if invalidate_default_index_on_cf:
- new_paths.append(path)
- if invalidate_default_index_root_on_cf:
- new_paths.append(path[:-len(default_index_file)])
- else:
- new_paths.append(path)
- paths = new_paths
-
+ def InvalidateObjects(self, uri, paths):
  # uri could be either cf:// or s3:// uri
  cfuri = self.get_dist_name_for_bucket(uri)
  if len(paths) > 999:
@@ -534,10 +517,6 @@ class CloudFront(object):
  if not headers.has_key("x-amz-date"):
  headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

- if len(self.config.access_token)>0:
- self.config.refresh_role()
- headers['x-amz-security-token']=self.config.access_token
-
  signature = self.sign_request(headers)
  headers["Authorization"] = "AWS "+self.config.access_key+":"+signature

@@ -576,16 +555,8 @@ class CloudFront(object):
  for d in response['dist_list'].dist_summs:
  if d.info.has_key("S3Origin"):
  CloudFront.dist_list[getBucketFromHostname(d.info['S3Origin']['DNSName'])[0]] = d.uri()
- elif d.info.has_key("CustomOrigin"):
- # Aral: This used to skip over distributions with CustomOrigin, however, we mustn't
- # do this since S3 buckets that are set up as websites use custom origins.
- # Thankfully, the custom origin URLs they use start with the URL of the
- # S3 bucket. Here, we make use this naming convention to support this use case.
- distListIndex = getBucketFromHostname(d.info['CustomOrigin']['DNSName'])[0];
- distListIndex = distListIndex[:len(uri.bucket())]
- CloudFront.dist_list[distListIndex] = d.uri()
  else:
- # Aral: I'm not sure when this condition will be reached, but keeping it in there.
+ # Skip over distributions with CustomOrigin
  continue
  debug("dist_list: %s" % CloudFront.dist_list)
  try:
@@ -692,7 +663,7 @@ class Cmd(object):
  d = response['distribution']
  dc = d.info['DistributionConfig']
  output("Distribution created:")
- pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']))
+ pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
  pretty_output("DistId", d.uri())
  pretty_output("DomainName", d.info['DomainName'])
  pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
@@ -734,7 +705,7 @@ class Cmd(object):
  response = cf.GetDistInfo(cfuri)
  d = response['distribution']
  dc = d.info['DistributionConfig']
- pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']))
+ pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
  pretty_output("DistId", d.uri())
  pretty_output("DomainName", d.info['DomainName'])
  pretty_output("Status", d.info['Status'])

data/resources/S3/Config.py
@@ -7,11 +7,8 @@ import logging
  from logging import debug, info, warning, error
  import re
  import os
- import sys
  import Progress
  from SortedDict import SortedDict
- import httplib
- import json

  class Config(object):
  _instance = None
@@ -19,7 +16,6 @@ class Config(object):
  _doc = {}
  access_key = ""
  secret_key = ""
- access_token = ""
  host_base = "s3.amazonaws.com"
  host_bucket = "%(bucket)s.s3.amazonaws.com"
  simpledb_host = "sdb.amazonaws.com"
@@ -44,7 +40,6 @@ class Config(object):
  proxy_port = 3128
  encrypt = False
  dry_run = False
- add_encoding_exts = ""
  preserve_attrs = True
  preserve_attrs_list = [
  'uname', # Verbose owner Name (e.g. 'root')
@@ -55,14 +50,10 @@ class Config(object):
  'mtime', # Modification timestamp
  'ctime', # Creation timestamp
  'mode', # File mode (e.g. rwxr-xr-x = 755)
- 'md5', # File MD5 (if known)
  #'acl', # Full ACL (not yet supported)
  ]
  delete_removed = False
- delete_after = False
- delete_after_fetch = False
  _doc['delete_removed'] = "[sync] Remove remote S3 objects when local file has been deleted"
- delay_updates = False
  gpg_passphrase = ""
  gpg_command = ""
  gpg_encrypt = "%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
@@ -89,15 +80,9 @@ class Config(object):
  follow_symlinks = False
  socket_timeout = 300
  invalidate_on_cf = False
- # joseprio: new flags for default index invalidation
- invalidate_default_index_on_cf = False
- invalidate_default_index_root_on_cf = True
  website_index = "index.html"
  website_error = ""
  website_endpoint = "http://%(bucket)s.s3-website-%(location)s.amazonaws.com/"
- additional_destinations = []
- cache_file = ""
- add_headers = ""

  ## Creating a singleton
  def __new__(self, configfile = None):
@@ -107,73 +92,7 @@ class Config(object):

  def __init__(self, configfile = None):
  if configfile:
- try:
- self.read_config_file(configfile)
- except IOError, e:
- if 'AWS_CREDENTIAL_FILE' in os.environ:
- self.env_config()
- if len(self.access_key)==0:
- self.role_config()
-
- def role_config(self):
- conn = httplib.HTTPConnection(host='169.254.169.254',timeout=0.1)
- try:
- conn.request('GET', "/latest/meta-data/iam/security-credentials/")
- resp = conn.getresponse()
- files = resp.read()
- if resp.status == 200 and len(files)>1:
- conn.request('GET', "/latest/meta-data/iam/security-credentials/%s"%files)
- resp=conn.getresponse()
- if resp.status == 200:
- creds=json.load(resp)
- Config().update_option('access_key', creds['AccessKeyId'].encode('ascii'))
- Config().update_option('secret_key', creds['SecretAccessKey'].encode('ascii'))
- Config().update_option('access_token', creds['Token'].encode('ascii'))
- else:
- raise IOError
- else:
- raise IOError
- except:
- raise
-
- def role_refresh(self):
- try:
- self.role_config()
- except:
- warning("Could not refresh role")
-
- def env_config(self):
- cred_content = ""
- try:
- cred_file = open(os.environ['AWS_CREDENTIAL_FILE'],'r')
- cred_content = cred_file.read()
- except IOError, e:
- debug("Error %d accessing credentials file %s" % (e.errno,os.environ['AWS_CREDENTIAL_FILE']))
- r_data = re.compile("^\s*(?P<orig_key>\w+)\s*=\s*(?P<value>.*)")
- r_quotes = re.compile("^\"(.*)\"\s*$")
- if len(cred_content)>0:
- for line in cred_content.splitlines():
- is_data = r_data.match(line)
- is_data = r_data.match(line)
- if is_data:
- data = is_data.groupdict()
- if r_quotes.match(data["value"]):
- data["value"] = data["value"][1:-1]
- if data["orig_key"]=="AWSAccessKeyId":
- data["key"] = "access_key"
- elif data["orig_key"]=="AWSSecretKey":
- data["key"] = "secret_key"
- else:
- del data["key"]
- if "key" in data:
- Config().update_option(data["key"], data["value"])
- if data["key"] in ("access_key", "secret_key", "gpg_passphrase"):
- print_value = (data["value"][:2]+"...%d_chars..."+data["value"][-1:]) % (len(data["value"]) - 3)
- else:
- print_value = data["value"]
- debug("env_Config: %s->%s" % (data["key"], print_value))
-
-
+ self.read_config_file(configfile)

  def option_list(self):
  retval = []
@@ -193,12 +112,6 @@ class Config(object):
  cp = ConfigParser(configfile)
  for option in self.option_list():
  self.update_option(option, cp.get(option))
-
- if cp.get('add_headers'):
- for option in cp.get('add_headers').split(","):
- (key, value) = option.split(':')
- self.extra_headers[key.replace('_', '-').strip()] = value.strip()
-
  self._parsed_files.append(configfile)

  def dump_config(self, stream):
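
The unchanged context lines above ("## Creating a singleton", __new__) show why the removed role_config could call Config() freely: the class is a __new__-based singleton, so every call returns the same shared object. A minimal standalone sketch of that pattern (not the package's code verbatim):

class Singleton(object):
    _instance = None

    def __new__(cls, *args, **kwargs):
        # The first call creates the one shared instance;
        # every later call returns that same object.
        if cls._instance is None:
            cls._instance = object.__new__(cls)
        return cls._instance

a = Singleton()
b = Singleton()
assert a is b  # both names refer to one shared configuration object
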

data/resources/S3/Exceptions.py
@@ -44,7 +44,7 @@ class S3Error (S3Exception):
  if response.has_key("headers"):
  for header in response["headers"]:
  debug("HttpHeader: %s: %s" % (header, response["headers"][header]))
- if response.has_key("data") and response["data"]:
+ if response.has_key("data"):
  tree = getTreeFromXml(response["data"])
  error_node = tree
  if not error_node.tag == "Error":
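
This hunk reverts the 1.5.0-alpha1 fix "Handle empty return bodies when processing S3 errors" (see the NEWS diff above): without the response["data"] truthiness guard, an empty body is handed to the XML parser. A self-contained illustration, using xml.etree in place of the package's getTreeFromXml:

import xml.etree.ElementTree as ET

response = {"data": ""}  # S3 can return an error response with an empty body

# 0.0.1 check: the empty body is skipped entirely
if "data" in response and response["data"]:
    ET.fromstring(response["data"])

# 0.0.2 check: the empty body passes and the parser raises
if "data" in response:
    try:
        ET.fromstring(response["data"])
    except ET.ParseError:
        print("empty error body reaches the XML parser and raises")
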

data/resources/S3/FileLists.py
@@ -6,59 +6,56 @@
  from S3 import S3
  from Config import Config
  from S3Uri import S3Uri
- from FileDict import FileDict
+ from SortedDict import SortedDict
  from Utils import *
  from Exceptions import ParameterError
- from HashCache import HashCache

  from logging import debug, info, warning, error

  import os
  import glob
- import copy

- __all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include", "parse_attrs_header"]
+ __all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include"]

  def _fswalk_follow_symlinks(path):
- '''
- Walk filesystem, following symbolic links (but without recursion), on python2.4 and later
-
- If a symlink directory loop is detected, emit a warning and skip.
- E.g.: dir1/dir2/sym-dir -> ../dir2
- '''
- assert os.path.isdir(path) # only designed for directory argument
- walkdirs = set([path])
- for dirpath, dirnames, filenames in os.walk(path):
- handle_exclude_include_walk(dirpath, dirnames, [])
- real_dirpath = os.path.realpath(dirpath)
- for dirname in dirnames:
- current = os.path.join(dirpath, dirname)
- real_current = os.path.realpath(current)
- if os.path.islink(current):
- if (real_dirpath == real_current or
- real_dirpath.startswith(real_current + os.path.sep)):
- warning("Skipping recursively symlinked directory %s" % dirname)
- else:
- walkdirs.add(current)
- for walkdir in walkdirs:
- for dirpath, dirnames, filenames in os.walk(walkdir):
- handle_exclude_include_walk(dirpath, dirnames, [])
- yield (dirpath, dirnames, filenames)
-
- def _fswalk_no_symlinks(path):
- '''
- Directory tree generator
-
- path (str) is the root of the directory tree to walk
- '''
- for dirpath, dirnames, filenames in os.walk(path):
- handle_exclude_include_walk(dirpath, dirnames, filenames)
- yield (dirpath, dirnames, filenames)
+ '''
+ Walk filesystem, following symbolic links (but without recursion), on python2.4 and later
+
+ If a recursive directory link is detected, emit a warning and skip.
+ '''
+ assert os.path.isdir(path) # only designed for directory argument
+ walkdirs = set([path])
+ targets = set()
+ for dirpath, dirnames, filenames in os.walk(path):
+ for dirname in dirnames:
+ current = os.path.join(dirpath, dirname)
+ target = os.path.realpath(current)
+ if os.path.islink(current):
+ if target in targets:
+ warning("Skipping recursively symlinked directory %s" % dirname)
+ else:
+ walkdirs.add(current)
+ targets.add(target)
+ for walkdir in walkdirs:
+ for value in os.walk(walkdir):
+ yield value
+
+ def _fswalk(path, follow_symlinks):
+ '''
+ Directory tree generator
+
+ path (str) is the root of the directory tree to walk
+
+ follow_symlinks (bool) indicates whether to descend into symbolically linked directories
+ '''
+ if follow_symlinks:
+ return _fswalk_follow_symlinks(path)
+ return os.walk(path)

  def filter_exclude_include(src_list):
  info(u"Applying --exclude/--include")
  cfg = Config()
- exclude_list = FileDict(ignore_case = False)
+ exclude_list = SortedDict(ignore_case = False)
  for file in src_list.keys():
  debug(u"CHECK: %s" % file)
  excluded = False
@@ -81,90 +78,23 @@ def filter_exclude_include(src_list):
  del(src_list[file])
  continue
  else:
- debug(u"PASS: %r" % (file))
+ debug(u"PASS: %s" % (file))
  return src_list, exclude_list

- def handle_exclude_include_walk(root, dirs, files):
- cfg = Config()
- copydirs = copy.copy(dirs)
- copyfiles = copy.copy(files)
-
- # exclude dir matches in the current directory
- # this prevents us from recursing down trees we know we want to ignore
- for x in copydirs:
- d = os.path.join(root, x, '')
- debug(u"CHECK: %r" % d)
- excluded = False
- for r in cfg.exclude:
- if r.search(d):
- excluded = True
- debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
- break
- if excluded:
- ## No need to check for --include if not excluded
- for r in cfg.include:
- if r.search(d):
- excluded = False
- debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
- break
- if excluded:
- ## Still excluded - ok, action it
- debug(u"EXCLUDE: %r" % d)
- dirs.remove(x)
- continue
- else:
- debug(u"PASS: %r" % (d))
-
- # exclude file matches in the current directory
- for x in copyfiles:
- file = os.path.join(root, x)
- debug(u"CHECK: %r" % file)
- excluded = False
- for r in cfg.exclude:
- if r.search(file):
- excluded = True
- debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
- break
- if excluded:
- ## No need to check for --include if not excluded
- for r in cfg.include:
- if r.search(file):
- excluded = False
- debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
- break
- if excluded:
- ## Still excluded - ok, action it
- debug(u"EXCLUDE: %s" % file)
- files.remove(x)
- continue
- else:
- debug(u"PASS: %r" % (file))
-
  def fetch_local_list(args, recursive = None):
- def _get_filelist_local(loc_list, local_uri, cache):
+ def _get_filelist_local(local_uri):
  info(u"Compiling list of local files...")
-
- if deunicodise(local_uri.basename()) == "-":
- loc_list["-"] = {
- 'full_name_unicode' : '-',
- 'full_name' : '-',
- 'size' : -1,
- 'mtime' : -1,
- }
- return loc_list, True
  if local_uri.isdir():
  local_base = deunicodise(local_uri.basename())
  local_path = deunicodise(local_uri.path())
- if cfg.follow_symlinks:
- filelist = _fswalk_follow_symlinks(local_path)
- else:
- filelist = _fswalk_no_symlinks(local_path)
+ filelist = _fswalk(local_path, cfg.follow_symlinks)
  single_file = False
  else:
  local_base = ""
  local_path = deunicodise(local_uri.dirname())
  filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
  single_file = True
+ loc_list = SortedDict(ignore_case = False)
  for root, dirs, files in filelist:
  rel_root = root.replace(local_path, local_base, 1)
  for f in files:
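
The exclude/include precedence that filter_exclude_include applies, in both versions, is: an --exclude match marks a file, and a later --include match can rescue it. A compact self-contained sketch of that rule (illustrative names, not the package's code):

import re

def is_excluded(name, exclude, include):
    # An exclude match marks the file; a subsequent include match rescues it.
    if any(r.search(name) for r in exclude):
        return not any(r.search(name) for r in include)
    return False

exclude = [re.compile(r"\.pyc$")]
include = [re.compile(r"Config\.pyc$")]
print(is_excluded("S3/ACL.pyc", exclude, include))     # True: excluded
print(is_excluded("S3/Config.pyc", exclude, include))  # False: rescued by include
print(is_excluded("S3/ACL.py", exclude, include))      # False: never matched
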
@@ -188,43 +118,13 @@ def fetch_local_list(args, recursive = None):
  'full_name' : full_name,
  'size' : sr.st_size,
  'mtime' : sr.st_mtime,
- 'dev' : sr.st_dev,
- 'inode' : sr.st_ino,
- 'uid' : sr.st_uid,
- 'gid' : sr.st_gid,
- 'sr': sr # save it all, may need it in preserve_attrs_list
  ## TODO: Possibly more to save here...
  }
- if 'md5' in cfg.sync_checks:
- md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
- if md5 is None:
- try:
- md5 = loc_list.get_md5(relative_file) # this does the file I/O
- except IOError:
- continue
- cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
- loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
  return loc_list, single_file

- def _maintain_cache(cache, local_list):
- if cfg.cache_file:
- cache.mark_all_for_purge()
- for i in local_list.keys():
- cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
- cache.purge()
- cache.save(cfg.cache_file)
-
  cfg = Config()
-
- cache = HashCache()
- if cfg.cache_file:
- try:
- cache.load(cfg.cache_file)
- except IOError:
- info(u"No cache file found, creating it.")
-
  local_uris = []
- local_list = FileDict(ignore_case = False)
+ local_list = SortedDict(ignore_case = False)
  single_file = False

  if type(args) not in (list, tuple):
@@ -242,7 +142,8 @@ def fetch_local_list(args, recursive = None):
  local_uris.append(uri)

  for uri in local_uris:
- list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)
+ list_for_uri, single_file = _get_filelist_local(uri)
+ local_list.update(list_for_uri)

  ## Single file is True if and only if the user
  ## specified one local URI and that URI represents
@@ -252,8 +153,6 @@ def fetch_local_list(args, recursive = None):
  if len(local_list) > 1:
  single_file = False

- _maintain_cache(cache, local_list)
-
  return local_list, single_file

  def fetch_remote_list(args, require_attribs = False, recursive = None):
@@ -284,7 +183,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
  rem_base = rem_base[:rem_base.rfind('/')+1]
  remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
  rem_base_len = len(rem_base)
- rem_list = FileDict(ignore_case = False)
+ rem_list = SortedDict(ignore_case = False)
  break_now = False
  for object in response['list']:
  if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
@@ -292,7 +191,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
  key = os.path.basename(object['Key'])
  object_uri_str = remote_uri_original.uri()
  break_now = True
- rem_list = FileDict(ignore_case = False) ## Remove whatever has already been put to rem_list
+ rem_list = {} ## Remove whatever has already been put to rem_list
  else:
  key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !!
  object_uri_str = remote_uri.uri() + key
@@ -303,18 +202,14 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
  'object_key' : object['Key'],
  'object_uri_str' : object_uri_str,
  'base_uri' : remote_uri,
- 'dev' : None,
- 'inode' : None,
  }
- md5 = object['ETag'][1:-1]
- rem_list.record_md5(key, md5)
  if break_now:
  break
  return rem_list

  cfg = Config()
  remote_uris = []
- remote_list = FileDict(ignore_case = False)
+ remote_list = SortedDict(ignore_case = False)

  if type(args) not in (list, tuple):
  args = [args]
@@ -333,7 +228,6 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
  objectlist = _get_filelist_remote(uri)
  for key in objectlist:
  remote_list[key] = objectlist[key]
- remote_list.record_md5(key, objectlist.get_md5(key))
  else:
  for uri in remote_uris:
  uri_str = str(uri)
@@ -371,147 +265,81 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
  'md5': response['headers']['etag'].strip('"\''),
  'timestamp' : dateRFC822toUnix(response['headers']['date'])
  })
- # get md5 from header if it's present. We would have set that during upload
- if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
- attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
- if attrs.has_key('md5'):
- remote_item.update({'md5': attrs['md5']})
-
  remote_list[key] = remote_item
  return remote_list

- def parse_attrs_header(attrs_header):
- attrs = {}
- for attr in attrs_header.split("/"):
- key, val = attr.split(":")
- attrs[key] = val
- return attrs
-
-
- def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False):
+ def compare_filelists(src_list, dst_list, src_remote, dst_remote):
  def __direction_str(is_remote):
  return is_remote and "remote" or "local"

- def _compare(src_list, dst_lst, src_remote, dst_remote, file):
- """Return True if src_list[file] matches dst_list[file], else False"""
- attribs_match = True
- if not (src_list.has_key(file) and dst_list.has_key(file)):
- info(u"%s: does not exist in one side or the other: src_list=%s, dst_list=%s" % (file, src_list.has_key(file), dst_list.has_key(file)))
- return False
-
- ## check size first
- if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
- debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
- attribs_match = False
-
- ## check md5
- compare_md5 = 'md5' in cfg.sync_checks
- # Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn"
- if compare_md5:
- if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
- compare_md5 = False
- info(u"disabled md5 check for %s" % file)
- if attribs_match and compare_md5:
- try:
- src_md5 = src_list.get_md5(file)
- dst_md5 = dst_list.get_md5(file)
- except (IOError,OSError), e:
- # md5 sum verification failed - ignore that file altogether
- debug(u"IGNR: %s (disappeared)" % (file))
- warning(u"%s: file disappeared, ignoring." % (file))
- raise
-
- if src_md5 != dst_md5:
- ## checksums are different.
- attribs_match = False
- debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
-
- return attribs_match
-
- # we don't support local->local sync, use 'rsync' or something like that instead ;-)
+ # We don't support local->local sync, use 'rsync' or something like that instead ;-)
  assert(not(src_remote == False and dst_remote == False))

  info(u"Verifying attributes...")
  cfg = Config()
- ## Items left on src_list will be transferred
- ## Items left on update_list will be transferred after src_list
- ## Items left on copy_pairs will be copied from dst1 to dst2
- update_list = FileDict(ignore_case = False)
- ## Items left on dst_list will be deleted
- copy_pairs = []
+ exists_list = SortedDict(ignore_case = False)

  debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
+ debug("src_list.keys: %s" % src_list.keys())
+ debug("dst_list.keys: %s" % dst_list.keys())

- for relative_file in src_list.keys():
- debug(u"CHECK: %s" % (relative_file))
-
- if dst_list.has_key(relative_file):
+ for file in src_list.keys():
+ debug(u"CHECK: %s" % file)
+ if dst_list.has_key(file):
  ## Was --skip-existing requested?
  if cfg.skip_existing:
- debug(u"IGNR: %s (used --skip-existing)" % (relative_file))
- del(src_list[relative_file])
- del(dst_list[relative_file])
- continue
-
- try:
- same_file = _compare(src_list, dst_list, src_remote, dst_remote, relative_file)
- except (IOError,OSError), e:
- debug(u"IGNR: %s (disappeared)" % (relative_file))
- warning(u"%s: file disappeared, ignoring." % (relative_file))
- del(src_list[relative_file])
- del(dst_list[relative_file])
+ debug(u"IGNR: %s (used --skip-existing)" % (file))
+ exists_list[file] = src_list[file]
+ del(src_list[file])
+ ## Remove from destination-list, all that is left there will be deleted
+ del(dst_list[file])
  continue

- if same_file:
- debug(u"IGNR: %s (transfer not needed)" % relative_file)
- del(src_list[relative_file])
- del(dst_list[relative_file])
+ attribs_match = True
+ ## Check size first
+ if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
+ debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
+ attribs_match = False

- else:
- # look for matching file in src
+ ## Check MD5
+ compare_md5 = 'md5' in cfg.sync_checks
+ # Multipart-uploaded files don't have a valid MD5 sum - it ends with "...-NN"
+ if compare_md5 and (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
+ compare_md5 = False
+ info(u"Disabled MD5 check for %s" % file)
+ if attribs_match and compare_md5:
  try:
- md5 = src_list.get_md5(relative_file)
- except IOError:
- md5 = None
- if md5 is not None and dst_list.by_md5.has_key(md5):
- # Found one, we want to copy
- dst1 = list(dst_list.by_md5[md5])[0]
- debug(u"DST COPY src: %s -> %s" % (dst1, relative_file))
- copy_pairs.append((src_list[relative_file], dst1, relative_file))
- del(src_list[relative_file])
- del(dst_list[relative_file])
- else:
- # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
- # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
- dst_list.record_md5(relative_file, md5)
- update_list[relative_file] = src_list[relative_file]
- del src_list[relative_file]
- del dst_list[relative_file]
+ if src_remote == False and dst_remote == True:
+ src_md5 = hash_file_md5(src_list[file]['full_name'])
+ dst_md5 = dst_list[file]['md5']
+ elif src_remote == True and dst_remote == False:
+ src_md5 = src_list[file]['md5']
+ dst_md5 = hash_file_md5(dst_list[file]['full_name'])
+ elif src_remote == True and dst_remote == True:
+ src_md5 = src_list[file]['md5']
+ dst_md5 = dst_list[file]['md5']
+ except (IOError,OSError), e:
+ # MD5 sum verification failed - ignore that file altogether
+ debug(u"IGNR: %s (disappeared)" % (file))
+ warning(u"%s: file disappeared, ignoring." % (file))
+ del(src_list[file])
+ del(dst_list[file])
+ continue

- else:
- # dst doesn't have this file
- # look for matching file elsewhere in dst
- try:
- md5 = src_list.get_md5(relative_file)
- except IOError:
- md5 = None
- dst1 = dst_list.find_md5_one(md5)
- if dst1 is not None:
- # Found one, we want to copy
- debug(u"DST COPY dst: %s -> %s" % (dst1, relative_file))
- copy_pairs.append((src_list[relative_file], dst1, relative_file))
- del(src_list[relative_file])
- else:
- # we don't have this file, and we don't have a copy of this file elsewhere. Get it.
- # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
- # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
- dst_list.record_md5(relative_file, md5)
+ if src_md5 != dst_md5:
+ ## Checksums are different.
+ attribs_match = False
+ debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
+
+ if attribs_match:
+ ## Remove from source-list, all that is left there will be transferred
+ debug(u"IGNR: %s (transfer not needed)" % file)
+ exists_list[file] = src_list[file]
+ del(src_list[file])

- for f in dst_list.keys():
- if src_list.has_key(f) or update_list.has_key(f):
- # leave only those not on src_list + update_list
- del dst_list[f]
+ ## Remove from destination-list, all that is left there will be deleted
+ del(dst_list[file])

- return src_list, dst_list, update_list, copy_pairs
+ return src_list, dst_list, exists_list

  # vim:et:ts=4:sts=4:ai
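
A closing note on the restored MD5 comment above: S3 gives single-part uploads an ETag that is the object's 32-hex-digit MD5, while multipart uploads get "<hex>-<part count>", which cannot be compared against a local checksum; that is why both versions skip the md5 sync check for such objects. A self-contained sketch of the detection:

def is_multipart_etag(etag):
    # Plain uploads: 32 hex digits. Multipart uploads: "<hex>-<part count>",
    # which is not the MD5 of the object and cannot be verified locally.
    return "-" in etag

print(is_multipart_etag("d41d8cd98f00b204e9800998ecf8427e"))     # False
print(is_multipart_etag("d41d8cd98f00b204e9800998ecf8427e-12"))  # True
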