s3sync 0.3.4 → 1.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +175 -0
- data/README +401 -0
- data/README_s3cmd +172 -0
- data/Rakefile +35 -0
- data/bin/s3cmd +245 -0
- data/bin/s3sync +726 -67
- data/lib/HTTPStreaming.rb +103 -0
- data/lib/S3.rb +707 -0
- data/lib/S3_s3sync_mod.rb +143 -0
- data/lib/S3encoder.rb +50 -0
- data/lib/s3config.rb +27 -0
- data/lib/s3try.rb +161 -0
- data/lib/thread_generator.rb +383 -0
- data/lib/version.rb +9 -0
- data/setup.rb +1585 -0
- metadata +54 -177
- data/lib/s3sync.rb +0 -2
- data/lib/s3sync/cli.rb +0 -475
- data/lib/s3sync/config.rb +0 -98
- data/lib/s3sync/exceptions.rb +0 -55
- data/lib/s3sync/sync.rb +0 -371
- data/lib/s3sync/util.rb +0 -29
- data/lib/s3sync/version.rb +0 -27
data/bin/s3sync
CHANGED
@@ -1,76 +1,735 @@
|
|
1
|
-
|
1
|
+
#! /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/bin/ruby
|
2
|
+
# This software code is made available "AS IS" without warranties of any
|
3
|
+
# kind. You may copy, display, modify and redistribute the software
|
4
|
+
# code either by itself or as incorporated into your code; provided that
|
5
|
+
# you do not remove any proprietary notices. Your use of this software
|
6
|
+
# code is at your own risk and you waive any claim against the author
|
7
|
+
# with respect to your use of this software code.
|
8
|
+
# (c) 2007 s3sync.net
|
2
9
|
#
|
3
|
-
# s3sync - Tool belt for managing your S3 buckets
|
4
|
-
#
|
5
|
-
# The MIT License (MIT)
|
6
|
-
#
|
7
|
-
# Copyright (c) 2013 Lincoln de Sousa <lincoln@clarete.li>
|
8
|
-
#
|
9
|
-
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
10
|
-
# of this software and associated documentation files (the "Software"), to deal
|
11
|
-
# in the Software without restriction, including without limitation the rights
|
12
|
-
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
13
|
-
# copies of the Software, and to permit persons to whom the Software is
|
14
|
-
# furnished to do so, subject to the following conditions:
|
15
|
-
#
|
16
|
-
# The above copyright notice and this permission notice shall be included in
|
17
|
-
# all copies or substantial portions of the Software.
|
18
|
-
#
|
19
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
20
|
-
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
21
|
-
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
22
|
-
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
23
|
-
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
24
|
-
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
25
|
-
# THE SOFTWARE.
|
26
10
|
|
27
|
-
|
11
|
+
module S3sync
|
28
12
|
|
29
|
-
|
30
|
-
|
31
|
-
|
13
|
+
$S3SYNC_MIME_TYPES_FILE = (ENV["S3SYNC_MIME_TYPES_FILE"] or '/etc/mime.types')
|
14
|
+
|
15
|
+
$S3SYNC_VERSION = '1.2.5'
|
32
16
|
|
33
|
-
|
17
|
+
# always look "here" for include files (thanks aktxyz)
|
18
|
+
$LOAD_PATH << File.expand_path(File.dirname(__FILE__))
|
19
|
+
|
20
|
+
require 'getoptlong'
|
21
|
+
#require 'generator' # http://www.ruby-doc.org/stdlib/libdoc/generator/rdoc/classes/Generator.html
|
22
|
+
require 'thread_generator' # memory doesn't leak with this one, at least nothing near as bad
|
23
|
+
require 'md5'
|
24
|
+
require 'tempfile'
|
25
|
+
require 's3try'
|
26
|
+
|
27
|
+
# after other mods, so we don't overwrite yaml vals with defaults
|
28
|
+
require 's3config'
|
29
|
+
include S3Config
|
30
|
+
|
31
|
+
$S3syncDirString = '{E40327BF-517A-46e8-A6C3-AF51BC263F59}'
|
32
|
+
$S3syncDirTag = 'd66759af42f282e1ba19144df2d405d0'
|
33
|
+
$S3syncDirFile = Tempfile.new("s3sync")
|
34
|
+
$S3syncDirFile.puts $S3syncDirString
|
35
|
+
$S3syncDirFile.close # not final; we need this file again to 'put' directory nodes
|
36
|
+
|
37
|
+
if $S3SYNC_MIME_TYPES_FILE and FileTest.exist?($S3SYNC_MIME_TYPES_FILE)
|
38
|
+
File.open($S3SYNC_MIME_TYPES_FILE, 'r') do |f|
|
39
|
+
$mimeTypes = {}
|
40
|
+
f.each_line do |l|
|
41
|
+
if l =~ /^(\w\S+)\s+(\S.*)$/
|
42
|
+
type = $1
|
43
|
+
exts = $2.split
|
44
|
+
exts.each do |e|
|
45
|
+
$mimeTypes[e.to_s] = type.to_s
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def S3sync.main
|
53
|
+
# ---------- OPTIONS PROCESSING ---------- #
|
54
|
+
|
55
|
+
$S3syncOptions = Hash.new
|
56
|
+
optionsParser = GetoptLong.new(
|
57
|
+
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
58
|
+
[ '--ssl', '-s', GetoptLong::NO_ARGUMENT ],
|
59
|
+
[ '--recursive','-r', GetoptLong::NO_ARGUMENT ],
|
60
|
+
[ '--public-read','-p', GetoptLong::NO_ARGUMENT ],
|
61
|
+
[ '--delete', GetoptLong::NO_ARGUMENT ],
|
62
|
+
[ '--verbose', '-v', GetoptLong::NO_ARGUMENT ],
|
63
|
+
[ '--dryrun', '-n', GetoptLong::NO_ARGUMENT ],
|
64
|
+
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
65
|
+
[ '--memory', '-m', GetoptLong::NO_ARGUMENT ],
|
66
|
+
[ '--progress', GetoptLong::NO_ARGUMENT ],
|
67
|
+
[ '--expires', GetoptLong::REQUIRED_ARGUMENT ],
|
68
|
+
[ '--cache-control', GetoptLong::REQUIRED_ARGUMENT ],
|
69
|
+
[ '--exclude', GetoptLong::REQUIRED_ARGUMENT ],
|
70
|
+
[ '--make-dirs', GetoptLong::NO_ARGUMENT ],
|
71
|
+
[ '--no-md5', GetoptLong::NO_ARGUMENT ]
|
72
|
+
)
|
73
|
+
|
74
|
+
# Print an optional error message followed by the command-line summary to
# $stderr, then terminate the process.
#
# message - String explaining what was wrong with the invocation, or nil when
#           only the summary is wanted (e.g. --help).
#
# Never returns. Exits with status 1 when a message is supplied, so a calling
# shell script can tell a rejected invocation from a successful run, and with
# status 0 otherwise.
def S3sync.usage(message = nil)
  $stderr.puts message if message
  # show only the script's file name, not the path it was launched through
  name = File.basename($0)
  $stderr.puts <<"ENDUSAGE"
#{name} [options] <source> <destination>\t\tversion #{$S3SYNC_VERSION}
--help -h --verbose -v --dryrun -n
--ssl -s --recursive -r --delete
--public-read -p --expires="<exp>" --cache-control="<cc>"
--exclude="<regexp>" --progress --debug -d
--make-dirs --no-md5
One of <source> or <destination> must be of S3 format, the other a local path.
Reminders:
* An S3 formatted item with bucket 'mybucket' and prefix 'mypre' looks like:
mybucket:mypre/some/key/name
* Local paths should always use forward slashes '/' even on Windows
* Whether you use a trailing slash on the source path makes a difference.
* For examples see README.
ENDUSAGE
  exit(message ? 1 : 0)
end #usage
|
94
|
+
|
95
|
+
begin
|
96
|
+
optionsParser.each {|opt, arg| $S3syncOptions[opt] = (arg || true)}
|
97
|
+
rescue StandardError
|
98
|
+
usage # the parser already printed an error message
|
99
|
+
end
|
100
|
+
usage if $S3syncOptions['--help']
|
101
|
+
$S3syncOptions['--verbose'] = true if $S3syncOptions['--dryrun'] or $S3syncOptions['--debug'] or $S3syncOptions['--progress']
|
102
|
+
$S3syncOptions['--ssl'] = true if $S3syncOptions['--ssl'] # change from "" to true to appease s3 port chooser
|
34
103
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
rescue S3Sync::NoConfigFound => exc
|
39
|
-
# We can't proceed without having those two vars set
|
40
|
-
$stderr.puts "You didn't set up the following environment variables:"
|
41
|
-
$stderr.puts
|
42
|
-
exc.missing_vars.each {|var| $stderr.puts " * #{var}"}
|
43
|
-
$stderr.puts
|
104
|
+
|
105
|
+
# ---------- CONNECT ---------- #
|
106
|
+
S3sync::s3trySetup
|
44
107
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
108
|
+
# ---------- PREFIX PROCESSING ---------- #
|
109
|
+
|
110
|
+
# Decide whether a command-line argument names an S3 location
# ("bucket:prefix") rather than a local path.
#
# pre - String argument as given on the command line.
#
# Returns true when pre contains a colon and does not begin with a DOS-style
# drive specifier such as "C:\" or "C:/"; false otherwise.
def S3sync.s3Prefix?(pre)
  # \A rather than ^: only the very start of the argument may be a drive
  # letter, not the start of some later line inside it
  pre.include?(':') && pre !~ %r{\A[A-Za-z]:[\\/]}
end
|
114
|
+
sourcePrefix, destinationPrefix = ARGV
|
115
|
+
usage("You didn't set up your environment variables; see README.txt") if not($AWS_ACCESS_KEY_ID and $AWS_SECRET_ACCESS_KEY)
|
116
|
+
usage('Need a source and a destination') if sourcePrefix == nil or destinationPrefix == nil
|
117
|
+
usage('Both arguments can\'t be on S3') if s3Prefix?(sourcePrefix) and s3Prefix?(destinationPrefix)
|
118
|
+
usage('One argument must be on S3') if !s3Prefix?(sourcePrefix) and !s3Prefix?(destinationPrefix)
|
49
119
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
120
|
+
# so we can modify them
|
121
|
+
sourcePrefix, destinationPrefix = sourcePrefix.dup, destinationPrefix.dup
|
122
|
+
|
123
|
+
# handle trailing slash for source properly
|
124
|
+
if(sourcePrefix !~ %r{/$})
|
125
|
+
# no slash on end of source means we need to append the last src dir to dst prefix
|
126
|
+
# testing for empty isn't good enough here.. needs to be "empty apart from potentially having 'bucket:'"
|
127
|
+
slash = (destinationPrefix.empty? or destinationPrefix.match(%r{:$}))? "" : "/"
|
128
|
+
# not good enough.. sometimes this coughs up the bucket as a prefix destinationPrefix.replace(destinationPrefix + slash + sourcePrefix.split(/(?:\/|:)/).last)
|
129
|
+
# take everything at the end after a slash or colon
|
130
|
+
destinationPrefix.replace(destinationPrefix + slash + %r{([^/:]*)$}.match(sourcePrefix)[1])
|
131
|
+
end
|
132
|
+
# no trailing slash on dest, ever.
|
133
|
+
destinationPrefix.sub!(%r{/$}, "")
|
134
|
+
|
135
|
+
# don't repeat slashes
|
136
|
+
sourcePrefix.squeeze!('/')
|
137
|
+
destinationPrefix.squeeze!('/')
|
138
|
+
|
139
|
+
# here's where we find out what direction we're going
|
140
|
+
sourceIsS3 = s3Prefix?(sourcePrefix)
|
141
|
+
# alias these variables to the other strings (in ruby = does not make copies of strings)
|
142
|
+
s3Prefix = sourceIsS3 ? sourcePrefix : destinationPrefix
|
143
|
+
localPrefix = sourceIsS3 ? destinationPrefix : sourcePrefix
|
144
|
+
|
145
|
+
# canonicalize the S3 stuff
|
146
|
+
s3Bucket = (/^(.*?):/.match(s3Prefix))[1]
|
147
|
+
s3Prefix.replace((/:(.*)$/.match(s3Prefix))[1])
|
148
|
+
debug("s3Prefix #{s3Prefix}")
|
149
|
+
$S3SyncOriginalS3Prefix = s3Prefix.dup
|
150
|
+
|
151
|
+
# canonicalize the local stuff
|
152
|
+
# but that can kill a trailing slash, which we need to preserve long enough to know whether we mean "the dir" or "its contents"
|
153
|
+
# it will get re-stripped by the local generator after expressing this knowledge
|
154
|
+
localTrailingSlash = localPrefix.match(%r{/$})
|
155
|
+
localPrefix.replace(File.expand_path(localPrefix))
|
156
|
+
localPrefix += '/' if localTrailingSlash
|
157
|
+
debug("localPrefix #{localPrefix}")
|
158
|
+
# used for exclusion parsing
|
159
|
+
$S3SyncOriginalLocalPrefix = localPrefix.dup
|
160
|
+
|
161
|
+
# exclude preparation
|
162
|
+
# we don't want to build then throw away this regexp for each node in the universe; do it once globally
|
163
|
+
$S3SyncExclude = Regexp.new($S3syncOptions['--exclude']) if $S3syncOptions['--exclude']
|
164
|
+
|
165
|
+
|
166
|
+
# ---------- GENERATORS ---------- #
|
167
|
+
|
168
|
+
|
169
|
+
# a generator that will return the files/dirs of the local tree one by one
|
170
|
+
# sorted and decorated for easy comparison with the S3 tree
|
171
|
+
localTree = Generator.new do |g|
|
172
|
+
def S3sync.localTreeRecurse(g, prefix, path)
|
173
|
+
debug("localTreeRecurse #{prefix} #{path}")
|
174
|
+
#if $S3syncOptions['--memory']
|
175
|
+
# $stderr.puts "Starting local recurse"
|
176
|
+
# stats = ostats stats
|
177
|
+
#end
|
178
|
+
d = nil
|
179
|
+
begin
|
180
|
+
slash = prefix.empty? ? "" : "/"
|
181
|
+
d = Dir.new(prefix + slash + path)
|
182
|
+
rescue Errno::ENOENT
|
183
|
+
# ok the dir doesn't exist at all (this only really occurs for the root i.e. first dir)
|
184
|
+
return nil
|
185
|
+
rescue Errno::EACCES
|
186
|
+
# vista won't even let us touch some stuff in our own profile
|
187
|
+
return nil
|
188
|
+
end
|
189
|
+
# do some pre-processing
|
190
|
+
# the following sleight of hand is to make the recursion match the way s3 sorts
|
191
|
+
# take for example the directory 'foo' and the file 'foo.bar'
|
192
|
+
# when we encounter the dir we would want to recurse into it
|
193
|
+
# but S3 would just say 'period < slash' and sort 'foo.bar' between the dir node
|
194
|
+
# and the contents in that 'dir'
|
195
|
+
#
|
196
|
+
# so the solution is to not recurse into the directory until the point where
|
197
|
+
# it would come up "next" in the S3 list
|
198
|
+
# We have to do these hoops on the local side, because we have very little control
|
199
|
+
# over how S3 will return its results
|
200
|
+
toAdd = Array.new
|
201
|
+
d.each do |name|
|
202
|
+
slash = path.empty? ? "" : "/"
|
203
|
+
partialPath = path + slash + name
|
204
|
+
slash = prefix.empty? ? "" : "/"
|
205
|
+
fullPath = prefix + slash + partialPath
|
206
|
+
if name == "." or name == ".."
|
207
|
+
# skip
|
208
|
+
else
|
209
|
+
# add a dir node if appropriate
|
210
|
+
debug("Test #{fullPath}")
|
211
|
+
if ((not FileTest.symlink?(fullPath)) and FileTest.directory?(fullPath)) and $S3syncOptions['--recursive']
|
212
|
+
debug("Adding it as a dir node")
|
213
|
+
toAdd.push(name + '/') # always trail slash here for sorting purposes (removed below with rindex test)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
dItems = d.collect + toAdd
|
218
|
+
d.close
|
219
|
+
d = toAdd = nil
|
220
|
+
dItems.sort! #aws says we will get alpha sorted results but ruby doesn't
|
221
|
+
dItems.each do |name|
|
222
|
+
isDirNode = false
|
223
|
+
if name.rindex('/') == name.length-1
|
224
|
+
name = name.slice(0...name.length-1)
|
225
|
+
isDirNode = true
|
226
|
+
debug("#{name} is a dir node")
|
227
|
+
end
|
228
|
+
slash = path.empty? ? "" : "/"
|
229
|
+
partialPath = path + slash + name
|
230
|
+
slash = prefix.empty? ? "" : "/"
|
231
|
+
fullPath = prefix + slash + partialPath
|
232
|
+
excludePath = fullPath.slice($S3SyncOriginalLocalPrefix.length...fullPath.length)
|
233
|
+
if name == "." or name == ".."
|
234
|
+
# skip
|
235
|
+
elsif $S3SyncExclude and $S3SyncExclude.match(excludePath)
|
236
|
+
debug("skipping local item #{excludePath} because of --exclude")
|
237
|
+
elsif isDirNode
|
238
|
+
localTreeRecurse(g, prefix, partialPath)
|
239
|
+
else
|
240
|
+
# a normal looking node we should try to process
|
241
|
+
debug("local item #{fullPath}")
|
242
|
+
g.yield(LocalNode.new(prefix, partialPath))
|
243
|
+
end
|
244
|
+
end
|
245
|
+
#if $S3syncOptions['--memory']
|
246
|
+
# $stderr.puts "Ending local recurse"
|
247
|
+
# stats = ostats stats
|
248
|
+
#end
|
249
|
+
end
|
250
|
+
# a bit of a special case for local, since "foo/" and "foo" are essentially treated the same by file systems
|
251
|
+
# so we need to think harder about what the user really meant in the command line.
|
252
|
+
localPrefixTrim = localPrefix
|
253
|
+
if localPrefix !~ %r{/$}
|
254
|
+
# no trailing slash, so yield the root itself first, then recurse if appropriate
|
255
|
+
# gork this is still not quite good enough.. if local is the dest then we don't know whether s3 will have a root dir node yielded a priori, so we can't know whether to do this. only matters for --erase though
|
256
|
+
g.yield(LocalNode.new(localPrefixTrim, "")) # technically we should check this for exclusion, but excluding the root node is kind of senseless.. and that would be a pain to set up here
|
257
|
+
localTreeRecurse(g, localPrefixTrim, "") if $S3syncOptions['--recursive']
|
258
|
+
else
|
259
|
+
# trailing slash, so ignore the root itself, and just go into the first level
|
260
|
+
localPrefixTrim.sub!(%r{/$}, "") # strip the slash because of how we do local node slash accounting in the recurse above
|
261
|
+
localTreeRecurse(g, localPrefixTrim, "")
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
265
|
+
# a generator that will return the nodes in the S3 tree one by one
|
266
|
+
# sorted and decorated for easy comparison with the local tree
|
267
|
+
s3Tree = Generator.new do |g|
|
268
|
+
def S3sync.s3TreeRecurse(g, bucket, prefix, path)
|
269
|
+
if $S3syncOptions['--memory']
|
270
|
+
$stderr.puts "Starting S3 recurse"
|
271
|
+
GC.start
|
272
|
+
stats = ostats stats
|
273
|
+
end
|
274
|
+
$stderr.puts "s3TreeRecurse #{bucket} #{prefix} #{path}" if $S3syncOptions['--debug']
|
275
|
+
nextPage = true
|
276
|
+
marker = ''
|
277
|
+
while nextPage do
|
278
|
+
fullPrefix = prefix + path
|
279
|
+
debug("nextPage: #{marker}") if marker != ''
|
280
|
+
options = {}
|
281
|
+
options['prefix'] = fullPrefix # start at the right depth
|
282
|
+
options['delimiter'] = '/' # only one dir at a time please
|
283
|
+
options['max-keys'] = '200' # use manageable chunks
|
284
|
+
options['marker'] = marker unless marker == ''
|
285
|
+
d = S3sync.S3try(:list_bucket, bucket, options)
|
286
|
+
$stderr.puts "S3 ERROR: #{d.http_response}" unless d.http_response.is_a? Net::HTTPSuccess
|
287
|
+
# the 'directories' and leaf nodes are in two separate collections
|
288
|
+
# because a dir will never have the same name as a node, we can just shove them together and sort
|
289
|
+
# it's important to evaluate them alphabetically for efficient comparison to the local tree
|
290
|
+
tItems = d.entries + d.common_prefix_entries
|
291
|
+
tItems.sort! do |a,b|
|
292
|
+
aName = a.respond_to?('key') ? a.key : a.prefix
|
293
|
+
bName = b.respond_to?('key') ? b.key : b.prefix
|
294
|
+
# the full path will be returned, efficient to ignore the part we know will be in common
|
295
|
+
aName.slice(fullPrefix.length..aName.length) <=> bName.slice(fullPrefix.length..bName.length)
|
296
|
+
end
|
297
|
+
# get rid of the big s3 objects asap, just save light-weight nodes and strings
|
298
|
+
items = tItems.collect do |item|
|
299
|
+
if item.respond_to?('key')
|
300
|
+
key = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join
|
301
|
+
Node.new(key, item.size, item.etag, item.last_modified)
|
302
|
+
else
|
303
|
+
Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.prefix).join
|
304
|
+
end
|
305
|
+
end
|
306
|
+
nextPage = d.properties.is_truncated
|
307
|
+
marker = (d.properties.next_marker)? d.properties.next_marker : ((d.entries.length > 0)? d.entries.last.key : '')
|
308
|
+
# get this into native char set (because when we feed it back to s3 that's what it will expect)
|
309
|
+
marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join
|
310
|
+
tItems = nil
|
311
|
+
d = nil # get rid of this before recursing; it's big
|
312
|
+
item = nil
|
313
|
+
GC.start # not sure but I think yielding before doing this is causing evil closure bloat
|
314
|
+
items.each do |item|
|
315
|
+
if not (item.kind_of? String)
|
316
|
+
# this is an item
|
317
|
+
excludePath = item.name.slice($S3SyncOriginalS3Prefix.length...item.name.length)
|
318
|
+
if $S3SyncExclude and $S3SyncExclude.match(excludePath)
|
319
|
+
debug("skipping S3 item #{excludePath} due to --exclude")
|
320
|
+
else
|
321
|
+
debug("S3 item #{item.name}")
|
322
|
+
g.yield(S3Node.new(bucket, prefix, item))
|
323
|
+
end
|
324
|
+
else
|
325
|
+
# it's a prefix (i.e. there are sub keys)
|
326
|
+
partialPath = item.slice(prefix.length..item.length) # will have trailing slash
|
327
|
+
excludePath = item.slice($S3SyncOriginalS3Prefix.length...item.length)
|
328
|
+
# recurse
|
329
|
+
if $S3SyncExclude and $S3SyncExclude.match(excludePath)
|
330
|
+
debug("skipping prefix #{excludePath} due to --exclude")
|
331
|
+
else
|
332
|
+
debug("prefix found: #{partialPath}")
|
333
|
+
s3TreeRecurse(g, bucket, prefix, partialPath) if $S3syncOptions['--recursive']
|
334
|
+
end
|
335
|
+
end
|
336
|
+
end
|
337
|
+
items = nil
|
338
|
+
end # of while nextPage
|
339
|
+
if $S3syncOptions['--memory']
|
340
|
+
$stderr.puts "Ending S3 recurse"
|
341
|
+
GC.start
|
342
|
+
stats = ostats stats
|
343
|
+
end
|
344
|
+
end
|
345
|
+
# this will yield the root node first and then recurse
|
346
|
+
s3TreeRecurse(g, s3Bucket, s3Prefix, "")
|
347
|
+
|
348
|
+
end
|
349
|
+
|
350
|
+
# alias the tree objects so we don't care below which direction the transfer is going
|
351
|
+
if sourceIsS3
|
352
|
+
sourceTree, destinationTree = s3Tree, localTree
|
353
|
+
else
|
354
|
+
sourceTree, destinationTree = localTree, s3Tree
|
355
|
+
end
|
356
|
+
|
357
|
+
|
358
|
+
# ---------- COMPARATOR ---------- #
|
359
|
+
|
360
|
+
# run the comparison engine and act according to what we find for each check
|
361
|
+
nodesToDelete = Array.new # a stack. have to delete in reverse order of normal create/update processing
|
362
|
+
|
363
|
+
sourceNode = sourceTree.next? ? sourceTree.next : nil
|
364
|
+
destinationNode = destinationTree.next? ? destinationTree.next : nil
|
365
|
+
while sourceNode or destinationNode do
|
366
|
+
debug("source: #{sourceNode.name}") if sourceNode
|
367
|
+
debug("dest: #{destinationNode.name}") if destinationNode
|
368
|
+
if (!destinationNode) or (sourceNode and (sourceNode.name < destinationNode.name))
|
369
|
+
dNode =
|
370
|
+
if sourceNode.kind_of? LocalNode
|
371
|
+
S3Node.new(s3Bucket, s3Prefix, sourceNode.name)
|
372
|
+
else
|
373
|
+
LocalNode.new(localPrefix, sourceNode.name)
|
374
|
+
end
|
375
|
+
puts "Create node #{sourceNode.name}" if $S3syncOptions['--verbose']
|
376
|
+
dNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
|
377
|
+
sourceNode = sourceTree.next? ? sourceTree.next : nil
|
378
|
+
elsif (!sourceNode) or (destinationNode and (sourceNode.name > destinationNode.name))
|
379
|
+
$stderr.puts "Source does not have #{destinationNode.name}" if $S3syncOptions['--debug']
|
380
|
+
if $S3syncOptions['--delete']
|
381
|
+
if destinationNode.directory?
|
382
|
+
# have to wait
|
383
|
+
nodesToDelete.push(destinationNode)
|
384
|
+
else
|
385
|
+
puts "Remove node #{destinationNode.name}" if $S3syncOptions['--verbose']
|
386
|
+
destinationNode.delete unless $S3syncOptions['--dryrun']
|
387
|
+
end
|
388
|
+
end
|
389
|
+
destinationNode = destinationTree.next? ? destinationTree.next : nil
|
390
|
+
elsif sourceNode.name == destinationNode.name
|
391
|
+
if (sourceNode.size != destinationNode.size) or (($S3syncOptions['--no-md5'])? (sourceNode.date > destinationNode.date) : (sourceNode.tag != destinationNode.tag))
|
392
|
+
puts "Update node #{sourceNode.name}" if $S3syncOptions['--verbose']
|
393
|
+
destinationNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
|
394
|
+
elsif $S3syncOptions['--debug']
|
395
|
+
$stderr.puts "Node #{sourceNode.name} unchanged"
|
396
|
+
end
|
397
|
+
sourceNode = sourceTree.next? ? sourceTree.next : nil
|
398
|
+
destinationNode = destinationTree.next? ? destinationTree.next : nil
|
399
|
+
end
|
400
|
+
end
|
401
|
+
|
402
|
+
# get rid of the (now empty, except for other directories) directories
|
403
|
+
nodesToDelete.reverse_each do |node|
|
404
|
+
puts "Remove node #{node.name}" if $S3syncOptions['--verbose']
|
405
|
+
node.delete unless $S3syncOptions['--dryrun']
|
406
|
+
end
|
407
|
+
|
408
|
+
end #main
|
61
409
|
|
62
|
-
|
63
|
-
|
64
|
-
#
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
410
|
+
|
411
|
+
|
412
|
+
# ---------- NODE ---------- #
|
413
|
+
|
414
|
+
# Minimal description of one item in a tree being compared: its name relative
# to the sync root, its size, its tag (an MD5 hex digest where one is known)
# and its modification date.
class Node
  attr_reader :name, :size, :tag, :date

  # name - String name of the item (default '')
  # size - size of the item (default 0)
  # tag  - String tag used to detect content changes (default '')
  # date - Time of last modification (default: now, in UTC)
  def initialize(name = '', size = 0, tag = '', date = Time.now.utc)
    @name, @size, @tag, @date = name, size, tag, date
  end

  # True when this node carries the tag and size of the directory placeholder
  # held in $S3syncDirTag / $S3syncDirString.
  def directory?
    # tag first: the size global is only consulted once the tag has matched
    return false unless @tag == $S3syncDirTag
    @size == $S3syncDirString.length
  end
end
|
429
|
+
|
430
|
+
# ---------- S3Node ---------- #
|
431
|
+
|
432
|
+
# A key in an S3 bucket, seen as a node of the tree being synchronised.
#
# All S3 traffic goes through S3sync.S3try; the response of the most recent
# request is kept in @result so that the metadata readers (symlink?, owner,
# group, permissions) can reuse it instead of issuing another HEAD.
class S3Node < Node
  # bucket     - name of the bucket holding the key
  # prefix     - String key prefix the sync is rooted at (may be empty)
  # itemOrName - either a String name relative to prefix (a key that is about
  #              to be written; size, tag and date stay unset), or a listing
  #              item responding to name, size, tag and date
  #
  # Neither prefix nor itemOrName is modified.
  def initialize(bucket, prefix, itemOrName)
    @bucket = bucket
    @result = nil
    if itemOrName.kind_of? String
      # don't create directories with a slash on the end; sub (not sub!) so
      # the string handed in -- typically another node's name -- is left alone
      @name = itemOrName.sub(%r{/$}, "")
      #6/2007. the prefix can be filled but the name empty, in the case of s3sync -r somedir somebucket:
      if @name.empty? and not prefix.empty?
        @name = prefix
        prefix = ""
      end
      slash = prefix.empty? ? "" : "/"
      @path = prefix + slash + @name
    else
      key = itemOrName.name
      @name = (key.slice(prefix.length..key.length) || '')
      # depending whether the prefix is / tailed, the name might need trimming:
      # drop a leading slash left by the simplistic split above, and never keep
      # a trailing one
      @name = @name.sub(%r{^/}, "").sub(%r{/$}, "")
      @path = key.sub(%r{/$}, "")
      @size = itemOrName.size
      @tag = itemOrName.tag.gsub(/"/, '')
      @date = Time.xmlschema(itemOrName.date)
    end
    debug("s3 node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
  end

  # get this item from s3 into the provided stream
  # S3 pushes to the local item, due to how http streaming is implemented
  def to_stream(s)
    @result = S3sync.S3try(:get_stream, @bucket, @path, {}, s)
  end

  # True when the stored metadata marks this key as a symlink.
  def symlink?()
    flag = head_metadata['symlink']
    debug("symlink value is: #{flag}")
    flag == 'true'
  end

  # Numeric owner id recorded in the metadata.
  def owner
    value = head_metadata['owner']
    debug("Owner of this s3 node is #{value}")
    value.to_i # if not there, will be nil => 0 which == root so good default
  end

  # Numeric group id recorded in the metadata.
  def group
    head_metadata['group'].to_i # 0 default ok
  end

  # File mode recorded in the metadata (stored as a decimal string), or
  # owner-only read/write when none was recorded.
  def permissions
    g = head_metadata['permissions']
    # the fallback must be the octal literal: decimal 600 is mode 01130
    g ? g.to_i : 0600
  end

  # Upload the content of fromNode to this key.
  #
  # fromNode - node responding to stream, size, owner, group, permissions and
  #            symlink?
  #
  # A source that cannot be read is reported on $stderr and skipped.
  # Raises RuntimeError when fromNode does not respond to :stream.
  def updateFrom(fromNode)
    unless fromNode.respond_to?(:stream)
      raise "Node provided as update source doesn't support :stream"
    end
    meta = {
      'owner' => fromNode.owner.to_s,
      'group' => fromNode.group.to_s,
      'permissions' => fromNode.permissions.to_s
    }
    meta['symlink'] = 'true' if fromNode.symlink?
    begin
      theStream = fromNode.stream
      # ProgressStream is defined outside this file's visible part; presumably
      # it wraps the stream and reports progress -- verify against its source
      theStream = ProgressStream.new(theStream, fromNode.size) if $S3syncOptions['--progress']

      s3o = S3::S3Object.new(theStream, meta)
      debug(@path)
      headers = {'Content-Length' => (fromNode.size.respond_to?(:nonzero?) ? fromNode.size.to_s : '0')}
      headers['x-amz-acl'] = 'public-read' if $S3syncOptions['--public-read']
      headers['Expires'] = $S3syncOptions['--expires'] if $S3syncOptions['--expires']
      headers['Cache-Control'] = $S3syncOptions['--cache-control'] if $S3syncOptions['--cache-control']
      fType = @path.split('.').last
      debug("File extension: #{fType}")
      if defined?($mimeTypes) and fType != '' and (mType = $mimeTypes[fType]) and mType != ''
        debug("Mime type: #{mType}")
        headers['Content-Type'] = mType
      end
      @result = S3sync.S3try(:put, @bucket, @path, s3o, headers)
      theStream.close if (theStream and not theStream.closed?)
    rescue NoMethodError
      # when --progress is used and we can't get the stream object, it doesn't report as null
      # so the above .closed? test will break
      # (interpolate the exception: String + Exception is a TypeError on Ruby >= 1.9)
      $stderr.puts "Skipping #{@path}: #{$!}"
    rescue SystemCallError
      theStream.close if (theStream and not theStream.closed?)
      $stderr.puts "Skipping #{@path}: #{$!}"
    end
  end

  # Remove this key from the bucket.
  def delete
    @result = S3sync.S3try(:delete, @bucket, @path)
  end

  private

  # Metadata of the most recent response for this key; issues a HEAD request
  # first when no request has been made yet.
  def head_metadata
    @result = S3sync.S3try(:head, @bucket, @path) unless @result
    @result.object.metadata
  end
end
|
532
|
+
|
533
|
+
# ---------- LocalNode ---------- #
|
534
|
+
|
535
|
+
# A file, symlink or directory on the local file system, seen as a node of the
# tree being synchronised.
class LocalNode < Node
  # prefix      - String root directory of the local tree (may be empty)
  # partialPath - String path of the item relative to prefix ('' for the root)
  #
  # Size, tag and date are taken from the file system: a symlink is described
  # by its target string, a directory by the shared placeholder values, a
  # regular file by its size and (unless --no-md5) the MD5 of its content.
  # An item that is none of these keeps them unset.
  def initialize(prefix, partialPath)
    slash = prefix.empty? ? "" : "/"
    @path = prefix + slash + partialPath
    # slash isn't at the front of this any more, so the name is the partial path as given
    @name = partialPath
    if FileTest.symlink?(@path)
      # this could use the 'file' case below, but why create an extra temp file
      linkData = File.readlink(@path)
      $stderr.puts "link to: #{linkData}" if $S3syncOptions['--debug']
      @size = linkData.length
      unless $S3syncOptions['--no-md5']
        md5 = Digest::MD5.new
        md5 << linkData
        @tag = md5.hexdigest
      end
      @date = File.lstat(@path).mtime.utc
    elsif FileTest.file?(@path)
      @size = FileTest.size(@path)
      begin
        unless $S3syncOptions['--no-md5']
          data = self.stream
          begin
            md5 = Digest::MD5.new
            # stream so it's not taking all memory
            md5 << data.read(2048) until data.eof?
            @tag = md5.hexdigest
          ensure
            # release the handle even when a read fails part way through
            data.close if data
          end
        end
        @date = File.stat(@path).mtime.utc
      rescue SystemCallError
        # well we're not going to have an md5 that's for sure
        @tag = nil
      end
    elsif FileTest.directory?(@path)
      # all s3 directories are dummy nodes contain the same directory string
      # so for easy comparison, set our size and tag thusly
      @size = $S3syncDirString.length
      @tag = $S3syncDirTag
      @date = File.stat(@path).mtime.utc
    end
    debug("local node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
  end

  # return a stream that will read the contents of the local item
  # local gets pulled by the S3Node update fn, due to how http streaming is implemented
  #
  # Symlinks and directories are served from a temp file holding the link
  # target or the directory placeholder string. Returns nil for anything that
  # is neither symlink, directory nor regular file. A SystemCallError is
  # reported on $stderr and re-raised.
  def stream
    # 1.0.8 switch order of these tests because a symlinked file will say yes to 'file?'
    if FileTest.symlink?(@path) or FileTest.directory?(@path)
      tf = Tempfile.new('s3sync')
      if FileTest.symlink?(@path)
        tf.printf('%s', File.readlink(@path))
      elsif FileTest.directory?(@path)
        tf.printf('%s', $S3syncDirString)
      end
      # close and reopen so the caller reads from the start
      tf.close
      tf.open
      tf
    elsif FileTest.file?(@path)
      File.open(@path, 'rb')
    end
  rescue SystemCallError
    $stderr.puts "Could not read #{@path}: #{$!}"
    raise
  end

  # File::Stat of the item itself (not of a symlink's target).
  def stat
    FileTest.symlink?(@path) ? File.lstat(@path) : File.stat(@path)
  end

  # True when something exists at the path, dangling symlinks included.
  def exist?
    FileTest.exist?(@path) or FileTest.symlink?(@path)
  end

  # Owning user id, or 0 when the item does not exist.
  def owner
    self.exist? ? self.stat().uid : 0
  end

  # Owning group id, or 0 when the item does not exist.
  def group
    self.exist? ? self.stat().gid : 0
  end

  # Mode bits of the item, or owner-only read/write when it does not exist.
  def permissions
    # the fallback must be the octal literal: decimal 600 is mode 01130
    self.exist? ? self.stat().mode : 0600
  end

  # Bring the local item in line with fromNode.
  #
  # fromNode - node responding to to_stream, size, directory?, symlink?,
  #            owner, group and permissions
  #
  # Content is downloaded to "<path>.s3syncTemp" and then turned into the
  # final file, symlink or directory; failures of the individual file system
  # steps are reported on $stderr. Raises RuntimeError when fromNode does not
  # respond to :to_stream.
  def updateFrom(fromNode)
    unless fromNode.respond_to?(:to_stream)
      raise "Node provided as update source doesn't support :to_stream"
    end
    fName = @path + '.s3syncTemp'
    # handle the case where the user wants us to create dirs that don't exist in S3
    make_parent_dirs if $S3syncOptions['--make-dirs']
    unless fromNode.directory?
      f = File.open(fName, 'wb')
      begin
        # ProgressStream is defined outside this file's visible part
        f = ProgressStream.new(f, fromNode.size) if $S3syncOptions['--progress']

        fromNode.to_stream(f)
      ensure
        # don't leak the handle when the download raises
        f.close
      end
    end
    # get original item out of the way (dangling symlinks included) -- except
    # an existing real directory that is only being refreshed from a directory
    # node: it cannot be unlinked and has to keep its contents
    keepDir = (fromNode.directory? && FileTest.directory?(@path) && !FileTest.symlink?(@path))
    File.unlink(@path) if exist? && !keepDir
    if fromNode.symlink?
      linkTo = File.open(fName, 'rb') { |io| io.read }
      debug("#{@path} will be a symlink to #{linkTo}")
      begin
        File.symlink(linkTo, @path)
      rescue NotImplementedError
        # windows doesn't do symlinks, for example
        # just bail
        File.unlink(fName) if File.exist?(fName)
        return
      rescue SystemCallError
        $stderr.puts "Could not write symlink #{@path}: #{$!}"
      end
    elsif fromNode.directory?
      debug(@path)
      begin
        Dir.mkdir(@path) unless FileTest.exist?(@path)
      rescue SystemCallError
        $stderr.puts "Could not mkdir #{@path}: #{$!}"
      end
    else
      begin
        File.rename(fName, @path)
      rescue SystemCallError
        $stderr.puts "Could not write (rename) #{@path}: #{$!}"
      end
    end
    # clean up if the temp file is still there (as for links)
    File.unlink(fName) if File.exist?(fName)

    # update permissions; for a symlink use the l-variants so the link itself
    # is changed rather than its target
    linkCommand = fromNode.symlink? ? 'l' : ''
    begin
      File.send(linkCommand + 'chown', fromNode.owner, fromNode.group, @path)
      File.send(linkCommand + 'chmod', fromNode.permissions, @path)
    rescue NotImplementedError
      # no one has lchmod, but who really cares
    rescue SystemCallError
      $stderr.puts "Could not change owner/permissions on #{@path}: #{$!}"
    end
  end

  # True when the item is a symbolic link.
  def symlink?()
    FileTest.symlink?(@path)
  end

  # Remove the item from the file system; failures are reported on $stderr.
  def delete
    # don't try to delete the restore root dir
    # this is a quick fix to deal with the fact that the tree recurse has to visit the root node
    return unless @name != ''
    # exist? (unlike FileTest.exist?) also sees dangling symlinks
    return unless exist?
    begin
      # a symlink pointing at a directory is removed as a link, not with rmdir
      if FileTest.directory?(@path) and not FileTest.symlink?(@path)
        Dir.rmdir(@path)
      else
        File.unlink(@path)
      end
    rescue SystemCallError
      $stderr.puts "Could not delete #{@path}: #{$!}"
    end
  end

  private

  # Create every missing directory leading up to @path (the last path
  # component is the item itself and is not created). A directory that cannot
  # be made is reported on $stderr and the walk continues.
  def make_parent_dirs
    dirs = @path.split('/')
    # but the last one is a file name
    dirs.pop
    current = ''
    dirs.each do |dir|
      current << dir << '/'
      begin
        Dir.mkdir(current) unless FileTest.exist?(current)
      rescue SystemCallError
        $stderr.puts "Could not mkdir #{current}: #{$!}"
      end
    end
  end
end
|
713
|
+
|
714
|
+
|
715
|
+
end #module
|
716
|
+
|
717
|
+
# Write a diagnostic line to $stderr when the --debug option is in effect.
#
# str - the message to print
#
# Safe to call before the options have been parsed: while $S3syncOptions is
# still unassigned the message is simply dropped.
def debug(str)
  $stderr.puts str if $S3syncOptions && $S3syncOptions['--debug']
end
|
720
|
+
|
721
|
+
# Dump a census of live objects, grouped by class, to $stderr (most numerous
# class first) and return it.
#
# last_stat - Hash of class => count from an earlier call, or nil. When given,
#             each line also shows the change against that snapshot; a class
#             missing from it is treated as having had 0 instances.
#
# Returns a Hash (default value 0) mapping each class to its instance count.
def ostats(last_stat = nil)
  stats = Hash.new(0)
  ObjectSpace.each_object { |o| stats[o.class] += 1 }

  stats.sort_by { |_klass, count| -count }.each do |klass, count|
    $stderr.printf("%-30s %10d", klass, count)
    # `|| 0` so a plain Hash without a default value works as a snapshot too
    $stderr.printf(" delta %10d", count - (last_stat[klass] || 0)) if last_stat
    $stderr.puts
  end

  stats
end
|
733
|
+
|
734
|
+
# go!
|
735
|
+
S3sync::main
|