s3sync 1.2.5 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/s3sync +67 -726
- data/lib/s3sync.rb +2 -0
- data/lib/s3sync/cli.rb +475 -0
- data/lib/s3sync/config.rb +98 -0
- data/lib/s3sync/exceptions.rb +55 -0
- data/lib/s3sync/sync.rb +371 -0
- data/lib/s3sync/util.rb +29 -0
- data/lib/s3sync/version.rb +27 -0
- metadata +177 -54
- data/CHANGELOG +0 -175
- data/README +0 -401
- data/README_s3cmd +0 -172
- data/Rakefile +0 -35
- data/bin/s3cmd +0 -245
- data/lib/HTTPStreaming.rb +0 -103
- data/lib/S3.rb +0 -707
- data/lib/S3_s3sync_mod.rb +0 -143
- data/lib/S3encoder.rb +0 -50
- data/lib/s3config.rb +0 -27
- data/lib/s3try.rb +0 -161
- data/lib/thread_generator.rb +0 -383
- data/lib/version.rb +0 -9
- data/setup.rb +0 -1585
data/bin/s3sync
CHANGED
@@ -1,735 +1,76 @@
|
|
1
|
-
|
2
|
-
# This software code is made available "AS IS" without warranties of any
|
3
|
-
# kind. You may copy, display, modify and redistribute the software
|
4
|
-
# code either by itself or as incorporated into your code; provided that
|
5
|
-
# you do not remove any proprietary notices. Your use of this software
|
6
|
-
# code is at your own risk and you waive any claim against the author
|
7
|
-
# with respect to your use of this software code.
|
8
|
-
# (c) 2007 s3sync.net
|
1
|
+
#!/usr/bin/env ruby
|
9
2
|
#
|
3
|
+
# s3sync - Tool belt for managing your S3 buckets
|
4
|
+
#
|
5
|
+
# The MIT License (MIT)
|
6
|
+
#
|
7
|
+
# Copyright (c) 2013 Lincoln de Sousa <lincoln@clarete.li>
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
10
|
+
# of this software and associated documentation files (the "Software"), to deal
|
11
|
+
# in the Software without restriction, including without limitation the rights
|
12
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
13
|
+
# copies of the Software, and to permit persons to whom the Software is
|
14
|
+
# furnished to do so, subject to the following conditions:
|
15
|
+
#
|
16
|
+
# The above copyright notice and this permission notice shall be included in
|
17
|
+
# all copies or substantial portions of the Software.
|
18
|
+
#
|
19
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
20
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
21
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
22
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
23
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
24
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
25
|
+
# THE SOFTWARE.
|
10
26
|
|
11
|
-
|
12
|
-
|
13
|
-
$S3SYNC_MIME_TYPES_FILE = (ENV["S3SYNC_MIME_TYPES_FILE"] or '/etc/mime.types')
|
14
|
-
|
15
|
-
$S3SYNC_VERSION = '1.2.5'
|
16
|
-
|
17
|
-
# always look "here" for include files (thanks aktxyz)
|
18
|
-
$LOAD_PATH << File.expand_path(File.dirname(__FILE__))
|
19
|
-
|
20
|
-
require 'getoptlong'
|
21
|
-
#require 'generator' # http://www.ruby-doc.org/stdlib/libdoc/generator/rdoc/classes/Generator.html
|
22
|
-
require 'thread_generator' # memory doesn't leak with this one, at least nothing near as bad
|
23
|
-
require 'md5'
|
24
|
-
require 'tempfile'
|
25
|
-
require 's3try'
|
26
|
-
|
27
|
-
# after other mods, so we don't overwrite yaml vals with defaults
|
28
|
-
require 's3config'
|
29
|
-
include S3Config
|
30
|
-
|
31
|
-
$S3syncDirString = '{E40327BF-517A-46e8-A6C3-AF51BC263F59}'
|
32
|
-
$S3syncDirTag = 'd66759af42f282e1ba19144df2d405d0'
|
33
|
-
$S3syncDirFile = Tempfile.new("s3sync")
|
34
|
-
$S3syncDirFile.puts $S3syncDirString
|
35
|
-
$S3syncDirFile.close # not final; we need this file again to 'put' directory nodes
|
36
|
-
|
37
|
-
if $S3SYNC_MIME_TYPES_FILE and FileTest.exist?($S3SYNC_MIME_TYPES_FILE)
|
38
|
-
File.open($S3SYNC_MIME_TYPES_FILE, 'r') do |f|
|
39
|
-
$mimeTypes = {}
|
40
|
-
f.each_line do |l|
|
41
|
-
if l =~ /^(\w\S+)\s+(\S.*)$/
|
42
|
-
type = $1
|
43
|
-
exts = $2.split
|
44
|
-
exts.each do |e|
|
45
|
-
$mimeTypes[e.to_s] = type.to_s
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
def S3sync.main
|
53
|
-
# ---------- OPTIONS PROCESSING ---------- #
|
54
|
-
|
55
|
-
$S3syncOptions = Hash.new
|
56
|
-
optionsParser = GetoptLong.new(
|
57
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
58
|
-
[ '--ssl', '-s', GetoptLong::NO_ARGUMENT ],
|
59
|
-
[ '--recursive','-r', GetoptLong::NO_ARGUMENT ],
|
60
|
-
[ '--public-read','-p', GetoptLong::NO_ARGUMENT ],
|
61
|
-
[ '--delete', GetoptLong::NO_ARGUMENT ],
|
62
|
-
[ '--verbose', '-v', GetoptLong::NO_ARGUMENT ],
|
63
|
-
[ '--dryrun', '-n', GetoptLong::NO_ARGUMENT ],
|
64
|
-
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
65
|
-
[ '--memory', '-m', GetoptLong::NO_ARGUMENT ],
|
66
|
-
[ '--progress', GetoptLong::NO_ARGUMENT ],
|
67
|
-
[ '--expires', GetoptLong::REQUIRED_ARGUMENT ],
|
68
|
-
[ '--cache-control', GetoptLong::REQUIRED_ARGUMENT ],
|
69
|
-
[ '--exclude', GetoptLong::REQUIRED_ARGUMENT ],
|
70
|
-
[ '--make-dirs', GetoptLong::NO_ARGUMENT ],
|
71
|
-
[ '--no-md5', GetoptLong::NO_ARGUMENT ]
|
72
|
-
)
|
73
|
-
|
74
|
-
def S3sync.usage(message = nil)
|
75
|
-
$stderr.puts message if message
|
76
|
-
name = $0.split('/').last
|
77
|
-
$stderr.puts <<"ENDUSAGE"
|
78
|
-
#{name} [options] <source> <destination>\t\tversion #{$S3SYNC_VERSION}
|
79
|
-
--help -h --verbose -v --dryrun -n
|
80
|
-
--ssl -s --recursive -r --delete
|
81
|
-
--public-read -p --expires="<exp>" --cache-control="<cc>"
|
82
|
-
--exclude="<regexp>" --progress --debug -d
|
83
|
-
--make-dirs --no-md5
|
84
|
-
One of <source> or <destination> must be of S3 format, the other a local path.
|
85
|
-
Reminders:
|
86
|
-
* An S3 formatted item with bucket 'mybucket' and prefix 'mypre' looks like:
|
87
|
-
mybucket:mypre/some/key/name
|
88
|
-
* Local paths should always use forward slashes '/' even on Windows
|
89
|
-
* Whether you use a trailing slash on the source path makes a difference.
|
90
|
-
* For examples see README.
|
91
|
-
ENDUSAGE
|
92
|
-
exit
|
93
|
-
end #usage
|
94
|
-
|
95
|
-
begin
|
96
|
-
optionsParser.each {|opt, arg| $S3syncOptions[opt] = (arg || true)}
|
97
|
-
rescue StandardError
|
98
|
-
usage # the parser already printed an error message
|
99
|
-
end
|
100
|
-
usage if $S3syncOptions['--help']
|
101
|
-
$S3syncOptions['--verbose'] = true if $S3syncOptions['--dryrun'] or $S3syncOptions['--debug'] or $S3syncOptions['--progress']
|
102
|
-
$S3syncOptions['--ssl'] = true if $S3syncOptions['--ssl'] # change from "" to true to appease s3 port chooser
|
103
|
-
|
104
|
-
|
105
|
-
# ---------- CONNECT ---------- #
|
106
|
-
S3sync::s3trySetup
|
107
|
-
|
108
|
-
# ---------- PREFIX PROCESSING ---------- #
|
109
|
-
|
110
|
-
def S3sync.s3Prefix?(pre)
|
111
|
-
# allow for dos-like things e.g. C:\ to be treated as local even with colon
|
112
|
-
pre.include?(':') and not pre.match('^[A-Za-z]:[\\\\/]')
|
113
|
-
end
|
114
|
-
sourcePrefix, destinationPrefix = ARGV
|
115
|
-
usage("You didn't set up your environment variables; see README.txt") if not($AWS_ACCESS_KEY_ID and $AWS_SECRET_ACCESS_KEY)
|
116
|
-
usage('Need a source and a destination') if sourcePrefix == nil or destinationPrefix == nil
|
117
|
-
usage('Both arguments can\'t be on S3') if s3Prefix?(sourcePrefix) and s3Prefix?(destinationPrefix)
|
118
|
-
usage('One argument must be on S3') if !s3Prefix?(sourcePrefix) and !s3Prefix?(destinationPrefix)
|
119
|
-
|
120
|
-
# so we can modify them
|
121
|
-
sourcePrefix, destinationPrefix = sourcePrefix.dup, destinationPrefix.dup
|
27
|
+
$:.unshift(File.dirname(__FILE__) + '/../lib') unless $:.include?(File.dirname(__FILE__) + '/../lib')
|
122
28
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
# testing for empty isn't good enough here.. needs to be "empty apart from potentially having 'bucket:'"
|
127
|
-
slash = (destinationPrefix.empty? or destinationPrefix.match(%r{:$}))? "" : "/"
|
128
|
-
# not good enough.. sometimes this coughs up the bucket as a prefix destinationPrefix.replace(destinationPrefix + slash + sourcePrefix.split(/(?:\/|:)/).last)
|
129
|
-
# take everything at the end after a slash or colon
|
130
|
-
destinationPrefix.replace(destinationPrefix + slash + %r{([^/:]*)$}.match(sourcePrefix)[1])
|
131
|
-
end
|
132
|
-
# no trailing slash on dest, ever.
|
133
|
-
destinationPrefix.sub!(%r{/$}, "")
|
134
|
-
|
135
|
-
# don't repeat slashes
|
136
|
-
sourcePrefix.squeeze!('/')
|
137
|
-
destinationPrefix.squeeze!('/')
|
138
|
-
|
139
|
-
# here's where we find out what direction we're going
|
140
|
-
sourceIsS3 = s3Prefix?(sourcePrefix)
|
141
|
-
# alias these variables to the other strings (in ruby = does not make copies of strings)
|
142
|
-
s3Prefix = sourceIsS3 ? sourcePrefix : destinationPrefix
|
143
|
-
localPrefix = sourceIsS3 ? destinationPrefix : sourcePrefix
|
144
|
-
|
145
|
-
# canonicalize the S3 stuff
|
146
|
-
s3Bucket = (/^(.*?):/.match(s3Prefix))[1]
|
147
|
-
s3Prefix.replace((/:(.*)$/.match(s3Prefix))[1])
|
148
|
-
debug("s3Prefix #{s3Prefix}")
|
149
|
-
$S3SyncOriginalS3Prefix = s3Prefix.dup
|
150
|
-
|
151
|
-
# canonicalize the local stuff
|
152
|
-
# but that can kill a trailing slash, which we need to preserve long enough to know whether we mean "the dir" or "its contents"
|
153
|
-
# it will get re-stripped by the local generator after expressing this knowledge
|
154
|
-
localTrailingSlash = localPrefix.match(%r{/$})
|
155
|
-
localPrefix.replace(File.expand_path(localPrefix))
|
156
|
-
localPrefix += '/' if localTrailingSlash
|
157
|
-
debug("localPrefix #{localPrefix}")
|
158
|
-
# used for exclusion parsing
|
159
|
-
$S3SyncOriginalLocalPrefix = localPrefix.dup
|
160
|
-
|
161
|
-
# exclude preparation
|
162
|
-
# we don't want to build then throw away this regexp for each node in the universe; do it once globally
|
163
|
-
$S3SyncExclude = Regexp.new($S3syncOptions['--exclude']) if $S3syncOptions['--exclude']
|
164
|
-
|
165
|
-
|
166
|
-
# ---------- GENERATORS ---------- #
|
167
|
-
|
168
|
-
|
169
|
-
# a generator that will return the files/dirs of the local tree one by one
|
170
|
-
# sorted and decorated for easy comparison with the S3 tree
|
171
|
-
localTree = Generator.new do |g|
|
172
|
-
def S3sync.localTreeRecurse(g, prefix, path)
|
173
|
-
debug("localTreeRecurse #{prefix} #{path}")
|
174
|
-
#if $S3syncOptions['--memory']
|
175
|
-
# $stderr.puts "Starting local recurse"
|
176
|
-
# stats = ostats stats
|
177
|
-
#end
|
178
|
-
d = nil
|
179
|
-
begin
|
180
|
-
slash = prefix.empty? ? "" : "/"
|
181
|
-
d = Dir.new(prefix + slash + path)
|
182
|
-
rescue Errno::ENOENT
|
183
|
-
# ok the dir doesn't exist at all (this only really occurs for the root i.e. first dir)
|
184
|
-
return nil
|
185
|
-
rescue Errno::EACCES
|
186
|
-
# vista won't even let us touch some stuff in our own profile
|
187
|
-
return nil
|
188
|
-
end
|
189
|
-
# do some pre-processing
|
190
|
-
# the following sleight of hand is to make the recursion match the way s3 sorts
|
191
|
-
# take for example the directory 'foo' and the file 'foo.bar'
|
192
|
-
# when we encounter the dir we would want to recurse into it
|
193
|
-
# but S3 would just say 'period < slash' and sort 'foo.bar' between the dir node
|
194
|
-
# and the contents in that 'dir'
|
195
|
-
#
|
196
|
-
# so the solution is to not recurse into the directory until the point where
|
197
|
-
# it would come up "next" in the S3 list
|
198
|
-
# We have to do these hoops on the local side, because we have very little control
|
199
|
-
# over how S3 will return its results
|
200
|
-
toAdd = Array.new
|
201
|
-
d.each do |name|
|
202
|
-
slash = path.empty? ? "" : "/"
|
203
|
-
partialPath = path + slash + name
|
204
|
-
slash = prefix.empty? ? "" : "/"
|
205
|
-
fullPath = prefix + slash + partialPath
|
206
|
-
if name == "." or name == ".."
|
207
|
-
# skip
|
208
|
-
else
|
209
|
-
# add a dir node if appropriate
|
210
|
-
debug("Test #{fullPath}")
|
211
|
-
if ((not FileTest.symlink?(fullPath)) and FileTest.directory?(fullPath)) and $S3syncOptions['--recursive']
|
212
|
-
debug("Adding it as a dir node")
|
213
|
-
toAdd.push(name + '/') # always trail slash here for sorting purposes (removed below with rindex test)
|
214
|
-
end
|
215
|
-
end
|
216
|
-
end
|
217
|
-
dItems = d.collect + toAdd
|
218
|
-
d.close
|
219
|
-
d = toAdd = nil
|
220
|
-
dItems.sort! #aws says we will get alpha sorted results but ruby doesn't
|
221
|
-
dItems.each do |name|
|
222
|
-
isDirNode = false
|
223
|
-
if name.rindex('/') == name.length-1
|
224
|
-
name = name.slice(0...name.length-1)
|
225
|
-
isDirNode = true
|
226
|
-
debug("#{name} is a dir node")
|
227
|
-
end
|
228
|
-
slash = path.empty? ? "" : "/"
|
229
|
-
partialPath = path + slash + name
|
230
|
-
slash = prefix.empty? ? "" : "/"
|
231
|
-
fullPath = prefix + slash + partialPath
|
232
|
-
excludePath = fullPath.slice($S3SyncOriginalLocalPrefix.length...fullPath.length)
|
233
|
-
if name == "." or name == ".."
|
234
|
-
# skip
|
235
|
-
elsif $S3SyncExclude and $S3SyncExclude.match(excludePath)
|
236
|
-
debug("skipping local item #{excludePath} because of --exclude")
|
237
|
-
elsif isDirNode
|
238
|
-
localTreeRecurse(g, prefix, partialPath)
|
239
|
-
else
|
240
|
-
# a normal looking node we should try to process
|
241
|
-
debug("local item #{fullPath}")
|
242
|
-
g.yield(LocalNode.new(prefix, partialPath))
|
243
|
-
end
|
244
|
-
end
|
245
|
-
#if $S3syncOptions['--memory']
|
246
|
-
# $stderr.puts "Ending local recurse"
|
247
|
-
# stats = ostats stats
|
248
|
-
#end
|
249
|
-
end
|
250
|
-
# a bit of a special case for local, since "foo/" and "foo" are essentially treated the same by file systems
|
251
|
-
# so we need to think harder about what the user really meant in the command line.
|
252
|
-
localPrefixTrim = localPrefix
|
253
|
-
if localPrefix !~ %r{/$}
|
254
|
-
# no trailing slash, so yield the root itself first, then recurse if appropriate
|
255
|
-
# gork this is still not quite good enough.. if local is the dest then we don't know whether s3 will have a root dir node yielded a priori, so we can't know whether to do this. only matters for --delete though
|
256
|
-
g.yield(LocalNode.new(localPrefixTrim, "")) # technically we should check this for exclusion, but excluding the root node is kind of senseless.. and that would be a pain to set up here
|
257
|
-
localTreeRecurse(g, localPrefixTrim, "") if $S3syncOptions['--recursive']
|
258
|
-
else
|
259
|
-
# trailing slash, so ignore the root itself, and just go into the first level
|
260
|
-
localPrefixTrim.sub!(%r{/$}, "") # strip the slash because of how we do local node slash accounting in the recurse above
|
261
|
-
localTreeRecurse(g, localPrefixTrim, "")
|
262
|
-
end
|
263
|
-
end
|
264
|
-
|
265
|
-
# a generator that will return the nodes in the S3 tree one by one
|
266
|
-
# sorted and decorated for easy comparison with the local tree
|
267
|
-
s3Tree = Generator.new do |g|
|
268
|
-
def S3sync.s3TreeRecurse(g, bucket, prefix, path)
|
269
|
-
if $S3syncOptions['--memory']
|
270
|
-
$stderr.puts "Starting S3 recurse"
|
271
|
-
GC.start
|
272
|
-
stats = ostats stats
|
273
|
-
end
|
274
|
-
$stderr.puts "s3TreeRecurse #{bucket} #{prefix} #{path}" if $S3syncOptions['--debug']
|
275
|
-
nextPage = true
|
276
|
-
marker = ''
|
277
|
-
while nextPage do
|
278
|
-
fullPrefix = prefix + path
|
279
|
-
debug("nextPage: #{marker}") if marker != ''
|
280
|
-
options = {}
|
281
|
-
options['prefix'] = fullPrefix # start at the right depth
|
282
|
-
options['delimiter'] = '/' # only one dir at a time please
|
283
|
-
options['max-keys'] = '200' # use manageable chunks
|
284
|
-
options['marker'] = marker unless marker == ''
|
285
|
-
d = S3sync.S3try(:list_bucket, bucket, options)
|
286
|
-
$stderr.puts "S3 ERROR: #{d.http_response}" unless d.http_response.is_a? Net::HTTPSuccess
|
287
|
-
# the 'directories' and leaf nodes are in two separate collections
|
288
|
-
# because a dir will never have the same name as a node, we can just shove them together and sort
|
289
|
-
# it's important to evaluate them alphabetically for efficient comparison to the local tree
|
290
|
-
tItems = d.entries + d.common_prefix_entries
|
291
|
-
tItems.sort! do |a,b|
|
292
|
-
aName = a.respond_to?('key') ? a.key : a.prefix
|
293
|
-
bName = b.respond_to?('key') ? b.key : b.prefix
|
294
|
-
# the full path will be returned, efficient to ignore the part we know will be in common
|
295
|
-
aName.slice(fullPrefix.length..aName.length) <=> bName.slice(fullPrefix.length..bName.length)
|
296
|
-
end
|
297
|
-
# get rid of the big s3 objects asap, just save light-weight nodes and strings
|
298
|
-
items = tItems.collect do |item|
|
299
|
-
if item.respond_to?('key')
|
300
|
-
key = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join
|
301
|
-
Node.new(key, item.size, item.etag, item.last_modified)
|
302
|
-
else
|
303
|
-
Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.prefix).join
|
304
|
-
end
|
305
|
-
end
|
306
|
-
nextPage = d.properties.is_truncated
|
307
|
-
marker = (d.properties.next_marker)? d.properties.next_marker : ((d.entries.length > 0)? d.entries.last.key : '')
|
308
|
-
# get this into native char set (because when we feed it back to s3 that's what it will expect)
|
309
|
-
marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join
|
310
|
-
tItems = nil
|
311
|
-
d = nil # get rid of this before recursing; it's big
|
312
|
-
item = nil
|
313
|
-
GC.start # not sure but I think yielding before doing this is causing evil closure bloat
|
314
|
-
items.each do |item|
|
315
|
-
if not (item.kind_of? String)
|
316
|
-
# this is an item
|
317
|
-
excludePath = item.name.slice($S3SyncOriginalS3Prefix.length...item.name.length)
|
318
|
-
if $S3SyncExclude and $S3SyncExclude.match(excludePath)
|
319
|
-
debug("skipping S3 item #{excludePath} due to --exclude")
|
320
|
-
else
|
321
|
-
debug("S3 item #{item.name}")
|
322
|
-
g.yield(S3Node.new(bucket, prefix, item))
|
323
|
-
end
|
324
|
-
else
|
325
|
-
# it's a prefix (i.e. there are sub keys)
|
326
|
-
partialPath = item.slice(prefix.length..item.length) # will have trailing slash
|
327
|
-
excludePath = item.slice($S3SyncOriginalS3Prefix.length...item.length)
|
328
|
-
# recurse
|
329
|
-
if $S3SyncExclude and $S3SyncExclude.match(excludePath)
|
330
|
-
debug("skipping prefix #{excludePath} due to --exclude")
|
331
|
-
else
|
332
|
-
debug("prefix found: #{partialPath}")
|
333
|
-
s3TreeRecurse(g, bucket, prefix, partialPath) if $S3syncOptions['--recursive']
|
334
|
-
end
|
335
|
-
end
|
336
|
-
end
|
337
|
-
items = nil
|
338
|
-
end # of while nextPage
|
339
|
-
if $S3syncOptions['--memory']
|
340
|
-
$stderr.puts "Ending S3 recurse"
|
341
|
-
GC.start
|
342
|
-
stats = ostats stats
|
343
|
-
end
|
344
|
-
end
|
345
|
-
# this will yield the root node first and then recurse
|
346
|
-
s3TreeRecurse(g, s3Bucket, s3Prefix, "")
|
347
|
-
|
348
|
-
end
|
349
|
-
|
350
|
-
# alias the tree objects so we don't care below which direction the transfer is going
|
351
|
-
if sourceIsS3
|
352
|
-
sourceTree, destinationTree = s3Tree, localTree
|
353
|
-
else
|
354
|
-
sourceTree, destinationTree = localTree, s3Tree
|
355
|
-
end
|
356
|
-
|
357
|
-
|
358
|
-
# ---------- COMPARATOR ---------- #
|
359
|
-
|
360
|
-
# run the comparison engine and act according to what we find for each check
|
361
|
-
nodesToDelete = Array.new # a stack. have to delete in reverse order of normal create/update processing
|
362
|
-
|
363
|
-
sourceNode = sourceTree.next? ? sourceTree.next : nil
|
364
|
-
destinationNode = destinationTree.next? ? destinationTree.next : nil
|
365
|
-
while sourceNode or destinationNode do
|
366
|
-
debug("source: #{sourceNode.name}") if sourceNode
|
367
|
-
debug("dest: #{destinationNode.name}") if destinationNode
|
368
|
-
if (!destinationNode) or (sourceNode and (sourceNode.name < destinationNode.name))
|
369
|
-
dNode =
|
370
|
-
if sourceNode.kind_of? LocalNode
|
371
|
-
S3Node.new(s3Bucket, s3Prefix, sourceNode.name)
|
372
|
-
else
|
373
|
-
LocalNode.new(localPrefix, sourceNode.name)
|
374
|
-
end
|
375
|
-
puts "Create node #{sourceNode.name}" if $S3syncOptions['--verbose']
|
376
|
-
dNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
|
377
|
-
sourceNode = sourceTree.next? ? sourceTree.next : nil
|
378
|
-
elsif (!sourceNode) or (destinationNode and (sourceNode.name > destinationNode.name))
|
379
|
-
$stderr.puts "Source does not have #{destinationNode.name}" if $S3syncOptions['--debug']
|
380
|
-
if $S3syncOptions['--delete']
|
381
|
-
if destinationNode.directory?
|
382
|
-
# have to wait
|
383
|
-
nodesToDelete.push(destinationNode)
|
384
|
-
else
|
385
|
-
puts "Remove node #{destinationNode.name}" if $S3syncOptions['--verbose']
|
386
|
-
destinationNode.delete unless $S3syncOptions['--dryrun']
|
387
|
-
end
|
388
|
-
end
|
389
|
-
destinationNode = destinationTree.next? ? destinationTree.next : nil
|
390
|
-
elsif sourceNode.name == destinationNode.name
|
391
|
-
if (sourceNode.size != destinationNode.size) or (($S3syncOptions['--no-md5'])? (sourceNode.date > destinationNode.date) : (sourceNode.tag != destinationNode.tag))
|
392
|
-
puts "Update node #{sourceNode.name}" if $S3syncOptions['--verbose']
|
393
|
-
destinationNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
|
394
|
-
elsif $S3syncOptions['--debug']
|
395
|
-
$stderr.puts "Node #{sourceNode.name} unchanged"
|
396
|
-
end
|
397
|
-
sourceNode = sourceTree.next? ? sourceTree.next : nil
|
398
|
-
destinationNode = destinationTree.next? ? destinationTree.next : nil
|
399
|
-
end
|
400
|
-
end
|
401
|
-
|
402
|
-
# get rid of the (now empty, except for other directories) directories
|
403
|
-
nodesToDelete.reverse_each do |node|
|
404
|
-
puts "Remove node #{node.name}" if $S3syncOptions['--verbose']
|
405
|
-
node.delete unless $S3syncOptions['--dryrun']
|
406
|
-
end
|
407
|
-
|
408
|
-
end #main
|
29
|
+
require "s3sync/exceptions"
|
30
|
+
require "s3sync/config"
|
31
|
+
require "s3sync/cli"
|
409
32
|
|
410
|
-
|
411
|
-
|
412
|
-
# ---------- NODE ---------- #
|
413
|
-
|
414
|
-
class Node
|
415
|
-
attr_reader :name
|
416
|
-
attr_reader :size
|
417
|
-
attr_reader :tag
|
418
|
-
attr_reader :date
|
419
|
-
def initialize(name='', size = 0, tag = '', date = Time.now.utc)
|
420
|
-
@name = name
|
421
|
-
@size = size
|
422
|
-
@tag = tag
|
423
|
-
@date = date
|
424
|
-
end
|
425
|
-
def directory?()
|
426
|
-
@tag == $S3syncDirTag and @size == $S3syncDirString.length
|
427
|
-
end
|
428
|
-
end
|
429
|
-
|
430
|
-
# ---------- S3Node ---------- #
|
431
|
-
|
432
|
-
class S3Node < Node
|
433
|
-
@path = nil
|
434
|
-
@bucket = nil
|
435
|
-
@result = nil
|
436
|
-
def initialize(bucket, prefix, itemOrName)
|
437
|
-
@bucket = bucket
|
438
|
-
if itemOrName.kind_of? String
|
439
|
-
@name = itemOrName
|
440
|
-
@name.sub!(%r{/$}, "") # don't create directories with a slash on the end
|
441
|
-
#6/2007. the prefix can be filled but the name empty, in the case of s3sync -r somedir somebucket:
|
442
|
-
if (not prefix.empty? and @name.empty?)
|
443
|
-
@name = prefix
|
444
|
-
itemOrName = prefix
|
445
|
-
prefix = ""
|
446
|
-
end
|
447
|
-
slash = prefix.empty? ? "" : "/"
|
448
|
-
@path = prefix + slash + itemOrName
|
449
|
-
else
|
450
|
-
@name = (itemOrName.name.slice((prefix.length)..itemOrName.name.length) or '')
|
451
|
-
# depending whether the prefix is / tailed, the name might need trimming
|
452
|
-
@name.sub!(%r{^/},"") # get rid of leading slash in name if there (from above simplistic split)
|
453
|
-
@name.sub!(%r{/$}, "") # don't create directories with a slash on the end
|
454
|
-
@path = itemOrName.name
|
455
|
-
@path.sub!(%r{/$}, "") # don't create directories with a slash on the end
|
456
|
-
@size = itemOrName.size
|
457
|
-
@tag = itemOrName.tag.gsub(/"/,'')
|
458
|
-
@date = Time.xmlschema(itemOrName.date)
|
459
|
-
end
|
460
|
-
debug("s3 node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
|
461
|
-
end
|
462
|
-
# get this item from s3 into the provided stream
|
463
|
-
# S3 pushes to the local item, due to how http streaming is implemented
|
464
|
-
def to_stream(s)
|
465
|
-
@result = S3sync.S3try(:get_stream, @bucket, @path, {}, s)
|
466
|
-
end
|
467
|
-
def symlink?()
|
468
|
-
unless @result
|
469
|
-
@result = S3sync.S3try(:head, @bucket, @path)
|
470
|
-
end
|
471
|
-
debug("symlink value is: #{@result.object.metadata['symlink']}")
|
472
|
-
@result.object.metadata['symlink'] == 'true'
|
473
|
-
end
|
474
|
-
def owner
|
475
|
-
unless @result
|
476
|
-
@result = S3sync.S3try(:head, @bucket, @path)
|
477
|
-
end
|
478
|
-
debug("Owner of this s3 node is #{@result.object.metadata['owner']}")
|
479
|
-
@result.object.metadata['owner'].to_i # if not there, will be nil => 0 which == root so good default
|
480
|
-
end
|
481
|
-
def group
|
482
|
-
unless @result
|
483
|
-
@result = S3sync.S3try(:head, @bucket, @path)
|
484
|
-
end
|
485
|
-
@result.object.metadata['group'].to_i # 0 default ok
|
486
|
-
end
|
487
|
-
def permissions
|
488
|
-
g = @result.object.metadata['permissions']
|
489
|
-
g ? g.to_i : 600 # default to owner only
|
490
|
-
end
|
491
|
-
def updateFrom(fromNode)
|
492
|
-
if fromNode.respond_to?(:stream)
|
493
|
-
meta = Hash.new
|
494
|
-
meta['owner'] = fromNode.owner.to_s
|
495
|
-
meta['group'] = fromNode.group.to_s
|
496
|
-
meta['permissions'] = fromNode.permissions.to_s
|
497
|
-
meta['symlink'] = 'true' if fromNode.symlink?
|
498
|
-
begin
|
499
|
-
theStream = fromNode.stream
|
500
|
-
theStream = ProgressStream.new(theStream, fromNode.size) if $S3syncOptions['--progress']
|
33
|
+
conf = S3Sync::Config.new
|
501
34
|
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
debug("Mime type: #{mType}")
|
512
|
-
headers['Content-Type'] = mType
|
513
|
-
end
|
514
|
-
@result = S3sync.S3try(:put, @bucket, @path, s3o, headers)
|
515
|
-
theStream.close if (theStream and not theStream.closed?)
|
516
|
-
rescue NoMethodError
|
517
|
-
# when --progress is used and we can't get the stream object, it doesn't report as null
|
518
|
-
# so the above .closed? test will break
|
519
|
-
$stderr.puts "Skipping #{@path}: " + $!
|
520
|
-
rescue SystemCallError
|
521
|
-
theStream.close if (theStream and not theStream.closed?)
|
522
|
-
$stderr.puts "Skipping #{@path}: " + $!
|
523
|
-
end
|
524
|
-
else
|
525
|
-
raise "Node provided as update source doesn't support :stream"
|
526
|
-
end
|
527
|
-
end
|
528
|
-
def delete
|
529
|
-
@result = S3sync.S3try(:delete, @bucket, @path)
|
530
|
-
end
|
531
|
-
end
|
532
|
-
|
533
|
-
# ---------- LocalNode ---------- #
|
534
|
-
|
535
|
-
class LocalNode < Node
|
536
|
-
@path = nil
|
537
|
-
def initialize(prefix, partialPath)
|
538
|
-
slash = prefix.empty? ? "" : "/"
|
539
|
-
@path = prefix + slash + partialPath
|
540
|
-
# slash isn't at the front of this any more @name = (partialPath.slice(1..partialPath.length) or '')
|
541
|
-
@name = partialPath or ''
|
542
|
-
if FileTest.symlink?(@path)
|
543
|
-
# this could use the 'file' case below, but why create an extra temp file
|
544
|
-
linkData = File.readlink(@path)
|
545
|
-
$stderr.puts "link to: #{linkData}" if $S3syncOptions['--debug']
|
546
|
-
@size = linkData.length
|
547
|
-
unless $S3syncOptions['--no-md5']
|
548
|
-
md5 = Digest::MD5.new()
|
549
|
-
md5 << linkData
|
550
|
-
@tag = md5.hexdigest
|
551
|
-
end
|
552
|
-
@date = File.lstat(@path).mtime.utc
|
553
|
-
elsif FileTest.file?(@path)
|
554
|
-
@size = FileTest.size(@path)
|
555
|
-
data = nil
|
556
|
-
begin
|
557
|
-
unless $S3syncOptions['--no-md5']
|
558
|
-
data = self.stream
|
559
|
-
md5 = Digest::MD5.new()
|
560
|
-
while !data.eof?
|
561
|
-
md5 << data.read(2048) # stream so it's not taking all memory
|
562
|
-
end
|
563
|
-
data.close
|
564
|
-
@tag = md5.hexdigest
|
565
|
-
end
|
566
|
-
@date = File.stat(@path).mtime.utc
|
567
|
-
rescue SystemCallError
|
568
|
-
# well we're not going to have an md5 that's for sure
|
569
|
-
@tag = nil
|
570
|
-
end
|
571
|
-
elsif FileTest.directory?(@path)
|
572
|
-
# all s3 directories are dummy nodes that contain the same directory string
|
573
|
-
# so for easy comparison, set our size and tag thusly
|
574
|
-
@size = $S3syncDirString.length
|
575
|
-
@tag = $S3syncDirTag
|
576
|
-
@date = File.stat(@path).mtime.utc
|
577
|
-
end
|
578
|
-
debug("local node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
|
579
|
-
end
|
580
|
-
# return a stream that will read the contents of the local item
|
581
|
-
# local gets pulled by the S3Node update fn, due to how http streaming is implemented
|
582
|
-
def stream
|
583
|
-
begin
|
584
|
-
# 1.0.8 switch order of these tests because a symlinked file will say yes to 'file?'
|
585
|
-
if FileTest.symlink?(@path) or FileTest.directory?(@path)
|
586
|
-
tf = Tempfile.new('s3sync')
|
587
|
-
if FileTest.symlink?(@path)
|
588
|
-
tf.printf('%s', File.readlink(@path))
|
589
|
-
elsif FileTest.directory?(@path)
|
590
|
-
tf.printf('%s', $S3syncDirString)
|
591
|
-
end
|
592
|
-
tf.close
|
593
|
-
tf.open
|
594
|
-
tf
|
595
|
-
elsif FileTest.file?(@path)
|
596
|
-
File.open(@path, 'rb')
|
597
|
-
end
|
598
|
-
rescue SystemCallError
|
599
|
-
$stderr.puts "Could not read #{@path}: #{$!}"
|
600
|
-
raise
|
601
|
-
end
|
602
|
-
end
|
603
|
-
def stat
|
604
|
-
FileTest.symlink?(@path) ? File.lstat(@path) : File.stat(@path)
|
605
|
-
end
|
606
|
-
def exist?
|
607
|
-
FileTest.exist?(@path) or FileTest.symlink?(@path)
|
608
|
-
end
|
609
|
-
def owner
|
610
|
-
self.exist? ? self.stat().uid : 0
|
611
|
-
end
|
612
|
-
def group
|
613
|
-
self.exist? ? self.stat().gid : 0
|
614
|
-
end
|
615
|
-
def permissions
|
616
|
-
self.exist? ? self.stat().mode : 600
|
617
|
-
end
|
618
|
-
def updateFrom(fromNode)
|
619
|
-
if fromNode.respond_to?(:to_stream)
|
620
|
-
fName = @path + '.s3syncTemp'
|
621
|
-
# handle the case where the user wants us to create dirs that don't exist in S3
|
622
|
-
if $S3syncOptions['--make-dirs']
|
623
|
-
# ensure target's path exists
|
624
|
-
dirs = @path.split('/')
|
625
|
-
# but the last one is a file name
|
626
|
-
dirs.pop()
|
627
|
-
current = ''
|
628
|
-
dirs.each do |dir|
|
629
|
-
current << dir << '/'
|
630
|
-
begin
|
631
|
-
Dir.mkdir(current) unless FileTest.exist?(current)
|
632
|
-
rescue SystemCallError
|
633
|
-
$stderr.puts "Could not mkdir #{current}: #{$!}"
|
634
|
-
end
|
635
|
-
end
|
636
|
-
end
|
637
|
-
unless fromNode.directory?
|
638
|
-
f = File.open(fName, 'wb')
|
639
|
-
f = ProgressStream.new(f, fromNode.size) if $S3syncOptions['--progress']
|
35
|
+
# Time to load config and see if we've got everything we need to cook our salad
|
36
|
+
begin
|
37
|
+
conf.read
|
38
|
+
rescue S3Sync::NoConfigFound => exc
|
39
|
+
# We can't proceed without having those two vars set
|
40
|
+
$stderr.puts "You didn't set up the following environment variables:"
|
41
|
+
$stderr.puts
|
42
|
+
exc.missing_vars.each {|var| $stderr.puts " * #{var}"}
|
43
|
+
$stderr.puts
|
640
44
|
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
File.unlink(@path) if File.exist?(@path)
|
646
|
-
if fromNode.symlink?
|
647
|
-
linkTo = ''
|
648
|
-
File.open(fName, 'rb'){|f| linkTo = f.read}
|
649
|
-
debug("#{@path} will be a symlink to #{linkTo}")
|
650
|
-
begin
|
651
|
-
File.symlink(linkTo, @path)
|
652
|
-
rescue NotImplementedError
|
653
|
-
# windows doesn't do symlinks, for example
|
654
|
-
# just bail
|
655
|
-
File.unlink(fName) if File.exist?(fName)
|
656
|
-
return
|
657
|
-
rescue SystemCallError
|
658
|
-
$stderr.puts "Could not write symlink #{@path}: #{$!}"
|
659
|
-
end
|
660
|
-
elsif fromNode.directory?
|
661
|
-
# only get here when the dir doesn't exist. else they'd compare ==
|
662
|
-
debug(@path)
|
663
|
-
begin
|
664
|
-
Dir.mkdir(@path) unless FileTest.exist?(@path)
|
665
|
-
rescue SystemCallError
|
666
|
-
$stderr.puts "Could not mkdir #{@path}: #{$!}"
|
667
|
-
end
|
668
|
-
|
669
|
-
else
|
670
|
-
begin
|
671
|
-
File.rename(fName, @path)
|
672
|
-
rescue SystemCallError
|
673
|
-
$stderr.puts "Could not write (rename) #{@path}: #{$!}"
|
674
|
-
end
|
675
|
-
|
676
|
-
end
|
677
|
-
# clean up if the temp file is still there (as for links)
|
678
|
-
File.unlink(fName) if File.exist?(fName)
|
679
|
-
|
680
|
-
# update permissions
|
681
|
-
linkCommand = fromNode.symlink? ? 'l' : ''
|
682
|
-
begin
|
683
|
-
File.send(linkCommand + 'chown', fromNode.owner, fromNode.group, @path)
|
684
|
-
File.send(linkCommand + 'chmod', fromNode.permissions, @path)
|
685
|
-
rescue NotImplementedError
|
686
|
-
# no one has lchmod, but who really cares
|
687
|
-
rescue SystemCallError
|
688
|
-
$stderr.puts "Could not change owner/permissions on #{@path}: #{$!}"
|
689
|
-
end
|
690
|
-
else
|
691
|
-
raise "Node provided as update source doesn't support :to_stream"
|
692
|
-
end
|
693
|
-
end
|
694
|
-
def symlink?()
|
695
|
-
FileTest.symlink?(@path)
|
696
|
-
end
|
697
|
-
def delete
|
698
|
-
# don't try to delete the restore root dir
|
699
|
-
# this is a quick fix to deal with the fact that the tree recurse has to visit the root node
|
700
|
-
return unless @name != ''
|
701
|
-
return unless FileTest.exist?(@path)
|
702
|
-
begin
|
703
|
-
if FileTest.directory?(@path)
|
704
|
-
Dir.rmdir(@path)
|
705
|
-
else
|
706
|
-
File.unlink(@path)
|
707
|
-
end
|
708
|
-
rescue SystemCallError
|
709
|
-
$stderr.puts "Could not delete #{@path}: #{$!}"
|
710
|
-
end
|
711
|
-
end
|
712
|
-
end
|
713
|
-
|
714
|
-
|
715
|
-
end #module
|
45
|
+
$stderr.puts "I tried to load a config file from the following paths:"
|
46
|
+
$stderr.puts
|
47
|
+
exc.paths_checked.each {|path| $stderr.puts " * #{path}"}
|
48
|
+
$stderr.puts
|
716
49
|
|
717
|
-
|
718
|
-
|
50
|
+
$stderr.puts "You could try to set the `S3SYNC_PATH' environment variable"
|
51
|
+
$stderr.puts "pointing to a file to be loaded as your config file or just"
|
52
|
+
$stderr.puts "export those variables to your environment like this:"
|
53
|
+
$stderr.puts
|
54
|
+
exc.missing_vars.each {|var|
|
55
|
+
$stderr.puts " $ export #{var}=<value-provided-by-amazon>"
|
56
|
+
}
|
57
|
+
$stderr.puts
|
58
|
+
$stderr.puts "Learn how to do that here: https://github.com/clarete/s3sync"
|
59
|
+
exit
|
719
60
|
end
|
720
61
|
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
62
|
+
# Step aside, the star of this show is here. Let's try to create the
|
63
|
+
# environment to run the requested command, and give the user feedback if
|
64
|
+
# the information provided was not enough
|
65
|
+
begin
|
66
|
+
S3Sync::CLI::run conf
|
67
|
+
rescue S3Sync::FailureFeedback => exc
|
68
|
+
$stderr.puts exc.message
|
69
|
+
exit 1
|
70
|
+
rescue S3Sync::WrongUsage => exc
|
71
|
+
$stderr.puts "Error:\n #{exc.msg}\n" if exc.msg
|
72
|
+
exit exc.error_code
|
73
|
+
rescue Interrupt
|
74
|
+
$stderr.puts "Interrupted"
|
75
|
+
exit 1
|
76
|
+
end
|