mgreenly-s3sync 1.2.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,724 @@
1
+ #!/usr/bin/env ruby
2
+ # This software code is made available "AS IS" without warranties of any
3
+ # kind. You may copy, display, modify and redistribute the software
4
+ # code either by itself or as incorporated into your code; provided that
5
+ # you do not remove any proprietary notices. Your use of this software
6
+ # code is at your own risk and you waive any claim against the author
7
+ # with respect to your use of this software code.
8
+ # (c) 2007 s3sync.net
9
+ #
10
+
11
+ module S3sync
12
+
13
+ $S3SYNC_MIME_TYPES_FILE = (ENV["S3SYNC_MIME_TYPES_FILE"] or '/etc/mime.types')
14
+
15
+ $S3SYNC_VERSION = '1.2.4'
16
+
17
+ # always look "here" for include files (thanks aktxyz)
18
+ $LOAD_PATH << File.expand_path(File.dirname(__FILE__))
19
+
20
+ require 'getoptlong'
21
+ #require 'generator' # http://www.ruby-doc.org/stdlib/libdoc/generator/rdoc/classes/Generator.html
22
+ require 'thread_generator' # memory doesn't leak with this one, at least nothing near as bad
23
+ require 'md5'
24
+ require 'tempfile'
25
+ require 's3try'
26
+
27
+ # after other mods, so we don't overwrite yaml vals with defaults
28
+ require 's3config'
29
+ include S3Config
30
+
31
# Payload stored in S3 to stand in for a directory. A node whose size equals
# this string's length and whose tag equals $S3syncDirTag is treated as a
# directory (see Node#directory?).
# NOTE(review): $S3syncDirTag is presumably the MD5 of the payload - confirm.
$S3syncDirString = '{E40327BF-517A-46e8-A6C3-AF51BC263F59}'
$S3syncDirTag = 'd66759af42f282e1ba19144df2d405d0'
$S3syncDirFile = Tempfile.new("s3sync")
$S3syncDirFile.puts $S3syncDirString
$S3syncDirFile.close # not final; we need this file again to 'put' directory nodes

# Parse a mime.types style file into a Hash of extension => MIME type.
# Each useful line is "<type> <ext> [<ext> ...]"; lines that do not start with
# a word character (comments, blanks) or that list no extension are ignored.
# path:: location of the mime.types file to read
# Returns the Hash; a later line wins when an extension is listed twice.
def self.loadMimeTypes(path)
  types = {}
  File.foreach(path) do |line|
    next unless line =~ /^(\w\S+)\s+(\S.*)$/
    type = $1
    $2.split.each { |ext| types[ext] = type }
  end
  types
end

# $mimeTypes stays undefined when there is no readable mime.types file;
# S3Node#updateFrom checks for that before looking up a Content-Type.
if $S3SYNC_MIME_TYPES_FILE and FileTest.exist?($S3SYNC_MIME_TYPES_FILE)
  $mimeTypes = loadMimeTypes($S3SYNC_MIME_TYPES_FILE)
end
51
+
52
+ def S3sync.main
53
+ # ---------- OPTIONS PROCESSING ---------- #
54
+
55
# Parsed command-line options, keyed by long option name (e.g. '--debug').
$S3syncOptions = {}
# One row per option: long name, optional short alias, argument requirement.
optionsParser = GetoptLong.new(
  ['--help', '-h', GetoptLong::NO_ARGUMENT],
  ['--ssl', '-s', GetoptLong::NO_ARGUMENT],
  ['--recursive', '-r', GetoptLong::NO_ARGUMENT],
  ['--public-read', '-p', GetoptLong::NO_ARGUMENT],
  ['--delete', GetoptLong::NO_ARGUMENT],
  ['--verbose', '-v', GetoptLong::NO_ARGUMENT],
  ['--dryrun', '-n', GetoptLong::NO_ARGUMENT],
  ['--debug', '-d', GetoptLong::NO_ARGUMENT],
  ['--memory', '-m', GetoptLong::NO_ARGUMENT],
  ['--progress', GetoptLong::NO_ARGUMENT],
  ['--expires', GetoptLong::REQUIRED_ARGUMENT],
  ['--cache-control', GetoptLong::REQUIRED_ARGUMENT],
  ['--exclude', GetoptLong::REQUIRED_ARGUMENT],
  ['--make-dirs', GetoptLong::NO_ARGUMENT]
)
72
+
73
# Print an optional error message followed by the usage summary on stderr,
# then terminate the process.
# message:: text printed ahead of the summary, or nil for plain help
# Exits with status 1 when a message was supplied (an error) and 0 otherwise.
# Defined on self, which is the S3sync module while S3sync.main is running.
def self.usage(message = nil)
  $stderr.puts message if message
  name = File.basename($0)
  $stderr.puts <<"ENDUSAGE"
#{name} [options] <source> <destination>\t\tversion #{$S3SYNC_VERSION}
  --help    -h          --verbose     -v     --dryrun    -n
  --ssl     -s          --recursive   -r     --delete
  --public-read -p      --expires="<exp>"    --cache-control="<cc>"
  --exclude="<regexp>"  --progress           --debug   -d
  --make-dirs
One of <source> or <destination> must be of S3 format, the other a local path.
Reminders:
* An S3 formatted item with bucket 'mybucket' and prefix 'mypre' looks like:
    mybucket:mypre/some/key/name
* Local paths should always use forward slashes '/' even on Windows
* Whether you use a trailing slash on the source path makes a difference.
* For examples see README.
ENDUSAGE
  # a message means we were called because of a mistake: report failure to the shell
  exit(message ? 1 : 0)
end #usage
93
+
94
# Record every option that was given. Options without an argument arrive as
# "" from GetoptLong, which is still truthy, so a plain truthiness test works.
begin
  optionsParser.each { |opt, arg| $S3syncOptions[opt] = (arg || true) }
rescue StandardError
  usage # the parser already printed an error message
end
usage if $S3syncOptions['--help']
# dry runs, debugging and progress display all imply verbose output
if $S3syncOptions['--dryrun'] || $S3syncOptions['--debug'] || $S3syncOptions['--progress']
  $S3syncOptions['--verbose'] = true
end
$S3syncOptions['--ssl'] = true if $S3syncOptions['--ssl'] # change from "" to true to appease s3 port chooser


# ---------- CONNECT ---------- #
S3sync::s3trySetup
106
+
107
+ # ---------- PREFIX PROCESSING ---------- #
108
+
109
# Decide whether a command-line argument names an S3 location.
# pre:: the raw argument string
# Returns true when the string contains a colon and does not begin with a
# DOS style drive prefix such as "C:\" or "C:/"; false otherwise.
# Defined on self, which is the S3sync module while S3sync.main is running.
def self.s3Prefix?(pre)
  # \A (not ^) so only the very start of the argument can look like a drive letter
  pre.include?(':') && pre !~ %r{\A[A-Za-z]:[\\/]}
end
113
sourcePrefix, destinationPrefix = ARGV
usage("You didn't set up your environment variables; see README.txt") unless $AWS_ACCESS_KEY_ID && $AWS_SECRET_ACCESS_KEY
usage('Need a source and a destination') if sourcePrefix.nil? || destinationPrefix.nil?
usage('Both arguments can\'t be on S3') if s3Prefix?(sourcePrefix) && s3Prefix?(destinationPrefix)
usage('One argument must be on S3') if !s3Prefix?(sourcePrefix) && !s3Prefix?(destinationPrefix)

# so we can modify them
sourcePrefix, destinationPrefix = sourcePrefix.dup, destinationPrefix.dup

# handle trailing slash for source properly
if sourcePrefix !~ %r{/$}
  # no slash on end of source means we need to append the last src dir to dst prefix
  # testing for empty isn't good enough here.. needs to be "empty apart from potentially having 'bucket:'"
  slash = (destinationPrefix.empty? || destinationPrefix.match(%r{:$})) ? "" : "/"
  # take everything at the end after a slash or colon
  destinationPrefix.replace(destinationPrefix + slash + %r{([^/:]*)$}.match(sourcePrefix)[1])
end
# no trailing slash on dest, ever.
destinationPrefix.sub!(%r{/$}, "")

# don't repeat slashes
sourcePrefix.squeeze!('/')
destinationPrefix.squeeze!('/')

# here's where we find out what direction we're going
sourceIsS3 = s3Prefix?(sourcePrefix)
# alias these variables to the other strings (in ruby = does not make copies of strings)
s3Prefix = sourceIsS3 ? sourcePrefix : destinationPrefix
localPrefix = sourceIsS3 ? destinationPrefix : sourcePrefix

# canonicalize the S3 stuff: "bucket:some/prefix" => bucket + "some/prefix"
s3Bucket = (/^(.*?):/.match(s3Prefix))[1]
s3Prefix.replace((/:(.*)$/.match(s3Prefix))[1])
debug("s3Prefix #{s3Prefix}")
$S3SyncOriginalS3Prefix = s3Prefix.dup

# canonicalize the local stuff
# but that can kill a trailing slash, which we need to preserve long enough to know whether we mean "the dir" or "its contents"
# it will get re-stripped by the local generator after expressing this knowledge
localTrailingSlash = localPrefix.match(%r{/$})
localPrefix.replace(File.expand_path(localPrefix))
# += rebinds localPrefix to a new String; from here on it no longer aliases the argument copy
localPrefix += '/' if localTrailingSlash
debug("localPrefix #{localPrefix}")
# used for exclusion parsing
$S3SyncOriginalLocalPrefix = localPrefix.dup

# exclude preparation
# we don't want to build then throw away this regexp for each node in the universe; do it once globally
if $S3syncOptions['--exclude']
  begin
    $S3SyncExclude = Regexp.new($S3syncOptions['--exclude'])
  rescue RegexpError
    # a malformed pattern is a usage mistake, not a crash
    usage("Invalid --exclude pattern: #{$!}")
  end
end
163
+
164
+
165
+ # ---------- GENERATORS ---------- #
166
+
167
+
168
# a generator that will return the files/dirs of the local tree one by one
# sorted and decorated for easy comparison with the S3 tree
localTree = Generator.new do |g|
  # Yield a LocalNode for every entry below prefix/path, in the order S3
  # would list the matching keys, descending into sub directories when
  # --recursive is set.
  # g::      generator to yield nodes to
  # prefix:: local root of the sync (no trailing slash)
  # path::   directory to list, relative to prefix ("" for the root)
  # Returns nil without yielding when the directory is missing or unreadable.
  def S3sync.localTreeRecurse(g, prefix, path)
    debug("localTreeRecurse #{prefix} #{path}")
    names = nil
    begin
      slash = prefix.empty? ? "" : "/"
      # Dir.entries returns a plain Array and leaves no directory handle open
      names = Dir.entries(prefix + slash + path)
    rescue Errno::ENOENT
      # ok the dir doesn't exist at all (this only really occurs for the root i.e. first dir)
      return nil
    rescue Errno::EACCES
      # vista won't even let us touch some stuff in our own profile
      return nil
    end
    names = names.reject { |name| name == "." or name == ".." }

    # do some pre-processing
    # the following sleight of hand is to make the recursion match the way s3 sorts
    # take for example the directory 'foo' and the file 'foo.bar'
    # when we encounter the dir we would want to recurse into it
    # but S3 would just say 'period < slash' and sort 'foo.bar' between the dir node
    # and the contents in that 'dir'
    #
    # so the solution is to not recurse into the directory until the point where
    # it would come up "next" in the S3 list
    # We have to do these hoops on the local side, because we have very little control
    # over how S3 will return its results
    dirMarkers = []
    names.each do |name|
      partialPath = path.empty? ? name : path + "/" + name
      fullPath = prefix.empty? ? partialPath : prefix + "/" + partialPath
      # add a dir node if appropriate
      debug("Test #{fullPath}")
      if ((not FileTest.symlink?(fullPath)) and FileTest.directory?(fullPath)) and $S3syncOptions['--recursive']
        debug("Adding it as a dir node")
        dirMarkers.push(name + '/') # always trail slash here for sorting purposes (removed below)
      end
    end

    dItems = names + dirMarkers
    dItems.sort! #aws says we will get alpha sorted results but ruby doesn't
    dItems.each do |name|
      # a trailing slash marks "recurse into this directory now"
      isDirNode = (name[-1, 1] == '/')
      if isDirNode
        name = name.slice(0...name.length-1)
        debug("#{name} is a dir node")
      end
      partialPath = path.empty? ? name : path + "/" + name
      fullPath = prefix.empty? ? partialPath : prefix + "/" + partialPath
      # --exclude is matched against the path relative to what the user typed
      excludePath = fullPath.slice($S3SyncOriginalLocalPrefix.length...fullPath.length)
      if $S3SyncExclude and $S3SyncExclude.match(excludePath)
        debug("skipping local item #{excludePath} because of --exclude")
      elsif isDirNode
        localTreeRecurse(g, prefix, partialPath)
      else
        # a normal looking node we should try to process
        debug("local item #{fullPath}")
        g.yield(LocalNode.new(prefix, partialPath))
      end
    end
  end

  # a bit of a special case for local, since "foo/" and "foo" are essentially treated the same by file systems
  # so we need to think harder about what the user really meant in the command line.
  localPrefixTrim = localPrefix
  if localPrefix !~ %r{/$}
    # no trailing slash, so yield the root itself first, then recurse if appropriate
    # gork this is still not quite good enough.. if local is the dest then we don't know whether s3 will have a root dir node yielded a priori, so we can't know whether to do this. only matters for --erase though
    g.yield(LocalNode.new(localPrefixTrim, "")) # technically we should check this for exclusion, but excluding the root node is kind of senseless.. and that would be a pain to set up here
    localTreeRecurse(g, localPrefixTrim, "") if $S3syncOptions['--recursive']
  else
    # trailing slash, so ignore the root itself, and just go into the first level
    localPrefixTrim.sub!(%r{/$}, "") # strip the slash because of how we do local node slash accounting in the recurse above
    localTreeRecurse(g, localPrefixTrim, "")
  end
end
263
+
264
# a generator that will return the nodes in the S3 tree one by one
# sorted and decorated for easy comparison with the local tree
s3Tree = Generator.new do |g|
  # List the keys under prefix + path one "directory" level at a time and
  # yield an S3Node for each key, descending into common prefixes when
  # --recursive is set.
  # g::      generator to yield nodes to
  # bucket:: bucket name
  # prefix:: key prefix the whole sync is rooted at
  # path::   portion below prefix to list ("" for the root)
  def S3sync.s3TreeRecurse(g, bucket, prefix, path)
    if $S3syncOptions['--memory']
      $stderr.puts "Starting S3 recurse"
      GC.start
      # keep the previous snapshot in a global so ostats can print deltas
      $S3syncMemoryStats = ostats($S3syncMemoryStats)
    end
    debug("s3TreeRecurse #{bucket} #{prefix} #{path}")
    nextPage = true
    marker = ''
    while nextPage do
      fullPrefix = prefix + path
      debug("nextPage: #{marker}") if marker != ''
      options = {}
      options['prefix'] = fullPrefix # start at the right depth
      options['delimiter'] = '/' # only one dir at a time please
      options['max-keys'] = '200' # use manageable chunks
      options['marker'] = marker unless marker == ''
      d = S3sync.S3try(:list_bucket, bucket, options)
      $stderr.puts "S3 ERROR: #{d.http_response}" unless d.http_response.is_a? Net::HTTPSuccess
      # the 'directories' and leaf nodes are in two separate collections
      # because a dir will never have the same name as a node, we can just shove them together and sort
      # it's important to evaluate them alphabetically for efficient comparison to the local tree
      tItems = d.entries + d.common_prefix_entries
      tItems.sort! do |a,b|
        aName = a.respond_to?('key') ? a.key : a.prefix
        bName = b.respond_to?('key') ? b.key : b.prefix
        # the full path will be returned, efficient to ignore the part we know will be in common
        aName.slice(fullPrefix.length..aName.length) <=> bName.slice(fullPrefix.length..bName.length)
      end
      # get rid of the big s3 objects asap, just save light-weight nodes and strings
      # (a Node for a key, a plain String for a common prefix)
      items = tItems.collect do |listed|
        if listed.respond_to?('key')
          key = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", listed.key).join
          Node.new(key, listed.size, listed.etag)
        else
          Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", listed.prefix).join
        end
      end
      nextPage = d.properties.is_truncated
      marker = (d.properties.next_marker)? d.properties.next_marker : ((d.entries.length > 0)? d.entries.last.key : '')
      # get this into native char set (because when we feed it back to s3 that's what it will expect)
      marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join
      tItems = nil
      d = nil # get rid of this before recursing; it's big
      GC.start # not sure but I think yielding before doing this is causing evil closure bloat
      items.each do |entry|
        if not (entry.kind_of? String)
          # this is an item
          excludePath = entry.name.slice($S3SyncOriginalS3Prefix.length...entry.name.length)
          if $S3SyncExclude and $S3SyncExclude.match(excludePath)
            debug("skipping S3 item #{excludePath} due to --exclude")
          else
            debug("S3 item #{entry.name}")
            g.yield(S3Node.new(bucket, prefix, entry))
          end
        else
          # it's a prefix (i.e. there are sub keys)
          partialPath = entry.slice(prefix.length..entry.length) # will have trailing slash
          excludePath = entry.slice($S3SyncOriginalS3Prefix.length...entry.length)
          # recurse
          if $S3SyncExclude and $S3SyncExclude.match(excludePath)
            debug("skipping prefix #{excludePath} due to --exclude")
          else
            debug("prefix found: #{partialPath}")
            s3TreeRecurse(g, bucket, prefix, partialPath) if $S3syncOptions['--recursive']
          end
        end
      end
      items = nil
    end # of while nextPage
    if $S3syncOptions['--memory']
      $stderr.puts "Ending S3 recurse"
      GC.start
      $S3syncMemoryStats = ostats($S3syncMemoryStats)
    end
  end
  # this will yield the root node first and then recurse
  s3TreeRecurse(g, s3Bucket, s3Prefix, "")

end
348
+
349
# alias the tree objects so we don't care below which direction the transfer is going
if sourceIsS3
  sourceTree, destinationTree = s3Tree, localTree
else
  sourceTree, destinationTree = localTree, s3Tree
end


# ---------- COMPARATOR ---------- #

# run the comparison engine and act according to what we find for each check
# Both trees yield nodes in the same sorted order, so this is a merge walk:
# whichever side has the smaller name is missing from the other side.
nodesToDelete = [] # a stack. have to delete in reverse order of normal create/update processing

sourceNode = sourceTree.next? ? sourceTree.next : nil
destinationNode = destinationTree.next? ? destinationTree.next : nil
while sourceNode or destinationNode do
  debug("source: #{sourceNode.name}") if sourceNode
  debug("dest: #{destinationNode.name}") if destinationNode
  if (!destinationNode) or (sourceNode and (sourceNode.name < destinationNode.name))
    # only the source has this node: create it on the destination side
    dNode =
      if sourceNode.kind_of? LocalNode
        S3Node.new(s3Bucket, s3Prefix, sourceNode.name)
      else
        LocalNode.new(localPrefix, sourceNode.name)
      end
    puts "Create node #{sourceNode.name}" if $S3syncOptions['--verbose']
    dNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
    sourceNode = sourceTree.next? ? sourceTree.next : nil
  elsif (!sourceNode) or (sourceNode.name > destinationNode.name)
    # only the destination has this node: remove it when --delete was given
    debug("Source does not have #{destinationNode.name}")
    if $S3syncOptions['--delete']
      if destinationNode.directory?
        # have to wait
        nodesToDelete.push(destinationNode)
      else
        puts "Remove node #{destinationNode.name}" if $S3syncOptions['--verbose']
        destinationNode.delete unless $S3syncOptions['--dryrun']
      end
    end
    destinationNode = destinationTree.next? ? destinationTree.next : nil
  else
    # same name on both sides: update only when size or tag differ
    if (sourceNode.size != destinationNode.size) or (sourceNode.tag != destinationNode.tag)
      puts "Update node #{sourceNode.name}" if $S3syncOptions['--verbose']
      destinationNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
    else
      debug("Node #{sourceNode.name} unchanged")
    end
    sourceNode = sourceTree.next? ? sourceTree.next : nil
    destinationNode = destinationTree.next? ? destinationTree.next : nil
  end
end

# get rid of the (now empty, except for other directories) directories
nodesToDelete.reverse_each do |node|
  puts "Remove node #{node.name}" if $S3syncOptions['--verbose']
  node.delete unless $S3syncOptions['--dryrun']
end
406
+
407
+ end #main
408
+
409
+
410
+
411
+ # ---------- NODE ---------- #
412
+
413
# Lightweight description of one item in a tree: its name, its size and a
# tag used to detect content changes.
class Node
  attr_reader :name, :size, :tag

  # name:: item name
  # size:: item size
  # tag::  content tag compared between source and destination
  def initialize(name = '', size = 0, tag = '')
    @name = name
    @size = size
    @tag = tag
  end

  # True when both tag and size match the directory sentinel globals
  # ($S3syncDirTag and the length of $S3syncDirString).
  def directory?
    @tag == $S3syncDirTag && @size == $S3syncDirString.length
  end
end
426
+
427
+ # ---------- S3Node ---------- #
428
+
429
# A node that lives in S3. Built either from a bare name (a node about to be
# created) or from a listed Node carrying the key, size and tag.
class S3Node < Node
  # bucket::     bucket name
  # prefix::     key prefix the sync is rooted at
  # itemOrName:: a String name relative to prefix, or an object responding
  #              to name/size/tag as produced by the S3 listing
  # Neither prefix nor itemOrName is modified.
  def initialize(bucket, prefix, itemOrName)
    @bucket = bucket
    if itemOrName.kind_of? String
      # don't create directories with a slash on the end
      @name = itemOrName.sub(%r{/$}, "")
      #6/2007. the prefix can be filled but the name empty, in the case of s3sync -r somedir somebucket:
      if (not prefix.empty?) and @name.empty?
        @name = prefix
        @path = prefix.dup
      else
        slash = prefix.empty? ? "" : "/"
        @path = prefix + slash + @name
      end
    else
      @name = (itemOrName.name.slice((prefix.length)..itemOrName.name.length) or '')
      # depending whether the prefix is / tailed, the name might need trimming
      @name = @name.sub(%r{^/}, "") # get rid of leading slash in name if there (from above simplistic split)
      @name = @name.sub(%r{/$}, "") # don't create directories with a slash on the end
      @path = itemOrName.name.sub(%r{/$}, "") # don't create directories with a slash on the end
      @size = itemOrName.size
      @tag = itemOrName.tag.gsub(/"/,'')
    end
    debug("s3 node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag}")
  end

  # get this item from s3 into the provided stream
  # S3 pushes to the local item, due to how http streaming is implemented
  def to_stream(s)
    @result = S3sync.S3try(:get_stream, @bucket, @path, {}, s)
  end

  # True when the object's metadata marks it as a symlink.
  def symlink?()
    debug("symlink value is: #{fetchResult.object.metadata['symlink']}")
    fetchResult.object.metadata['symlink'] == 'true'
  end

  # Numeric owner id stored in the object's metadata.
  def owner
    debug("Owner of this s3 node is #{fetchResult.object.metadata['owner']}")
    fetchResult.object.metadata['owner'].to_i # if not there, will be nil => 0 which == root so good default
  end

  # Numeric group id stored in the object's metadata.
  def group
    fetchResult.object.metadata['group'].to_i # 0 default ok
  end

  # File mode stored in the object's metadata (written in decimal by
  # updateFrom), or owner read/write only when none was stored.
  def permissions
    g = fetchResult.object.metadata['permissions']
    g ? g.to_i : 0600 # default to owner only (octal)
  end

  # Upload fromNode's content and attributes to this key.
  # fromNode:: must respond to stream, size, owner, group, permissions and symlink?
  # Raises RuntimeError when fromNode cannot supply a stream. Read failures
  # are reported on stderr and the node is skipped.
  def updateFrom(fromNode)
    unless fromNode.respond_to?(:stream)
      raise "Node provided as update source doesn't support :stream"
    end
    meta = Hash.new
    meta['owner'] = fromNode.owner.to_s
    meta['group'] = fromNode.group.to_s
    meta['permissions'] = fromNode.permissions.to_s
    meta['symlink'] = 'true' if fromNode.symlink?
    begin
      theStream = fromNode.stream
      theStream = ProgressStream.new(theStream, fromNode.size) if $S3syncOptions['--progress']

      s3o = S3::S3Object.new(theStream, meta)
      debug(@path)
      # a node without a usable size is sent as zero length
      headers = {'Content-Length' => (fromNode.size.respond_to?(:nonzero?) ? fromNode.size.to_s : '0')}
      headers['x-amz-acl'] = 'public-read' if $S3syncOptions['--public-read']
      headers['Expires'] = $S3syncOptions['--expires'] if $S3syncOptions['--expires']
      headers['Cache-Control'] = $S3syncOptions['--cache-control'] if $S3syncOptions['--cache-control']
      fType = @path.split('.').last
      debug("File extension: #{fType}")
      # $mimeTypes only exists when a mime.types file was loaded
      if defined?($mimeTypes) and fType != '' and (mType = $mimeTypes[fType]) and mType != ''
        debug("Mime type: #{mType}")
        headers['Content-Type'] = mType
      end
      @result = S3sync.S3try(:put, @bucket, @path, s3o, headers)
      theStream.close if (theStream and not theStream.closed?)
    rescue NoMethodError
      # when --progress is used and we can't get the stream object, it doesn't report as null
      # so the above .closed? test will break
      $stderr.puts "Skipping #{@path}: #{$!}"
    rescue SystemCallError
      theStream.close if (theStream and not theStream.closed?)
      $stderr.puts "Skipping #{@path}: #{$!}"
    end
  end

  # Remove this key from the bucket.
  def delete
    @result = S3sync.S3try(:delete, @bucket, @path)
  end

  private

  # Result of the most recent request for this key; issues a HEAD when no
  # request has been made yet so the metadata readers always have something
  # to look at.
  def fetchResult
    @result ||= S3sync.S3try(:head, @bucket, @path)
  end
end
528
+
529
+ # ---------- LocalNode ---------- #
530
+
531
# A node on the local file system. Size and tag are computed when the node is
# built so it can be compared with its S3 counterpart.
class LocalNode < Node
  # prefix::      local root of the sync
  # partialPath:: path of the item relative to prefix ("" for the root)
  def initialize(prefix, partialPath)
    slash = prefix.empty? ? "" : "/"
    @path = prefix + slash + partialPath
    # slash isn't at the front of this any more @name = (partialPath.slice(1..partialPath.length) or '')
    @name = partialPath || ''
    if FileTest.symlink?(@path)
      # this could use the 'file' case below, but why create an extra temp file
      linkData = File.readlink(@path)
      debug("link to: #{linkData}")
      @size = linkData.length
      @tag = Digest::MD5.hexdigest(linkData)
    elsif FileTest.file?(@path)
      @size = FileTest.size(@path)
      begin
        data = self.stream
        begin
          md5 = Digest::MD5.new()
          while !data.eof?
            md5 << data.read(2048) # stream so it's not taking all memory
          end
          @tag = md5.hexdigest
        ensure
          # close the handle even when reading fails part way through
          data.close if data
        end
      rescue SystemCallError
        # well we're not going to have an md5 that's for sure
        @tag = nil
      end
    elsif FileTest.directory?(@path)
      # all s3 directories are dummy nodes contain the same directory string
      # so for easy comparison, set our size and tag thusly
      @size = $S3syncDirString.length
      @tag = $S3syncDirTag
    end
    debug("local node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag}")
  end

  # return a stream that will read the contents of the local item
  # local gets pulled by the S3Node update fn, due to how http streaming is implemented
  # A symlink yields its target text and a directory yields the directory
  # sentinel, each through a temp file. Returns nil for anything else that is
  # not a regular file. Re-raises SystemCallError after reporting it.
  def stream
    begin
      # 1.0.8 switch order of these tests because a symlinked file will say yes to 'file?'
      if FileTest.symlink?(@path) or FileTest.directory?(@path)
        tf = Tempfile.new('s3sync')
        if FileTest.symlink?(@path)
          tf.printf('%s', File.readlink(@path))
        elsif FileTest.directory?(@path)
          tf.printf('%s', $S3syncDirString)
        end
        # close and reopen so the caller reads from the start
        tf.close
        tf.open
        tf
      elsif FileTest.file?(@path)
        File.open(@path, 'rb')
      end
    rescue SystemCallError
      $stderr.puts "Could not read #{@path}: #{$!}"
      raise
    end
  end

  # File::Stat of the item itself (a symlink is not followed).
  def stat
    FileTest.symlink?(@path) ? File.lstat(@path) : File.stat(@path)
  end

  # True when something is present at the path, dangling symlinks included.
  def exist?
    FileTest.exist?(@path) or FileTest.symlink?(@path)
  end

  # Owner uid, or 0 when the item does not exist.
  def owner
    self.exist? ? self.stat().uid : 0
  end

  # Group gid, or 0 when the item does not exist.
  def group
    self.exist? ? self.stat().gid : 0
  end

  # File mode, or owner read/write only when the item does not exist.
  def permissions
    self.exist? ? self.stat().mode : 0600 # octal
  end

  # Download fromNode into this path and copy its owner, group and mode.
  # fromNode:: must respond to to_stream, directory?, symlink?, size, owner,
  #            group and permissions
  # Raises RuntimeError when fromNode cannot write itself to a stream.
  # File system failures are reported on stderr rather than raised.
  def updateFrom(fromNode)
    unless fromNode.respond_to?(:to_stream)
      raise "Node provided as update source doesn't support :to_stream"
    end
    fName = @path + '.s3syncTemp'
    # handle the case where the user wants us to create dirs that don't exist in S3
    if $S3syncOptions['--make-dirs']
      # ensure target's path exists
      dirs = @path.split('/')
      # but the last one is a file name
      dirs.pop()
      current = ''
      dirs.each do |dir|
        current << dir << '/'
        begin
          Dir.mkdir(current) unless FileTest.exist?(current)
        rescue SystemCallError
          $stderr.puts "Could not mkdir #{current}: #{$!}"
        end
      end
    end
    unless fromNode.directory?
      f = File.open(fName, 'wb')
      f = ProgressStream.new(f, fromNode.size) if $S3syncOptions['--progress']
      begin
        fromNode.to_stream(f)
      ensure
        # don't leave the temp file open when the download fails
        f.close
      end
    end
    # get original item out of the way (exist? also sees dangling symlinks)
    begin
      File.unlink(@path) if exist?
    rescue SystemCallError
      $stderr.puts "Could not remove existing #{@path}: #{$!}"
    end
    if fromNode.symlink?
      # the downloaded content of a symlink node is the link target
      linkTo = File.open(fName, 'rb') { |io| io.read }
      debug("#{@path} will be a symlink to #{linkTo}")
      begin
        File.symlink(linkTo, @path)
      rescue NotImplementedError
        # windows doesn't do symlinks, for example
        # just bail
        File.unlink(fName) if File.exist?(fName)
        return
      rescue SystemCallError
        $stderr.puts "Could not write symlink #{@path}: #{$!}"
      end
    elsif fromNode.directory?
      # only get here when the dir doesn't exist. else they'd compare ==
      debug(@path)
      begin
        Dir.mkdir(@path) unless FileTest.exist?(@path)
      rescue SystemCallError
        $stderr.puts "Could not mkdir #{@path}: #{$!}"
      end
    else
      begin
        File.rename(fName, @path)
      rescue SystemCallError
        $stderr.puts "Could not write (rename) #{@path}: #{$!}"
      end
    end
    # clean up if the temp file is still there (as for links)
    File.unlink(fName) if File.exist?(fName)

    # update permissions; symlinks need the l* variants so the link itself is changed
    linkCommand = fromNode.symlink? ? 'l' : ''
    begin
      File.send(linkCommand + 'chown', fromNode.owner, fromNode.group, @path)
      File.send(linkCommand + 'chmod', fromNode.permissions, @path)
    rescue NotImplementedError
      # no one has lchmod, but who really cares
    rescue SystemCallError
      $stderr.puts "Could not change owner/permissions on #{@path}: #{$!}"
    end
  end

  # True when the path is a symbolic link.
  def symlink?()
    FileTest.symlink?(@path)
  end

  # Remove the item from disk. The sync root (empty name) is never removed
  # and failures are reported on stderr.
  def delete
    # don't try to delete the restore root dir
    # this is a quick fix to deal with the fact that the tree recurse has to visit the root node
    return unless @name != ''
    return unless exist?
    begin
      # a symlink that points at a directory must be unlinked, not rmdir'd
      if FileTest.directory?(@path) and not FileTest.symlink?(@path)
        Dir.rmdir(@path)
      else
        File.unlink(@path)
      end
    rescue SystemCallError
      $stderr.puts "Could not delete #{@path}: #{$!}"
    end
  end
end
702
+
703
+
704
+ end #module
705
+
706
# Write str to stderr when the --debug option is set.
# Does nothing when the options hash has not been created yet.
def debug(str)
  $stderr.puts str if $S3syncOptions && $S3syncOptions['--debug']
end
709
+
710
# Print a count of live objects per class to stderr, most numerous first.
# last_stat:: counts from an earlier call; when given, each line also shows
#             the change since then (a class missing from it counts as 0)
# Returns the new counts as a Hash of class => count with a default of 0.
def ostats(last_stat = nil)
  stats = Hash.new(0)
  ObjectSpace.each_object { |o| stats[o.class] += 1 }

  stats.sort { |(_k1, v1), (_k2, v2)| v2 <=> v1 }.each do |k, v|
    $stderr.printf "%-30s %10d", k, v
    $stderr.printf " delta %10d", (v - (last_stat[k] || 0)) if last_stat
    $stderr.puts
  end

  stats
end
722
+
723
+ # go!
724
+ S3sync::main