mgreenly-s3sync 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,724 @@
1
#!/usr/bin/env ruby
# This software code is made available "AS IS" without warranties of any
# kind. You may copy, display, modify and redistribute the software
# code either by itself or as incorporated into your code; provided that
# you do not remove any proprietary notices. Your use of this software
# code is at your own risk and you waive any claim against the author
# with respect to your use of this software code.
# (c) 2007 s3sync.net
#

# Top-level namespace for the s3sync command-line tool. Everything below
# (globals, option parsing, tree generators, node classes) lives in here.
module S3sync

  # Path of the mime.types table used to set Content-Type on uploads;
  # overridable via the S3SYNC_MIME_TYPES_FILE environment variable.
  $S3SYNC_MIME_TYPES_FILE = (ENV["S3SYNC_MIME_TYPES_FILE"] or '/etc/mime.types')

  $S3SYNC_VERSION = '1.2.4'

  # always look "here" for include files (thanks aktxyz)
  $LOAD_PATH << File.expand_path(File.dirname(__FILE__))

  require 'getoptlong'
  #require 'generator' # http://www.ruby-doc.org/stdlib/libdoc/generator/rdoc/classes/Generator.html
  require 'thread_generator' # memory doesn't leak with this one, at least nothing near as bad
  require 'md5' # NOTE(review): Ruby 1.8-only library; on 1.9+ this would need 'digest/md5' — confirm target Ruby
  require 'tempfile'
  require 's3try'

  # after other mods, so we don't overwrite yaml vals with defaults
  require 's3config'
  include S3Config

  # Sentinel payload stored as the body of every S3 "directory" key, and the
  # MD5 tag of that payload; Node#directory? recognizes dir nodes by these.
  $S3syncDirString = '{E40327BF-517A-46e8-A6C3-AF51BC263F59}'
  $S3syncDirTag = 'd66759af42f282e1ba19144df2d405d0'
  # Pre-built temp file holding the sentinel, reused when PUTting dir nodes.
  $S3syncDirFile = Tempfile.new("s3sync")
  $S3syncDirFile.puts $S3syncDirString
  $S3syncDirFile.close # not final; we need this file again to 'put' directory nodes

  # Build $mimeTypes (extension => mime type) from the mime.types table, if
  # present. Left undefined when the file is missing; users must guard with
  # defined?($mimeTypes), as S3Node#updateFrom does.
  if $S3SYNC_MIME_TYPES_FILE and FileTest.exist?($S3SYNC_MIME_TYPES_FILE)
    File.open($S3SYNC_MIME_TYPES_FILE, 'r') do |f|
      $mimeTypes = {}
      f.each_line do |l|
        # mime.types lines look like: "type/subtype  ext1 ext2 ..."
        if l =~ /^(\w\S+)\s+(\S.*)$/
          type = $1
          exts = $2.split
          exts.each do |e|
            $mimeTypes[e.to_s] = type.to_s
          end
        end
      end
    end
  end
51
+
52
# Entry point for the CLI. Parses options from ARGV, determines the transfer
# direction (local->S3 or S3->local), builds a lazily-evaluated generator for
# each side's tree, then walks both in lockstep creating/updating/deleting
# nodes so the destination mirrors the source.
def S3sync.main
  # ---------- OPTIONS PROCESSING ---------- #

  $S3syncOptions = Hash.new
  optionsParser = GetoptLong.new(
    [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
    [ '--ssl', '-s', GetoptLong::NO_ARGUMENT ],
    [ '--recursive','-r', GetoptLong::NO_ARGUMENT ],
    [ '--public-read','-p', GetoptLong::NO_ARGUMENT ],
    [ '--delete', GetoptLong::NO_ARGUMENT ],
    [ '--verbose', '-v', GetoptLong::NO_ARGUMENT ],
    [ '--dryrun', '-n', GetoptLong::NO_ARGUMENT ],
    [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
    [ '--memory', '-m', GetoptLong::NO_ARGUMENT ],
    [ '--progress', GetoptLong::NO_ARGUMENT ],
    [ '--expires', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--cache-control', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--exclude', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--make-dirs', GetoptLong::NO_ARGUMENT ]
  )

  # Print usage (optionally preceded by an error message) to stderr and exit.
  def S3sync.usage(message = nil)
    $stderr.puts message if message
    name = $0.split('/').last
    $stderr.puts <<"ENDUSAGE"
#{name} [options] <source> <destination>\t\tversion #{$S3SYNC_VERSION}
--help -h --verbose -v --dryrun -n
--ssl -s --recursive -r --delete
--public-read -p --expires="<exp>" --cache-control="<cc>"
--exclude="<regexp>" --progress --debug -d
--make-dirs
One of <source> or <destination> must be of S3 format, the other a local path.
Reminders:
* An S3 formatted item with bucket 'mybucket' and prefix 'mypre' looks like:
mybucket:mypre/some/key/name
* Local paths should always use forward slashes '/' even on Windows
* Whether you use a trailing slash on the source path makes a difference.
* For examples see README.
ENDUSAGE
    exit
  end #usage

  begin
    # flag options get value true; argument options keep their string value
    optionsParser.each {|opt, arg| $S3syncOptions[opt] = (arg || true)}
  rescue StandardError
    usage # the parser already printed an error message
  end
  usage if $S3syncOptions['--help']
  $S3syncOptions['--verbose'] = true if $S3syncOptions['--dryrun'] or $S3syncOptions['--debug'] or $S3syncOptions['--progress']
  $S3syncOptions['--ssl'] = true if $S3syncOptions['--ssl'] # change from "" to true to appease s3 port chooser


  # ---------- CONNECT ---------- #
  S3sync::s3trySetup

  # ---------- PREFIX PROCESSING ---------- #

  # A prefix is "on S3" when it contains a colon that is not a DOS drive letter.
  def S3sync.s3Prefix?(pre)
    # allow for dos-like things e.g. C:\ to be treated as local even with colon
    pre.include?(':') and not pre.match('^[A-Za-z]:[\\\\/]')
  end
  sourcePrefix, destinationPrefix = ARGV
  # $AWS_ACCESS_KEY_ID / $AWS_SECRET_ACCESS_KEY are set by s3config from env/yaml
  usage("You didn't set up your environment variables; see README.txt") if not($AWS_ACCESS_KEY_ID and $AWS_SECRET_ACCESS_KEY)
  usage('Need a source and a destination') if sourcePrefix == nil or destinationPrefix == nil
  usage('Both arguments can\'t be on S3') if s3Prefix?(sourcePrefix) and s3Prefix?(destinationPrefix)
  usage('One argument must be on S3') if !s3Prefix?(sourcePrefix) and !s3Prefix?(destinationPrefix)

  # so we can modify them
  sourcePrefix, destinationPrefix = sourcePrefix.dup, destinationPrefix.dup

  # handle trailing slash for source properly
  if(sourcePrefix !~ %r{/$})
    # no slash on end of source means we need to append the last src dir to dst prefix
    # testing for empty isn't good enough here.. needs to be "empty apart from potentially having 'bucket:'"
    slash = (destinationPrefix.empty? or destinationPrefix.match(%r{:$}))? "" : "/"
    # not good enough.. sometimes this coughs up the bucket as a prefix destinationPrefix.replace(destinationPrefix + slash + sourcePrefix.split(/(?:\/|:)/).last)
    # take everything at the end after a slash or colon
    destinationPrefix.replace(destinationPrefix + slash + %r{([^/:]*)$}.match(sourcePrefix)[1])
  end
  # no trailing slash on dest, ever.
  destinationPrefix.sub!(%r{/$}, "")

  # don't repeat slashes
  sourcePrefix.squeeze!('/')
  destinationPrefix.squeeze!('/')

  # here's where we find out what direction we're going
  sourceIsS3 = s3Prefix?(sourcePrefix)
  # alias these variables to the other strings (in ruby = does not make copies of strings)
  s3Prefix = sourceIsS3 ? sourcePrefix : destinationPrefix
  localPrefix = sourceIsS3 ? destinationPrefix : sourcePrefix

  # canonicalize the S3 stuff
  # split "bucket:key/prefix" in place; later .replace calls also mutate the
  # aliased source/destination variable above by design
  s3Bucket = (/^(.*?):/.match(s3Prefix))[1]
  s3Prefix.replace((/:(.*)$/.match(s3Prefix))[1])
  debug("s3Prefix #{s3Prefix}")
  $S3SyncOriginalS3Prefix = s3Prefix.dup

  # canonicalize the local stuff
  # but that can kill a trailing slash, which we need to preserve long enough to know whether we mean "the dir" or "its contents"
  # it will get re-stripped by the local generator after expressing this knowledge
  localTrailingSlash = localPrefix.match(%r{/$})
  localPrefix.replace(File.expand_path(localPrefix))
  localPrefix += '/' if localTrailingSlash
  debug("localPrefix #{localPrefix}")
  # used for exclusion parsing
  $S3SyncOriginalLocalPrefix = localPrefix.dup

  # exclude preparation
  # we don't want to build then throw away this regexp for each node in the universe; do it once globally
  $S3SyncExclude = Regexp.new($S3syncOptions['--exclude']) if $S3syncOptions['--exclude']


  # ---------- GENERATORS ---------- #


  # a generator that will return the files/dirs of the local tree one by one
  # sorted and decorated for easy comparison with the S3 tree
  localTree = Generator.new do |g|
    def S3sync.localTreeRecurse(g, prefix, path)
      debug("localTreeRecurse #{prefix} #{path}")
      #if $S3syncOptions['--memory']
      # $stderr.puts "Starting local recurse"
      # stats = ostats stats
      #end
      d = nil
      begin
        slash = prefix.empty? ? "" : "/"
        d = Dir.new(prefix + slash + path)
      rescue Errno::ENOENT
        # ok the dir doesn't exist at all (this only really occurs for the root i.e. first dir)
        return nil
      rescue Errno::EACCES
        # vista won't even let us touch some stuff in our own profile
        return nil
      end
      # do some pre-processing
      # the following sleight of hand is to make the recursion match the way s3 sorts
      # take for example the directory 'foo' and the file 'foo.bar'
      # when we encounter the dir we would want to recurse into it
      # but S3 would just say 'period < slash' and sort 'foo.bar' between the dir node
      # and the contents in that 'dir'
      #
      # so the solution is to not recurse into the directory until the point where
      # it would come up "next" in the S3 list
      # We have to do these hoops on the local side, because we have very little control
      # over how S3 will return its results
      toAdd = Array.new
      d.each do |name|
        slash = path.empty? ? "" : "/"
        partialPath = path + slash + name
        slash = prefix.empty? ? "" : "/"
        fullPath = prefix + slash + partialPath
        if name == "." or name == ".."
          # skip
        else
          # add a dir node if appropriate
          debug("Test #{fullPath}")
          if ((not FileTest.symlink?(fullPath)) and FileTest.directory?(fullPath)) and $S3syncOptions['--recursive']
            debug("Adding it as a dir node")
            toAdd.push(name + '/') # always trail slash here for sorting purposes (removed below with rindex test)
          end
        end
      end
      # merge the plain entries with the slash-tagged dir entries, then sort
      dItems = d.collect + toAdd
      d.close
      d = toAdd = nil
      dItems.sort! #aws says we will get alpha sorted results but ruby doesn't
      dItems.each do |name|
        isDirNode = false
        if name.rindex('/') == name.length-1
          # trailing slash marks the synthetic dir entries pushed above
          name = name.slice(0...name.length-1)
          isDirNode = true
          debug("#{name} is a dir node")
        end
        slash = path.empty? ? "" : "/"
        partialPath = path + slash + name
        slash = prefix.empty? ? "" : "/"
        fullPath = prefix + slash + partialPath
        # exclusion is matched against the path relative to the original prefix
        excludePath = fullPath.slice($S3SyncOriginalLocalPrefix.length...fullPath.length)
        if name == "." or name == ".."
          # skip
        elsif $S3SyncExclude and $S3SyncExclude.match(excludePath)
          debug("skipping local item #{excludePath} because of --exclude")
        elsif isDirNode
          localTreeRecurse(g, prefix, partialPath)
        else
          # a normal looking node we should try to process
          debug("local item #{fullPath}")
          g.yield(LocalNode.new(prefix, partialPath))
        end
      end
      #if $S3syncOptions['--memory']
      # $stderr.puts "Ending local recurse"
      # stats = ostats stats
      #end
    end
    # a bit of a special case for local, since "foo/" and "foo" are essentially treated the same by file systems
    # so we need to think harder about what the user really meant in the command line.
    localPrefixTrim = localPrefix
    if localPrefix !~ %r{/$}
      # no trailing slash, so yield the root itself first, then recurse if appropriate
      # gork this is still not quite good enough.. if local is the dest then we don't know whether s3 will have a root dir node yielded a priori, so we can't know whether to do this. only matters for --erase though
      g.yield(LocalNode.new(localPrefixTrim, "")) # technically we should check this for exclusion, but excluding the root node is kind of senseless.. and that would be a pain to set up here
      localTreeRecurse(g, localPrefixTrim, "") if $S3syncOptions['--recursive']
    else
      # trailing slash, so ignore the root itself, and just go into the first level
      localPrefixTrim.sub!(%r{/$}, "") # strip the slash because of how we do local node slash accounting in the recurse above
      localTreeRecurse(g, localPrefixTrim, "")
    end
  end

  # a generator that will return the nodes in the S3 tree one by one
  # sorted and decorated for easy comparison with the local tree
  s3Tree = Generator.new do |g|
    def S3sync.s3TreeRecurse(g, bucket, prefix, path)
      if $S3syncOptions['--memory']
        $stderr.puts "Starting S3 recurse"
        GC.start
        stats = ostats stats
      end
      $stderr.puts "s3TreeRecurse #{bucket} #{prefix} #{path}" if $S3syncOptions['--debug']
      nextPage = true
      marker = ''
      # page through the bucket listing until is_truncated comes back false
      while nextPage do
        fullPrefix = prefix + path
        debug("nextPage: #{marker}") if marker != ''
        options = {}
        options['prefix'] = fullPrefix # start at the right depth
        options['delimiter'] = '/' # only one dir at a time please
        options['max-keys'] = '200' # use manageable chunks
        options['marker'] = marker unless marker == ''
        d = S3sync.S3try(:list_bucket, bucket, options)
        $stderr.puts "S3 ERROR: #{d.http_response}" unless d.http_response.is_a? Net::HTTPSuccess
        # the 'directories' and leaf nodes are in two separate collections
        # because a dir will never have the same name as a node, we can just shove them together and sort
        # it's important to evaluate them alphabetically for efficient comparison to the local tree
        tItems = d.entries + d.common_prefix_entries
        tItems.sort! do |a,b|
          aName = a.respond_to?('key') ? a.key : a.prefix
          bName = b.respond_to?('key') ? b.key : b.prefix
          # the full path will be returned, efficient to ignore the part we know will be in common
          aName.slice(fullPrefix.length..aName.length) <=> bName.slice(fullPrefix.length..bName.length)
        end
        # get rid of the big s3 objects asap, just save light-weight nodes and strings
        # (keys become Node objects; common prefixes stay plain Strings)
        items = tItems.collect do |item|
          if item.respond_to?('key')
            key = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join
            Node.new(key, item.size, item.etag)
          else
            Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.prefix).join
          end
        end
        nextPage = d.properties.is_truncated
        marker = (d.properties.next_marker)? d.properties.next_marker : ((d.entries.length > 0)? d.entries.last.key : '')
        # get this into native char set (because when we feed it back to s3 that's what it will expect)
        marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join
        tItems = nil
        d = nil # get rid of this before recursing; it's big
        item = nil
        GC.start # not sure but I think yielding before doing this is causing evil closure bloat
        items.each do |item|
          if not (item.kind_of? String)
            # this is an item
            excludePath = item.name.slice($S3SyncOriginalS3Prefix.length...item.name.length)
            if $S3SyncExclude and $S3SyncExclude.match(excludePath)
              debug("skipping S3 item #{excludePath} due to --exclude")
            else
              debug("S3 item #{item.name}")
              g.yield(S3Node.new(bucket, prefix, item))
            end
          else
            # it's a prefix (i.e. there are sub keys)
            partialPath = item.slice(prefix.length..item.length) # will have trailing slash
            excludePath = item.slice($S3SyncOriginalS3Prefix.length...item.length)
            # recurse
            if $S3SyncExclude and $S3SyncExclude.match(excludePath)
              debug("skipping prefix #{excludePath} due to --exclude")
            else
              debug("prefix found: #{partialPath}")
              s3TreeRecurse(g, bucket, prefix, partialPath) if $S3syncOptions['--recursive']
            end
          end
        end
        items = nil
      end # of while nextPage
      if $S3syncOptions['--memory']
        $stderr.puts "Ending S3 recurse"
        GC.start
        stats = ostats stats
      end
    end
    # this will yield the root node first and then recurse
    s3TreeRecurse(g, s3Bucket, s3Prefix, "")

  end

  # alias the tree objects so we don't care below which direction the transfer is going
  if sourceIsS3
    sourceTree, destinationTree = s3Tree, localTree
  else
    sourceTree, destinationTree = localTree, s3Tree
  end


  # ---------- COMPARATOR ---------- #

  # run the comparison engine and act according to what we find for each check
  # Both trees yield nodes in the same (S3-compatible) sort order, so a
  # classic merge walk decides create / delete / compare for each name.
  nodesToDelete = Array.new # a stack. have to delete in reverse order of normal create/update processing

  sourceNode = sourceTree.next? ? sourceTree.next : nil
  destinationNode = destinationTree.next? ? destinationTree.next : nil
  while sourceNode or destinationNode do
    debug("source: #{sourceNode.name}") if sourceNode
    debug("dest: #{destinationNode.name}") if destinationNode
    if (!destinationNode) or (sourceNode and (sourceNode.name < destinationNode.name))
      # source-only name: create the counterpart on the destination side
      dNode =
        if sourceNode.kind_of? LocalNode
          S3Node.new(s3Bucket, s3Prefix, sourceNode.name)
        else
          LocalNode.new(localPrefix, sourceNode.name)
        end
      puts "Create node #{sourceNode.name}" if $S3syncOptions['--verbose']
      dNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
      sourceNode = sourceTree.next? ? sourceTree.next : nil
    elsif (!sourceNode) or (destinationNode and (sourceNode.name > destinationNode.name))
      # destination-only name: remove it when --delete was requested
      $stderr.puts "Source does not have #{destinationNode.name}" if $S3syncOptions['--debug']
      if $S3syncOptions['--delete']
        if destinationNode.directory?
          # have to wait
          nodesToDelete.push(destinationNode)
        else
          puts "Remove node #{destinationNode.name}" if $S3syncOptions['--verbose']
          destinationNode.delete unless $S3syncOptions['--dryrun']
        end
      end
      destinationNode = destinationTree.next? ? destinationTree.next : nil
    elsif sourceNode.name == destinationNode.name
      # same name on both sides: update only when size or md5 tag differ
      if (sourceNode.size != destinationNode.size) or (sourceNode.tag != destinationNode.tag)
        puts "Update node #{sourceNode.name}" if $S3syncOptions['--verbose']
        destinationNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
      elsif $S3syncOptions['--debug']
        $stderr.puts "Node #{sourceNode.name} unchanged"
      end
      sourceNode = sourceTree.next? ? sourceTree.next : nil
      destinationNode = destinationTree.next? ? destinationTree.next : nil
    end
  end

  # get rid of the (now empty, except for other directories) directories
  nodesToDelete.reverse_each do |node|
    puts "Remove node #{node.name}" if $S3syncOptions['--verbose']
    node.delete unless $S3syncOptions['--dryrun']
  end

end #main
408
+
409
+
410
+
411
+ # ---------- NODE ---------- #
412
+
413
# Lightweight value object describing one entry of a sync tree.
# Both the local and the S3 generators produce objects comparable through
# this interface: a relative name, a byte size, and an MD5 tag.
class Node
  attr_reader :name, :size, :tag

  # name: path relative to the sync prefix; size: byte count;
  # tag: hex MD5 of the content (dir nodes carry the sentinel tag).
  def initialize(name = '', size = 0, tag = '')
    @name = name
    @size = size
    @tag = tag
  end

  # True when this node is one of the synthetic S3 "directory" markers:
  # its tag and size match the global directory sentinel exactly.
  def directory?
    @size == $S3syncDirString.length && @tag == $S3syncDirTag
  end
end
426
+
427
+ # ---------- S3Node ---------- #
428
+
429
# A Node backed by an S3 key. Knows how to stream itself down, describe its
# unix metadata (stored as S3 object metadata), and push updates back up.
class S3Node < Node
  # NOTE(review): these three are class-level instance variables; they are
  # never read and do not initialize per-instance state (that happens in
  # initialize). Kept for fidelity with the original source.
  @path = nil
  @bucket = nil
  @result = nil
  # bucket: S3 bucket name.
  # prefix: key prefix this sync run is rooted at.
  # itemOrName: either a relative name String (when creating the destination
  #   counterpart of a local node) or a listing Node (when built from an S3
  #   list result, carrying size and etag).
  def initialize(bucket, prefix, itemOrName)
    @bucket = bucket
    if itemOrName.kind_of? String
      @name = itemOrName
      @name.sub!(%r{/$}, "") # don't create directories with a slash on the end
      #6/2007. the prefix can be filled but the name empty, in the case of s3sync -r somedir somebucket:
      if (not prefix.empty? and @name.empty?)
        @name = prefix
        itemOrName = prefix
        prefix = ""
      end
      slash = prefix.empty? ? "" : "/"
      @path = prefix + slash + itemOrName
    else
      # built from a listing entry: strip the prefix to get the relative name
      @name = (itemOrName.name.slice((prefix.length)..itemOrName.name.length) or '')
      # depending whether the prefix is / tailed, the name might need trimming
      @name.sub!(%r{^/},"") # get rid of leading slash in name if there (from above simplistic split)
      @name.sub!(%r{/$}, "") # don't create directories with a slash on the end
      @path = itemOrName.name
      @path.sub!(%r{/$}, "") # don't create directories with a slash on the end
      @size = itemOrName.size
      @tag = itemOrName.tag.gsub(/"/,'') # etag arrives wrapped in quotes
    end
    debug("s3 node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag}")
  end
  # get this item from s3 into the provided stream
  # S3 pushes to the local item, due to how http streaming is implemented
  def to_stream(s)
    @result = S3sync.S3try(:get_stream, @bucket, @path, {}, s)
  end
  # True when the stored object was uploaded from a symlink (metadata flag).
  # Lazily issues a HEAD request and caches it in @result.
  def symlink?()
    unless @result
      @result = S3sync.S3try(:head, @bucket, @path)
    end
    debug("symlink value is: #{@result.object.metadata['symlink']}")
    @result.object.metadata['symlink'] == 'true'
  end
  # Unix uid recorded at upload time; lazily HEADs and caches like symlink?.
  def owner
    unless @result
      @result = S3sync.S3try(:head, @bucket, @path)
    end
    debug("Owner of this s3 node is #{@result.object.metadata['owner']}")
    @result.object.metadata['owner'].to_i # if not there, will be nil => 0 which == root so good default
  end
  # Unix gid recorded at upload time; lazily HEADs and caches like symlink?.
  def group
    unless @result
      @result = S3sync.S3try(:head, @bucket, @path)
    end
    @result.object.metadata['group'].to_i # 0 default ok
  end
  # Unix mode recorded at upload time.
  # NOTE(review): unlike owner/group/symlink?, this does NOT lazily HEAD —
  # it assumes @result was already populated by a prior call; calling it
  # first would raise NoMethodError on nil. Confirm call order before reuse.
  def permissions
    g = @result.object.metadata['permissions']
    g ? g.to_i : 600 # default to owner only
  end
  # Upload fromNode's content and unix metadata to this S3 key.
  # fromNode must respond to :stream (LocalNode does); raises otherwise.
  def updateFrom(fromNode)
    if fromNode.respond_to?(:stream)
      meta = Hash.new
      meta['owner'] = fromNode.owner.to_s
      meta['group'] = fromNode.group.to_s
      meta['permissions'] = fromNode.permissions.to_s
      meta['symlink'] = 'true' if fromNode.symlink?
      begin
        theStream = fromNode.stream
        theStream = ProgressStream.new(theStream, fromNode.size) if $S3syncOptions['--progress']

        s3o = S3::S3Object.new(theStream, meta)
        debug(@path)
        headers = {'Content-Length' => (fromNode.size.respond_to?(:nonzero?) ? fromNode.size.to_s : '0')}
        headers['x-amz-acl'] = 'public-read' if $S3syncOptions['--public-read']
        headers['Expires'] = $S3syncOptions['--expires'] if $S3syncOptions['--expires']
        headers['Cache-Control'] = $S3syncOptions['--cache-control'] if $S3syncOptions['--cache-control']
        # pick a Content-Type from the extension via the global mime table
        fType = @path.split('.').last
        debug("File extension: #{fType}")
        if defined?($mimeTypes) and fType != '' and (mType = $mimeTypes[fType]) and mType != ''
          debug("Mime type: #{mType}")
          headers['Content-Type'] = mType
        end
        @result = S3sync.S3try(:put, @bucket, @path, s3o, headers)
        theStream.close if (theStream and not theStream.closed?)
      rescue NoMethodError
        # when --progress is used and we can't get the stream object, it doesn't report as null
        # so the above .closed? test will break
        $stderr.puts "Skipping #{@path}: " + $!
      rescue SystemCallError
        theStream.close if (theStream and not theStream.closed?)
        $stderr.puts "Skipping #{@path}: " + $!
      end
    else
      raise "Node provided as update source doesn't support :stream"
    end
  end
  # Delete this key from the bucket.
  def delete
    @result = S3sync.S3try(:delete, @bucket, @path)
  end
end
528
+
529
+ # ---------- LocalNode ---------- #
530
+
531
# A Node backed by a local file, directory, or symlink. Computes its MD5 tag
# at construction time and knows how to materialize updates pulled from S3.
class LocalNode < Node
  # NOTE(review): class-level instance variable, never read; the real @path
  # is assigned per-instance in initialize. Kept for fidelity.
  @path = nil
  # prefix: local root of the sync; partialPath: path relative to it.
  # Sets @size/@tag depending on whether the path is a symlink, regular
  # file, or directory (dirs get the global sentinel size/tag so they
  # compare equal to S3 dir nodes).
  def initialize(prefix, partialPath)
    slash = prefix.empty? ? "" : "/"
    @path = prefix + slash + partialPath
    # slash isn't at the front of this any more @name = (partialPath.slice(1..partialPath.length) or '')
    @name = partialPath or ''
    if FileTest.symlink?(@path)
      # this could use the 'file' case below, but why create an extra temp file
      linkData = File.readlink(@path)
      $stderr.puts "link to: #{linkData}" if $S3syncOptions['--debug']
      @size = linkData.length
      md5 = Digest::MD5.new()
      md5 << linkData
      @tag = md5.hexdigest
    elsif FileTest.file?(@path)
      @size = FileTest.size(@path)
      data = nil
      begin
        data = self.stream
        md5 = Digest::MD5.new()
        while !data.eof?
          md5 << data.read(2048) # stream so it's not taking all memory
        end
        data.close
        @tag = md5.hexdigest
      rescue SystemCallError
        # well we're not going to have an md5 that's for sure
        @tag = nil
      end
    elsif FileTest.directory?(@path)
      # all s3 directories are dummy nodes contain the same directory string
      # so for easy comparison, set our size and tag thusly
      @size = $S3syncDirString.length
      @tag = $S3syncDirTag
    end
    debug("local node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag}")
  end
  # return a stream that will read the contents of the local item
  # local gets pulled by the S3Node update fn, due to how http streaming is implemented
  # Symlinks/dirs are spooled into a temp file so they stream like files;
  # returns nil for paths that are none of symlink/dir/file.
  def stream
    begin
      # 1.0.8 switch order of these tests because a symlinked file will say yes to 'file?'
      if FileTest.symlink?(@path) or FileTest.directory?(@path)
        tf = Tempfile.new('s3sync')
        if FileTest.symlink?(@path)
          tf.printf('%s', File.readlink(@path))
        elsif FileTest.directory?(@path)
          tf.printf('%s', $S3syncDirString)
        end
        tf.close
        tf.open # reopen read-ready at offset 0
        tf
      elsif FileTest.file?(@path)
        File.open(@path, 'rb')
      end
    rescue SystemCallError
      $stderr.puts "Could not read #{@path}: #{$!}"
      raise
    end
  end
  # lstat for symlinks (don't follow), stat otherwise.
  def stat
    FileTest.symlink?(@path) ? File.lstat(@path) : File.stat(@path)
  end
  # exist? follows symlinks, so also accept a dangling symlink explicitly.
  def exist?
    FileTest.exist?(@path) or FileTest.symlink?(@path)
  end
  def owner
    self.exist? ? self.stat().uid : 0
  end
  def group
    self.exist? ? self.stat().gid : 0
  end
  def permissions
    self.exist? ? self.stat().mode : 600
  end
  # Pull fromNode (an S3Node) down into this local path: stream to a temp
  # file, then turn it into a symlink, directory, or regular file, and
  # finally restore owner/group/mode from the S3 metadata.
  # fromNode must respond to :to_stream; raises otherwise.
  def updateFrom(fromNode)
    if fromNode.respond_to?(:to_stream)
      fName = @path + '.s3syncTemp'
      # handle the case where the user wants us to create dirs that don't exist in S3
      if $S3syncOptions['--make-dirs']
        # ensure target's path exists
        dirs = @path.split('/')
        # but the last one is a file name
        dirs.pop()
        current = ''
        dirs.each do |dir|
          current << dir << '/'
          begin
            Dir.mkdir(current) unless FileTest.exist?(current)
          rescue SystemCallError
            $stderr.puts "Could not mkdir #{current}: #{$!}"
          end
        end
      end
      # directories have sentinel content only; nothing to download for them
      unless fromNode.directory?
        f = File.open(fName, 'wb')
        f = ProgressStream.new(f, fromNode.size) if $S3syncOptions['--progress']

        fromNode.to_stream(f)
        f.close
      end
      # get original item out of the way
      File.unlink(@path) if File.exist?(@path)
      if fromNode.symlink?
        # the downloaded temp file body is the link target
        linkTo = ''
        File.open(fName, 'rb'){|f| linkTo = f.read}
        debug("#{@path} will be a symlink to #{linkTo}")
        begin
          File.symlink(linkTo, @path)
        rescue NotImplementedError
          # windows doesn't do symlinks, for example
          # just bail
          File.unlink(fName) if File.exist?(fName)
          return
        rescue SystemCallError
          $stderr.puts "Could not write symlink #{@path}: #{$!}"
        end
      elsif fromNode.directory?
        # only get here when the dir doesn't exist. else they'd compare ==
        debug(@path)
        begin
          Dir.mkdir(@path) unless FileTest.exist?(@path)
        rescue SystemCallError
          $stderr.puts "Could not mkdir #{@path}: #{$!}"
        end

      else
        begin
          File.rename(fName, @path)
        rescue SystemCallError
          $stderr.puts "Could not write (rename) #{@path}: #{$!}"
        end

      end
      # clean up if the temp file is still there (as for links)
      File.unlink(fName) if File.exist?(fName)

      # update permissions
      # use File.lchown/lchmod for symlinks so we touch the link, not target
      linkCommand = fromNode.symlink? ? 'l' : ''
      begin
        File.send(linkCommand + 'chown', fromNode.owner, fromNode.group, @path)
        File.send(linkCommand + 'chmod', fromNode.permissions, @path)
      rescue NotImplementedError
        # no one has lchmod, but who really cares
      rescue SystemCallError
        $stderr.puts "Could not change owner/permissions on #{@path}: #{$!}"
      end
    else
      raise "Node provided as update source doesn't support :to_stream"
    end
  end
  def symlink?()
    FileTest.symlink?(@path)
  end
  # Remove this local file or (empty) directory; never removes the sync root.
  def delete
    # don't try to delete the restore root dir
    # this is a quick fix to deal with the fact that the tree recurse has to visit the root node
    return unless @name != ''
    return unless FileTest.exist?(@path)
    begin
      if FileTest.directory?(@path)
        Dir.rmdir(@path)
      else
        File.unlink(@path)
      end
    rescue SystemCallError
      $stderr.puts "Could not delete #{@path}: #{$!}"
    end
  end
end
702
+
703
+
704
+ end #module
705
+
706
# Write a diagnostic line to stderr, but only when --debug was requested.
def debug(str)
  if $S3syncOptions['--debug']
    $stderr.puts(str)
  end
end
709
+
710
# Dump a per-class census of live Ruby objects to stderr, most numerous
# first, and return the census hash. When last_stat (a prior census, with a
# 0 default) is given, a per-class delta column is printed too. Used by the
# --memory option to spot leaks between recursion passes.
def ostats(last_stat = nil)
  census = Hash.new(0)
  ObjectSpace.each_object { |obj| census[obj.class] += 1 }

  # descending by count; classes themselves are never compared
  census.sort { |(_, count_a), (_, count_b)| count_b <=> count_a }.each do |klass, count|
    line = format("%-30s %10d", klass, count)
    line << format(" delta %10d", count - last_stat[klass]) if last_stat
    $stderr.puts line
  end

  census
end
722
+
723
+ # go!
724
+ S3sync::main