s3sync-cf 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/s3sync ADDED
@@ -0,0 +1,752 @@
1
+ #!/usr/bin/env ruby
2
+ # This software code is made available "AS IS" without warranties of any
3
+ # kind. You may copy, display, modify and redistribute the software
4
+ # code either by itself or as incorporated into your code; provided that
5
+ # you do not remove any proprietary notices. Your use of this software
6
+ # code is at your own risk and you waive any claim against the author
7
+ # with respect to your use of this software code.
8
+ # (c) 2007 s3sync.net
9
+ #
10
+ require 'right_aws'
11
+ module S3sync
12
+
13
+ $S3SYNC_MIME_TYPES_FILE = (ENV["S3SYNC_MIME_TYPES_FILE"] or '/etc/mime.types')
14
+ $S3SYNC_VERSION = '1.3.1'
15
+
16
+ # always look "here" for include files (thanks aktxyz)
17
+ $LOAD_PATH << File.join(File.expand_path(File.dirname(__FILE__)), "../lib/")
18
+
19
+ require 'getoptlong'
20
+ # require 'generator' # http://www.ruby-doc.org/stdlib/libdoc/generator/rdoc/classes/Generator.html
21
+ unless defined?(Enumerator)
22
+ require "enumerator"
23
+ Enumerator = Enumerable::Enumerator
24
+ end
25
+ # require 's3sync/thread_generator' # memory doesn't leak with this one, at least nothing near as bad
26
+ require 'digest/md5'
27
+ require 'tempfile'
28
+
29
+ require 's3sync'
30
+
31
# Sentinel payload stored inside S3 "directory" dummy objects, plus the md5
# tag those objects carry; a shared temp file holds the payload so directory
# nodes can be 'put' to S3 later.
$S3syncDirString = '{E40327BF-517A-46e8-A6C3-AF51BC263F59}'
$S3syncDirTag = 'd66759af42f282e1ba19144df2d405d0'
$S3syncDirFile = Tempfile.new("s3sync")
$S3syncDirFile.puts $S3syncDirString
$S3syncDirFile.close # not final; we need this file again to 'put' directory nodes

# Build the extension => mime-type lookup table from the system mime.types
# file (when present) so uploads can be given a sensible Content-Type.
if $S3SYNC_MIME_TYPES_FILE and FileTest.exist?($S3SYNC_MIME_TYPES_FILE)
  File.open($S3SYNC_MIME_TYPES_FILE, 'r') do |f|
    $mimeTypes = {}
    f.each_line do |line|
      next unless line =~ /^(\w\S+)\s+(\S.*)$/
      mtype = $1
      $2.split.each { |ext| $mimeTypes[ext.to_s] = mtype.to_s }
    end
  end
end
51
+
52
# Entry point: parse the command line, set up the S3 connection, build
# matched enumerators over the local and S3 trees, then walk both in
# lock-step creating, updating, and (with --delete) removing nodes so the
# destination mirrors the source. Optionally invalidates updated assets on
# CloudFront afterwards.
def S3sync.main
  # ---------- OPTIONS PROCESSING ---------- #

  $S3syncOptions = Hash.new
  optionsParser = GetoptLong.new(
    [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
    [ '--ssl', '-s', GetoptLong::NO_ARGUMENT ],
    [ '--recursive', '-r', GetoptLong::NO_ARGUMENT ],
    [ '--public-read', '-p', GetoptLong::NO_ARGUMENT ],
    [ '--delete', GetoptLong::NO_ARGUMENT ],
    [ '--verbose', '-v', GetoptLong::NO_ARGUMENT ],
    [ '--dryrun', '-n', GetoptLong::NO_ARGUMENT ],
    [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
    [ '--memory', '-m', GetoptLong::NO_ARGUMENT ],
    [ '--progress', GetoptLong::NO_ARGUMENT ],
    [ '--expires', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--cache-control', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--exclude', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--gzip', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--key', '-k', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--secret', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--make-dirs', GetoptLong::NO_ARGUMENT ],
    [ '--no-md5', GetoptLong::NO_ARGUMENT ],
    [ '--cf-invalidate', GetoptLong::NO_ARGUMENT ],
    [ '--cf-dist-id', GetoptLong::REQUIRED_ARGUMENT ]
  )

  # Print a usage summary (optionally preceded by an error message) and exit.
  # BUG FIX: the help text used to show "--secret -s", but -s is the short
  # form of --ssl in the parser spec; --secret has no short form.
  def S3sync.usage(message = nil)
    $stderr.puts message if message
    name = $0.split('/').last
    $stderr.puts <<-ENDUSAGE
#{name} [options] <source> <destination>\t\tversion #{$S3SYNC_VERSION}
  --help    -h          --verbose     -v     --dryrun    -n
  --ssl     -s          --recursive   -r     --delete
  --public-read -p      --expires="<exp>"    --cache-control="<cc>"
  --exclude="<regexp>"  --progress           --debug     -d
  --key     -k          --secret             --make-dirs
  --no-md5              --gzip
One of <source> or <destination> must be of S3 format, the other a local path.
Reminders:
* An S3 formatted item with bucket 'mybucket' and prefix 'mypre' looks like:
    mybucket:mypre/some/key/name
* Local paths should always use forward slashes '/' even on Windows
* Whether you use a trailing slash on the source path makes a difference.
* For examples see README.
    ENDUSAGE
    exit
  end #usage

  begin
    optionsParser.each { |opt, arg| $S3syncOptions[opt] = (arg || true) }
  rescue StandardError
    usage # the parser already printed an error message
  end
  usage if $S3syncOptions['--help']
  # these modes all imply verbose output
  $S3syncOptions['--verbose'] = true if $S3syncOptions['--dryrun'] or $S3syncOptions['--debug'] or $S3syncOptions['--progress']
  $S3syncOptions['--ssl'] = true if $S3syncOptions['--ssl'] # change from "" to true to appease s3 port chooser

  # command-line credentials override whatever the environment supplied
  $AWS_ACCESS_KEY_ID = $S3syncOptions['--key'] if $S3syncOptions['--key']
  $AWS_SECRET_ACCESS_KEY = $S3syncOptions['--secret'] if $S3syncOptions['--secret']

  # ---------- CONNECT ---------- #
  S3sync::s3trySetup

  # ---------- PREFIX PROCESSING ---------- #
  # An argument is "on S3" when it contains a colon, unless it looks like a
  # DOS drive path (e.g. C:\ or C:/), which stays local.
  def S3sync.s3Prefix?(pre)
    pre.include?(':') and not pre.match('^[A-Za-z]:[\\\\/]')
  end
  sourcePrefix, destinationPrefix = ARGV
  usage("You didn't set up your environment variables; see README.txt") if not($AWS_ACCESS_KEY_ID and $AWS_SECRET_ACCESS_KEY)
  usage('Need a source and a destination') if sourcePrefix == nil or destinationPrefix == nil
  usage('Both arguments can\'t be on S3') if s3Prefix?(sourcePrefix) and s3Prefix?(destinationPrefix)
  usage('One argument must be on S3') if !s3Prefix?(sourcePrefix) and !s3Prefix?(destinationPrefix)
  usage('Need the cloudfront distribution id') if $S3syncOptions['--cf-invalidate'] and !$S3syncOptions['--cf-dist-id']
  # dup so we can modify them in place below
  sourcePrefix, destinationPrefix = sourcePrefix.dup, destinationPrefix.dup

  # handle trailing slash for source properly
  if (sourcePrefix !~ %r{/$})
    # no slash on end of source means we need to append the last src dir to dst prefix
    # testing for empty isn't good enough here.. needs to be "empty apart from potentially having 'bucket:'"
    slash = (destinationPrefix.empty? or destinationPrefix.match(%r{:$})) ? "" : "/"
    # take everything at the end after a slash or colon
    destinationPrefix.replace(destinationPrefix + slash + %r{([^/:]*)$}.match(sourcePrefix)[1])
  end
  # no trailing slash on dest, ever.
  destinationPrefix.sub!(%r{/$}, "")

  # don't repeat slashes
  sourcePrefix.squeeze!('/')
  destinationPrefix.squeeze!('/')

  # here's where we find out what direction we're going
  sourceIsS3 = s3Prefix?(sourcePrefix)
  # alias these variables to the other strings (in ruby = does not make copies of strings)
  s3Prefix = sourceIsS3 ? sourcePrefix : destinationPrefix
  localPrefix = sourceIsS3 ? destinationPrefix : sourcePrefix

  # canonicalize the S3 stuff: split "bucket:prefix" apart
  s3Bucket = (/^(.*?):/.match(s3Prefix))[1]
  s3Prefix.replace((/:(.*)$/.match(s3Prefix))[1])
  debug("s3Prefix #{s3Prefix}")
  $S3SyncOriginalS3Prefix = s3Prefix.dup

  # canonicalize the local stuff
  # but that can kill a trailing slash, which we need to preserve long enough to know whether we mean "the dir" or "its contents"
  # it will get re-stripped by the local generator after expressing this knowledge
  localTrailingSlash = localPrefix.match(%r{/$})
  localPrefix.replace(File.expand_path(localPrefix))
  localPrefix += '/' if localTrailingSlash
  debug("localPrefix #{localPrefix}")
  # used for exclusion parsing
  $S3SyncOriginalLocalPrefix = localPrefix.dup

  # exclude preparation
  # build the regexp once globally rather than once per node in the universe
  $S3SyncExclude = Regexp.new($S3syncOptions['--exclude']) if $S3syncOptions['--exclude']

  # ---------- GENERATORS ---------- #

  # a generator that will return the files/dirs of the local tree one by one
  # sorted and decorated for easy comparison with the S3 tree
  localTree = Enumerator.new do |g|
    def S3sync.localTreeRecurse(g, prefix, path)
      debug("localTreeRecurse #{prefix} #{path}")
      d = nil
      begin
        slash = prefix.empty? ? "" : "/"
        d = Dir.new(prefix + slash + path)
      rescue Errno::ENOENT
        # ok the dir doesn't exist at all (this only really occurs for the root i.e. first dir)
        return nil
      rescue Errno::EACCES
        # vista won't even let us touch some stuff in our own profile
        return nil
      end
      # pre-processing: make the recursion order match the way S3 sorts.
      # take for example the directory 'foo' and the file 'foo.bar':
      # when we encounter the dir we would want to recurse into it, but S3
      # would just say 'period < slash' and sort 'foo.bar' between the dir
      # node and the contents of that 'dir'. So: don't recurse into the
      # directory until the point where it would come up "next" in the S3
      # list. We must do these hoops locally; we have very little control
      # over how S3 returns its results.
      toAdd = Array.new
      d.each do |name|
        slash = path.empty? ? "" : "/"
        partialPath = path + slash + name
        slash = prefix.empty? ? "" : "/"
        fullPath = prefix + slash + partialPath
        next if name == "." or name == ".."
        # add a dir node if appropriate
        debug("Test #{fullPath}")
        if ((not FileTest.symlink?(fullPath)) and FileTest.directory?(fullPath)) and $S3syncOptions['--recursive']
          debug("Adding it as a dir node")
          toAdd.push(name + '/') # always trail slash here for sorting purposes (removed below with rindex test)
        end
      end
      dItems = d.collect.to_a + toAdd
      d.close
      d = toAdd = nil
      dItems.sort! # aws says we will get alpha sorted results but ruby doesn't
      dItems.each do |name|
        isDirNode = false
        if name.rindex('/') == name.length - 1
          name = name.slice(0...name.length - 1)
          isDirNode = true
          debug("#{name} is a dir node")
        end
        slash = path.empty? ? "" : "/"
        partialPath = path + slash + name
        slash = prefix.empty? ? "" : "/"
        fullPath = prefix + slash + partialPath
        excludePath = fullPath.slice($S3SyncOriginalLocalPrefix.length...fullPath.length)
        if name == "." or name == ".."
          # skip
        elsif $S3SyncExclude and $S3SyncExclude.match(excludePath)
          debug("skipping local item #{excludePath} because of --exclude")
        elsif isDirNode
          localTreeRecurse(g, prefix, partialPath)
        else
          # a normal looking node we should try to process
          debug("local item #{fullPath}")
          g.yield(LocalNode.new(prefix, partialPath))
        end
      end
    end
    # a bit of a special case for local, since "foo/" and "foo" are essentially
    # treated the same by file systems; think about what the user really meant.
    localPrefixTrim = localPrefix
    if localPrefix !~ %r{/$}
      # no trailing slash, so yield the root itself first, then recurse if appropriate
      # (excluding the root node would be senseless, so no exclusion check here)
      g.yield(LocalNode.new(localPrefixTrim, ""))
      localTreeRecurse(g, localPrefixTrim, "") if $S3syncOptions['--recursive']
    else
      # trailing slash, so ignore the root itself, and just go into the first level
      localPrefixTrim.sub!(%r{/$}, "") # strip the slash because of how we do local node slash accounting in the recurse above
      localTreeRecurse(g, localPrefixTrim, "")
    end
  end

  # a generator that will return the nodes in the S3 tree one by one
  # sorted and decorated for easy comparison with the local tree
  s3Tree = Enumerator.new do |g|
    def S3sync.s3TreeRecurse(g, bucket, prefix, path)
      if $S3syncOptions['--memory']
        $stderr.puts "Starting S3 recurse"
        GC.start
        stats = ostats stats
      end
      $stderr.puts "s3TreeRecurse #{bucket} #{prefix} #{path}" if $S3syncOptions['--debug']
      nextPage = true
      marker = ''
      while nextPage do
        fullPrefix = prefix + path
        debug("nextPage: #{marker}") if marker != ''
        options = {}
        options['prefix'] = fullPrefix # start at the right depth
        options['delimiter'] = '/' # only one dir at a time please
        options['max-keys'] = '200' # use manageable chunks
        options['marker'] = marker unless marker == ''
        d = S3sync.S3try(:list_bucket, bucket, options)
        $stderr.puts "S3 ERROR: #{d.http_response}" unless d.http_response.is_a? Net::HTTPSuccess
        # the 'directories' and leaf nodes are in two separate collections
        # because a dir will never have the same name as a node, we can just shove them together and sort
        # it's important to evaluate them alphabetically for efficient comparison to the local tree
        tItems = d.entries + d.common_prefix_entries
        tItems.sort! do |a, b|
          aName = a.respond_to?('key') ? a.key : a.prefix
          bName = b.respond_to?('key') ? b.key : b.prefix
          # the full path will be returned, efficient to ignore the part we know will be in common
          aName.slice(fullPrefix.length..aName.length) <=> bName.slice(fullPrefix.length..bName.length)
        end
        # get rid of the big s3 objects asap, just save light-weight nodes and strings
        items = tItems.collect do |item|
          if item.respond_to?('key')
            key = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join
            Node.new(key, item.size, item.etag, item.last_modified)
          else
            Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.prefix).join
          end
        end
        nextPage = d.properties.is_truncated
        marker = (d.properties.next_marker) ? d.properties.next_marker : ((d.entries.length > 0) ? d.entries.last.key : '')
        # get this into native char set (because when we feed it back to s3 that's what it will expect)
        marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join
        tItems = nil
        d = nil # get rid of this before recursing; it's big
        item = nil
        GC.start # not sure but I think yielding before doing this is causing evil closure bloat
        items.each do |item|
          if not (item.kind_of? String)
            # this is a leaf item
            excludePath = item.name.slice($S3SyncOriginalS3Prefix.length...item.name.length)
            if $S3SyncExclude and $S3SyncExclude.match(excludePath)
              debug("skipping S3 item #{excludePath} due to --exclude")
            else
              debug("S3 item #{item.name}")
              g.yield(S3Node.new(bucket, prefix, item))
            end
          else
            # it's a prefix (i.e. there are sub keys)
            partialPath = item.slice(prefix.length..item.length) # will have trailing slash
            excludePath = item.slice($S3SyncOriginalS3Prefix.length...item.length)
            # recurse unless excluded
            if $S3SyncExclude and $S3SyncExclude.match(excludePath)
              debug("skipping prefix #{excludePath} due to --exclude")
            else
              debug("prefix found: #{partialPath}")
              s3TreeRecurse(g, bucket, prefix, partialPath) if $S3syncOptions['--recursive']
            end
          end
        end
        items = nil
      end # of while nextPage
      if $S3syncOptions['--memory']
        $stderr.puts "Ending S3 recurse"
        GC.start
        stats = ostats stats
      end
    end
    # this will yield the root node first and then recurse
    s3TreeRecurse(g, s3Bucket, s3Prefix, "")
  end

  # alias the tree objects so we don't care below which direction the transfer is going
  if sourceIsS3
    sourceTree, destinationTree = s3Tree, localTree
  else
    sourceTree, destinationTree = localTree, s3Tree
  end

  # ---------- COMPARATOR ---------- #

  # run the comparison engine and act according to what we find for each check
  nodesToDelete = Array.new # a stack. have to delete in reverse order of normal create/update processing
  filesToInvalidateOnCloudfront = Array.new

  sourceNode = sourceTree.next rescue nil
  destinationNode = destinationTree.next rescue nil
  while sourceNode or destinationNode do
    debug("source: #{sourceNode.name}") if sourceNode
    debug("dest: #{destinationNode.name}") if destinationNode
    if (!destinationNode) or (sourceNode and (sourceNode.name < destinationNode.name))
      # only on the source side: create it on the destination
      dNode =
        if sourceNode.kind_of? LocalNode
          S3Node.new(s3Bucket, s3Prefix, sourceNode.name)
        else
          LocalNode.new(localPrefix, sourceNode.name)
        end
      puts "Create node #{sourceNode.name}" if $S3syncOptions['--verbose']
      dNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
      sourceNode = sourceNode.nil? ? nil : sourceTree.next rescue nil
    elsif (!sourceNode) or (destinationNode and (sourceNode.name > destinationNode.name))
      # only on the destination side: remove it if --delete was requested
      $stderr.puts "Source does not have #{destinationNode.name}" if $S3syncOptions['--debug']
      if $S3syncOptions['--delete']
        if destinationNode.directory?
          # have to wait until its contents are gone
          nodesToDelete.push(destinationNode)
        else
          puts "Remove node #{destinationNode.name}" if $S3syncOptions['--verbose']
          destinationNode.delete unless $S3syncOptions['--dryrun']
        end
      end
      destinationNode = destinationNode.nil? ? nil : destinationTree.next rescue nil
    elsif sourceNode.name == destinationNode.name
      # on both sides: update when size differs, or md5 differs (mtime with --no-md5)
      if (sourceNode.size != destinationNode.size) or (($S3syncOptions['--no-md5']) ? (sourceNode.date > destinationNode.date) : (sourceNode.tag != destinationNode.tag))
        puts "Update node #{sourceNode.name}" if $S3syncOptions['--verbose']
        destinationNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
        filesToInvalidateOnCloudfront.push("/" + sourceNode.name)
      elsif $S3syncOptions['--debug']
        $stderr.puts "Node #{sourceNode.name} unchanged"
      end
      sourceNode = sourceNode.nil? ? nil : sourceTree.next rescue nil
      destinationNode = destinationNode.nil? ? nil : destinationTree.next rescue nil
    end
  end

  # get rid of the (now empty, except for other directories) directories
  nodesToDelete.reverse_each do |node|
    puts "Remove node #{node.name}" if $S3syncOptions['--verbose']
    node.delete unless $S3syncOptions['--dryrun']
  end

  if $S3syncOptions['--cf-invalidate'] && $S3syncOptions['--cf-dist-id']
    puts "Invalidating following assets from s3 - #{filesToInvalidateOnCloudfront.inspect}" if $S3syncOptions['--verbose']
    # BUG FIX: this guard used to read "unless false && $S3syncOptions['--dryrun']",
    # which is always true, so the invalidation request fired even under --dryrun.
    unless $S3syncOptions['--dryrun']
      acf = RightAws::AcfInterface.new($AWS_ACCESS_KEY_ID, $AWS_SECRET_ACCESS_KEY)
      invalidate_resp = acf.create_invalidation($S3syncOptions['--cf-dist-id'], :path => filesToInvalidateOnCloudfront)
      puts invalidate_resp.inspect
    end
  end
end #main
427
+
428
+
429
# ---------- NODE ---------- #
# Light-weight record describing one tree entry (file, symlink, or directory
# marker) with just enough data to compare the local and S3 sides.
class Node
  attr_reader :name, :size, :tag, :date

  def initialize(name = '', size = 0, tag = '', date = Time.now.utc)
    @name = name
    @size = size
    @tag  = tag
    @date = date
  end

  # S3 "directories" are dummy objects holding $S3syncDirString; they are
  # recognized by the fixed md5 tag plus the marker string's exact length.
  def directory?
    @tag == $S3syncDirTag && @size == $S3syncDirString.length
  end
end
445
+
446
# ---------- S3Node ---------- #
# Wraps one remote S3 object (built either from a bare key name or from a
# bucket-listing entry) behind the Node comparison interface. Object
# metadata (owner/group/permissions/symlink) is fetched lazily via a single
# cached HEAD request.
class S3Node < Node
  @path = nil
  @bucket = nil
  @result = nil

  def initialize(bucket, prefix, itemOrName)
    @bucket = bucket
    if itemOrName.is_a?(String)
      @name = itemOrName
      @name.sub!(%r{/$}, "") # don't create directories with a slash on the end
      # 6/2007: the prefix can be filled but the name empty, in the case of s3sync -r somedir somebucket:
      if !prefix.empty? && @name.empty?
        @name = prefix
        itemOrName = prefix
        prefix = ""
      end
      sep = prefix.empty? ? "" : "/"
      @path = prefix + sep + itemOrName
    else
      # listing entry: strip the prefix (and stray slashes) to get the comparable name
      @name = (itemOrName.name.slice((prefix.length)..itemOrName.name.length) or '')
      @name.sub!(%r{^/}, "") # get rid of leading slash in name if there (from above simplistic split)
      @name.sub!(%r{/$}, "") # don't create directories with a slash on the end
      @path = itemOrName.name
      @path.sub!(%r{/$}, "") # don't create directories with a slash on the end
      @size = itemOrName.size
      @tag = itemOrName.tag.gsub(/"/, '') # the etag arrives wrapped in quotes
      @date = Time.xmlschema(itemOrName.date)
    end
    debug("s3 node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
  end

  # get this item from s3 into the provided stream
  # S3 pushes to the local item, due to how http streaming is implemented
  def to_stream(s)
    @result = S3sync.S3try(:get_stream, @bucket, @path, {}, s)
  end

  def symlink?
    @result ||= S3sync.S3try(:head, @bucket, @path)
    debug("symlink value is: #{@result.object.metadata['symlink']}")
    @result.object.metadata['symlink'] == 'true'
  end

  def owner
    @result ||= S3sync.S3try(:head, @bucket, @path)
    debug("Owner of this s3 node is #{@result.object.metadata['owner']}")
    @result.object.metadata['owner'].to_i # if not there, will be nil => 0 which == root so good default
  end

  def group
    @result ||= S3sync.S3try(:head, @bucket, @path)
    @result.object.metadata['group'].to_i # 0 default ok
  end

  def permissions
    g = @result.object.metadata['permissions']
    g ? g.to_i : 600 # default to owner only
  end

  # Upload fromNode's content and unix metadata to this S3 key.
  def updateFrom(fromNode)
    raise "Node provided as update source doesn't support :stream" unless fromNode.respond_to?(:stream)

    meta = {
      'owner' => fromNode.owner.to_s,
      'group' => fromNode.group.to_s,
      'permissions' => fromNode.permissions.to_s
    }
    meta['symlink'] = 'true' if fromNode.symlink?
    begin
      theStream = fromNode.stream
      theStream = ProgressStream.new(theStream, fromNode.size) if $S3syncOptions['--progress']

      s3o = S3::S3Object.new(theStream, meta)
      debug(@path)
      headers = { 'Content-Length' => (fromNode.size.respond_to?(:nonzero?) ? fromNode.size.to_s : '0') }
      headers['x-amz-acl'] = 'public-read' if $S3syncOptions['--public-read']
      headers['Expires'] = $S3syncOptions['--expires'] if $S3syncOptions['--expires']
      headers['Cache-Control'] = $S3syncOptions['--cache-control'] if $S3syncOptions['--cache-control']
      fType = @path.split('.').last
      # honor the --gzip extension list (default just "gz"): mark the encoding
      # and look one extension deeper for the real content type
      if ($S3syncOptions['--gzip'] || "gz").split(",").include? fType
        headers['Content-Encoding'] = "gzip"
        fType = @path.split('.')[-2]
      end
      debug("File extension: #{fType}")
      if defined?($mimeTypes) and fType != '' and (mType = $mimeTypes[fType]) and mType != ''
        debug("Mime type: #{mType}")
        headers['Content-Type'] = mType
      end
      @result = S3sync.S3try(:put, @bucket, @path, s3o, headers)
      theStream.close if (theStream and not theStream.closed?)
    rescue NoMethodError
      # when --progress is used and we can't get the stream object, it doesn't report as null
      # so the above .closed? test will break
      $stderr.puts "Skipping #{@path}: " + $!
    rescue SystemCallError
      theStream.close if (theStream and not theStream.closed?)
      $stderr.puts "Skipping #{@path}: " + $!
    end
  end

  # Remove the remote object.
  def delete
    @result = S3sync.S3try(:delete, @bucket, @path)
  end
end
551
+
552
# ---------- LocalNode ---------- #

# Wraps one local filesystem entry (file, symlink, or directory) behind the
# Node comparison interface; size/md5/mtime are computed eagerly at
# construction time.
class LocalNode < Node
  @path = nil

  def initialize(prefix, partialPath)
    sep = prefix.empty? ? "" : "/"
    @path = prefix + sep + partialPath
    # slash isn't at the front of this any more @name = (partialPath.slice(1..partialPath.length) or '')
    @name = partialPath or '' # NB: 'or' binds below '=', so this is effectively @name = partialPath
    if FileTest.symlink?(@path)
      # this could use the 'file' case below, but why create an extra temp file
      linkData = File.readlink(@path)
      $stderr.puts "link to: #{linkData}" if $S3syncOptions['--debug']
      @size = linkData.length
      unless $S3syncOptions['--no-md5']
        digest = Digest::MD5.new()
        digest << linkData
        @tag = digest.hexdigest
      end
      @date = File.lstat(@path).mtime.utc
    elsif FileTest.file?(@path)
      @size = FileTest.size(@path)
      begin
        unless $S3syncOptions['--no-md5']
          data = self.stream
          digest = Digest::MD5.new()
          # feed in chunks so large files don't land in memory all at once
          digest << data.read(2048) until data.eof?
          data.close
          @tag = digest.hexdigest
        end
        @date = File.stat(@path).mtime.utc
      rescue SystemCallError
        # well we're not going to have an md5 that's for sure
        @tag = nil
      end
    elsif FileTest.directory?(@path)
      # all s3 directories are dummy nodes containing the same directory string
      # so for easy comparison, set our size and tag thusly
      @size = $S3syncDirString.length
      @tag = $S3syncDirTag
      @date = File.stat(@path).mtime.utc
    end
    debug("local node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
  end

  # return a stream that will read the contents of the local item
  # local gets pulled by the S3Node update fn, due to how http streaming is implemented
  def stream
    begin
      # 1.0.8 switch order of these tests because a symlinked file will say yes to 'file?'
      if FileTest.symlink?(@path) or FileTest.directory?(@path)
        spool = Tempfile.new('s3sync')
        if FileTest.symlink?(@path)
          spool.printf('%s', File.readlink(@path))
        elsif FileTest.directory?(@path)
          spool.printf('%s', $S3syncDirString)
        end
        spool.close
        spool.open
        spool
      elsif FileTest.file?(@path)
        File.open(@path, 'rb')
      end
    rescue SystemCallError
      $stderr.puts "Could not read #{@path}: #{$!}"
      raise
    end
  end

  # lstat for symlinks so we describe the link itself, not its target
  def stat
    FileTest.symlink?(@path) ? File.lstat(@path) : File.stat(@path)
  end

  def exist?
    FileTest.exist?(@path) or FileTest.symlink?(@path)
  end

  # unix metadata, with safe defaults when the item has vanished
  def owner
    self.exist? ? self.stat().uid : 0
  end

  def group
    self.exist? ? self.stat().gid : 0
  end

  def permissions
    self.exist? ? self.stat().mode : 600
  end

  # Pull fromNode's content down into this local path (via a temp file),
  # then materialize it as a file/symlink/dir and copy ownership and mode.
  def updateFrom(fromNode)
    raise "Node provided as update source doesn't support :to_stream" unless fromNode.respond_to?(:to_stream)

    tempName = @path + '.s3syncTemp'
    # handle the case where the user wants us to create dirs that don't exist in S3
    if $S3syncOptions['--make-dirs']
      # ensure target's path exists; the last component is the file name itself
      segments = @path.split('/')
      segments.pop()
      built = ''
      segments.each do |dir|
        built << dir << '/'
        begin
          Dir.mkdir(built) unless FileTest.exist?(built)
        rescue SystemCallError
          $stderr.puts "Could not mkdir #{built}: #{$!}"
        end
      end
    end
    unless fromNode.directory?
      out = File.open(tempName, 'wb')
      out = ProgressStream.new(out, fromNode.size) if $S3syncOptions['--progress']

      fromNode.to_stream(out)
      out.close
    end
    # get original item out of the way
    File.unlink(@path) if File.exist?(@path)
    if fromNode.symlink?
      target = ''
      File.open(tempName, 'rb') { |f| target = f.read }
      debug("#{@path} will be a symlink to #{target}")
      begin
        File.symlink(target, @path)
      rescue NotImplementedError
        # windows doesn't do symlinks, for example; just bail
        File.unlink(tempName) if File.exist?(tempName)
        return
      rescue SystemCallError
        $stderr.puts "Could not write symlink #{@path}: #{$!}"
      end
    elsif fromNode.directory?
      # only get here when the dir doesn't exist. else they'd compare ==
      debug(@path)
      begin
        Dir.mkdir(@path) unless FileTest.exist?(@path)
      rescue SystemCallError
        $stderr.puts "Could not mkdir #{@path}: #{$!}"
      end

    else
      begin
        File.rename(tempName, @path)
      rescue SystemCallError
        $stderr.puts "Could not write (rename) #{@path}: #{$!}"
      end
    end
    # clean up if the temp file is still there (as for links)
    File.unlink(tempName) if File.exist?(tempName)

    # update permissions, using the l-variants for symlinks
    linkCommand = fromNode.symlink? ? 'l' : ''
    begin
      File.send(linkCommand + 'chown', fromNode.owner, fromNode.group, @path)
      File.send(linkCommand + 'chmod', fromNode.permissions, @path)
    rescue NotImplementedError
      # no one has lchmod, but who really cares
    rescue SystemCallError
      $stderr.puts "Could not change owner/permissions on #{@path}: #{$!}"
    end
  end

  def symlink?
    FileTest.symlink?(@path)
  end

  # Remove this local item. Never removes the restore root itself (empty
  # name), which the tree recursion necessarily visits.
  def delete
    return unless @name != ''
    return unless FileTest.exist?(@path)
    begin
      FileTest.directory?(@path) ? Dir.rmdir(@path) : File.unlink(@path)
    rescue SystemCallError
      $stderr.puts "Could not delete #{@path}: #{$!}"
    end
  end
end
731
+
732
+ end #module
733
+
734
# Emit a diagnostic line to stderr, but only when --debug was requested.
def debug(str)
  return unless $S3syncOptions['--debug']
  $stderr.puts str
end
737
+
738
# Dump a census of live objects by class (descending count) to stderr,
# printing per-class deltas when a previous census hash is supplied.
# Returns the census so it can be fed back in on the next call.
def ostats(last_stat = nil)
  counts = Hash.new(0)
  ObjectSpace.each_object { |o| counts[o.class] += 1 }

  counts.sort_by { |_klass, count| -count }.each do |klass, count|
    $stderr.printf "%-30s %10d", klass, count
    $stderr.printf " delta %10d", (count - last_stat[klass]) if last_stat
    $stderr.puts
  end

  counts
end
750
+
751
# go! hand control to the sync driver
S3sync.main