s3sync-cf 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/s3sync ADDED
@@ -0,0 +1,752 @@
1
+ #!/usr/bin/env ruby
2
+ # This software code is made available "AS IS" without warranties of any
3
+ # kind. You may copy, display, modify and redistribute the software
4
+ # code either by itself or as incorporated into your code; provided that
5
+ # you do not remove any proprietary notices. Your use of this software
6
+ # code is at your own risk and you waive any claim against the author
7
+ # with respect to your use of this software code.
8
+ # (c) 2007 s3sync.net
9
+ #
10
+ require 'right_aws'
11
+ module S3sync
12
+
13
+ $S3SYNC_MIME_TYPES_FILE = (ENV["S3SYNC_MIME_TYPES_FILE"] or '/etc/mime.types')
14
+ $S3SYNC_VERSION = '1.3.1'
15
+
16
+ # always look "here" for include files (thanks aktxyz)
17
+ $LOAD_PATH << File.join(File.expand_path(File.dirname(__FILE__)), "../lib/")
18
+
19
+ require 'getoptlong'
20
+ # require 'generator' # http://www.ruby-doc.org/stdlib/libdoc/generator/rdoc/classes/Generator.html
21
+ unless defined?(Enumerator)
22
+ require "enumerator"
23
+ Enumerator = Enumerable::Enumerator
24
+ end
25
+ # require 's3sync/thread_generator' # memory doesn't leak with this one, at least nothing near as bad
26
+ require 'digest/md5'
27
+ require 'tempfile'
28
+
29
+ require 's3sync'
30
+
31
+ $S3syncDirString = '{E40327BF-517A-46e8-A6C3-AF51BC263F59}'
32
+ $S3syncDirTag = 'd66759af42f282e1ba19144df2d405d0'
33
+ $S3syncDirFile = Tempfile.new("s3sync")
34
+ $S3syncDirFile.puts $S3syncDirString
35
+ $S3syncDirFile.close # not final; we need this file again to 'put' directory nodes
36
+
37
# Build a global extension -> MIME type lookup ($mimeTypes) from the system
# mime.types file, when one exists at the configured path. Each line of the
# file maps one type to a whitespace-separated list of extensions.
if $S3SYNC_MIME_TYPES_FILE and FileTest.exist?($S3SYNC_MIME_TYPES_FILE)
  File.open($S3SYNC_MIME_TYPES_FILE, 'r') do |f|
    $mimeTypes = {}
    f.each_line do |line|
      # skip comments/blank lines; capture "type ext1 ext2 ..." rows
      next unless line =~ /^(\w\S+)\s+(\S.*)$/
      type = $1
      extensions = $2.split
      extensions.each { |ext| $mimeTypes[ext.to_s] = type.to_s }
    end
  end
end
51
+
52
# Entry point for the command-line tool: parses options, validates the
# source/destination prefixes, builds matching alpha-sorted generators for
# the local and S3 trees, then walks both in lockstep creating, updating,
# and deleting nodes; optionally invalidates changed paths on CloudFront.
def S3sync.main
  # ---------- OPTIONS PROCESSING ---------- #

  # Parsed options live in a global hash keyed by the long option name.
  $S3syncOptions = Hash.new
  optionsParser = GetoptLong.new(
    [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
    [ '--ssl', '-s', GetoptLong::NO_ARGUMENT ],
    [ '--recursive','-r', GetoptLong::NO_ARGUMENT ],
    [ '--public-read','-p', GetoptLong::NO_ARGUMENT ],
    [ '--delete', GetoptLong::NO_ARGUMENT ],
    [ '--verbose', '-v', GetoptLong::NO_ARGUMENT ],
    [ '--dryrun', '-n', GetoptLong::NO_ARGUMENT ],
    [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
    [ '--memory', '-m', GetoptLong::NO_ARGUMENT ],
    [ '--progress', GetoptLong::NO_ARGUMENT ],
    [ '--expires', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--cache-control', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--exclude', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--gzip', GetoptLong::REQUIRED_ARGUMENT ],
    [ '--key', '-k', GetoptLong::REQUIRED_ARGUMENT],
    [ '--secret', GetoptLong::REQUIRED_ARGUMENT],
    [ '--make-dirs', GetoptLong::NO_ARGUMENT ],
    [ '--no-md5', GetoptLong::NO_ARGUMENT ],
    [ '--cf-invalidate', GetoptLong::NO_ARGUMENT ],
    [ '--cf-dist-id', GetoptLong::REQUIRED_ARGUMENT ]
  )

  # Print usage (optionally preceded by an error message) and exit.
  # NOTE(review): the usage text shows '-s' next to --secret, but per the
  # parser table above '-s' is actually the short form of --ssl; the text
  # also omits --cf-invalidate/--cf-dist-id. Display only — confirm intent.
  def S3sync.usage(message = nil)
    $stderr.puts message if message
    name = $0.split('/').last
    $stderr.puts <<-ENDUSAGE
#{name} [options] <source> <destination>\t\tversion #{$S3SYNC_VERSION}
--help -h --verbose -v --dryrun -n
--ssl -s --recursive -r --delete
--public-read -p --expires="<exp>" --cache-control="<cc>"
--exclude="<regexp>" --progress --debug -d
--key -k --secret -s --make-dirs
--no-md5 --gzip
One of <source> or <destination> must be of S3 format, the other a local path.
Reminders:
* An S3 formatted item with bucket 'mybucket' and prefix 'mypre' looks like:
mybucket:mypre/some/key/name
* Local paths should always use forward slashes '/' even on Windows
* Whether you use a trailing slash on the source path makes a difference.
* For examples see README.
    ENDUSAGE
    exit
  end #usage

  begin
    # flag options store true; argument options store their argument string
    optionsParser.each {|opt, arg| $S3syncOptions[opt] = (arg || true)}
  rescue StandardError
    usage # the parser already printed an error message
  end
  usage if $S3syncOptions['--help']
  $S3syncOptions['--verbose'] = true if $S3syncOptions['--dryrun'] or $S3syncOptions['--debug'] or $S3syncOptions['--progress']
  $S3syncOptions['--ssl'] = true if $S3syncOptions['--ssl'] # change from "" to true to appease s3 port chooser

  # command-line credentials override whatever the environment provided
  if $S3syncOptions['--key']
    $AWS_ACCESS_KEY_ID = $S3syncOptions['--key']
  end

  if $S3syncOptions['--secret']
    $AWS_SECRET_ACCESS_KEY = $S3syncOptions['--secret']
  end

  # ---------- CONNECT ---------- #
  S3sync::s3trySetup

  # ---------- PREFIX PROCESSING ---------- #
  # An argument is "on S3" when it contains a colon that isn't a DOS drive
  # letter (e.g. "bucket:prefix" yes, "C:\dir" no).
  def S3sync.s3Prefix?(pre)
    # allow for dos-like things e.g. C:\ to be treated as local even with colon
    pre.include?(':') and not pre.match('^[A-Za-z]:[\\\\/]')
  end
  sourcePrefix, destinationPrefix = ARGV
  usage("You didn't set up your environment variables; see README.txt") if not($AWS_ACCESS_KEY_ID and $AWS_SECRET_ACCESS_KEY)
  usage('Need a source and a destination') if sourcePrefix == nil or destinationPrefix == nil
  usage('Both arguments can\'t be on S3') if s3Prefix?(sourcePrefix) and s3Prefix?(destinationPrefix)
  usage('One argument must be on S3') if !s3Prefix?(sourcePrefix) and !s3Prefix?(destinationPrefix)
  usage('Need the cloudfront distribution id') if $S3syncOptions['--cf-invalidate'] and !$S3syncOptions['--cf-dist-id']
  # so we can modify them
  sourcePrefix, destinationPrefix = sourcePrefix.dup, destinationPrefix.dup

  # handle trailing slash for source properly
  if(sourcePrefix !~ %r{/$})
    # no slash on end of source means we need to append the last src dir to dst prefix
    # testing for empty isn't good enough here.. needs to be "empty apart from potentially having 'bucket:'"
    slash = (destinationPrefix.empty? or destinationPrefix.match(%r{:$}))? "" : "/"
    # not good enough.. sometimes this coughs up the bucket as a prefix destinationPrefix.replace(destinationPrefix + slash + sourcePrefix.split(/(?:\/|:)/).last)
    # take everything at the end after a slash or colon
    destinationPrefix.replace(destinationPrefix + slash + %r{([^/:]*)$}.match(sourcePrefix)[1])
  end
  # no trailing slash on dest, ever.
  destinationPrefix.sub!(%r{/$}, "")

  # don't repeat slashes
  sourcePrefix.squeeze!('/')
  destinationPrefix.squeeze!('/')

  # here's where we find out what direction we're going
  sourceIsS3 = s3Prefix?(sourcePrefix)
  # alias these variables to the other strings (in ruby = does not make copies of strings)
  s3Prefix = sourceIsS3 ? sourcePrefix : destinationPrefix
  localPrefix = sourceIsS3 ? destinationPrefix : sourcePrefix

  # canonicalize the S3 stuff: split "bucket:prefix" into its two pieces
  s3Bucket = (/^(.*?):/.match(s3Prefix))[1]
  s3Prefix.replace((/:(.*)$/.match(s3Prefix))[1])
  debug("s3Prefix #{s3Prefix}")
  $S3SyncOriginalS3Prefix = s3Prefix.dup

  # canonicalize the local stuff
  # but that can kill a trailing slash, which we need to preserve long enough to know whether we mean "the dir" or "its contents"
  # it will get re-stripped by the local generator after expressing this knowledge
  localTrailingSlash = localPrefix.match(%r{/$})
  localPrefix.replace(File.expand_path(localPrefix))
  localPrefix += '/' if localTrailingSlash
  debug("localPrefix #{localPrefix}")
  # used for exclusion parsing
  $S3SyncOriginalLocalPrefix = localPrefix.dup

  # exclude preparation
  # we don't want to build then throw away this regexp for each node in the universe; do it once globally
  $S3SyncExclude = Regexp.new($S3syncOptions['--exclude']) if $S3syncOptions['--exclude']

  # ---------- GENERATORS ---------- #

  # a generator that will return the files/dirs of the local tree one by one
  # sorted and decorated for easy comparison with the S3 tree
  localTree = Enumerator.new do |g|
    def S3sync.localTreeRecurse(g, prefix, path)
      debug("localTreeRecurse #{prefix} #{path}")
      #if $S3syncOptions['--memory']
      #  $stderr.puts "Starting local recurse"
      #  stats = ostats stats
      #end
      d = nil
      begin
        slash = prefix.empty? ? "" : "/"
        d = Dir.new(prefix + slash + path)
      rescue Errno::ENOENT
        # ok the dir doesn't exist at all (this only really occurs for the root i.e. first dir)
        return nil
      rescue Errno::EACCES
        # vista won't even let us touch some stuff in our own profile
        return nil
      end
      # do some pre-processing
      # the following sleight of hand is to make the recursion match the way s3 sorts
      # take for example the directory 'foo' and the file 'foo.bar'
      # when we encounter the dir we would want to recurse into it
      # but S3 would just say 'period < slash' and sort 'foo.bar' between the dir node
      # and the contents in that 'dir'
      #
      # so the solution is to not recurse into the directory until the point where
      # it would come up "next" in the S3 list
      # We have to do these hoops on the local side, because we have very little control
      # over how S3 will return its results
      toAdd = Array.new
      d.each do |name|
        slash = path.empty? ? "" : "/"
        partialPath = path + slash + name
        slash = prefix.empty? ? "" : "/"
        fullPath = prefix + slash + partialPath
        if name == "." or name == ".."
          # skip
        else
          # add a dir node if appropriate
          debug("Test #{fullPath}")
          if ((not FileTest.symlink?(fullPath)) and FileTest.directory?(fullPath)) and $S3syncOptions['--recursive']
            debug("Adding it as a dir node")
            toAdd.push(name + '/') # always trail slash here for sorting purposes (removed below with rindex test)
          end
        end
      end
      dItems = d.collect.to_a + toAdd
      d.close
      d = toAdd = nil
      dItems.sort! #aws says we will get alpha sorted results but ruby doesn't
      dItems.each do |name|
        isDirNode = false
        if name.rindex('/') == name.length-1
          name = name.slice(0...name.length-1)
          isDirNode = true
          debug("#{name} is a dir node")
        end
        slash = path.empty? ? "" : "/"
        partialPath = path + slash + name
        slash = prefix.empty? ? "" : "/"
        fullPath = prefix + slash + partialPath
        # path relative to the original root, used for --exclude matching
        excludePath = fullPath.slice($S3SyncOriginalLocalPrefix.length...fullPath.length)
        if name == "." or name == ".."
          # skip
        elsif $S3SyncExclude and $S3SyncExclude.match(excludePath)
          debug("skipping local item #{excludePath} because of --exclude")
        elsif isDirNode
          localTreeRecurse(g, prefix, partialPath)
        else
          # a normal looking node we should try to process
          debug("local item #{fullPath}")
          g.yield(LocalNode.new(prefix, partialPath))
        end
      end
      #if $S3syncOptions['--memory']
      #  $stderr.puts "Ending local recurse"
      #  stats = ostats stats
      #end
    end
    # a bit of a special case for local, since "foo/" and "foo" are essentially treated the same by file systems
    # so we need to think harder about what the user really meant in the command line.
    localPrefixTrim = localPrefix
    if localPrefix !~ %r{/$}
      # no trailing slash, so yield the root itself first, then recurse if appropriate
      # gork this is still not quite good enough.. if local is the dest then we don't know whether s3 will have a root dir node yielded a priori, so we can't know whether to do this. only matters for --erase though
      g.yield(LocalNode.new(localPrefixTrim, "")) # technically we should check this for exclusion, but excluding the root node is kind of senseless.. and that would be a pain to set up here
      localTreeRecurse(g, localPrefixTrim, "") if $S3syncOptions['--recursive']
    else
      # trailing slash, so ignore the root itself, and just go into the first level
      localPrefixTrim.sub!(%r{/$}, "") # strip the slash because of how we do local node slash accounting in the recurse above
      localTreeRecurse(g, localPrefixTrim, "")
    end
  end

  # a generator that will return the nodes in the S3 tree one by one
  # sorted and decorated for easy comparison with the local tree
  s3Tree = Enumerator.new do |g|
    def S3sync.s3TreeRecurse(g, bucket, prefix, path)
      if $S3syncOptions['--memory']
        $stderr.puts "Starting S3 recurse"
        GC.start
        stats = ostats stats
      end
      $stderr.puts "s3TreeRecurse #{bucket} #{prefix} #{path}" if $S3syncOptions['--debug']
      nextPage = true
      marker = ''
      # page through the listing until is_truncated goes false
      while nextPage do
        fullPrefix = prefix + path
        debug("nextPage: #{marker}") if marker != ''
        options = {}
        options['prefix'] = fullPrefix # start at the right depth
        options['delimiter'] = '/' # only one dir at a time please
        options['max-keys'] = '200' # use manageable chunks
        options['marker'] = marker unless marker == ''
        d = S3sync.S3try(:list_bucket, bucket, options)
        $stderr.puts "S3 ERROR: #{d.http_response}" unless d.http_response.is_a? Net::HTTPSuccess
        # the 'directories' and leaf nodes are in two separate collections
        # because a dir will never have the same name as a node, we can just shove them together and sort
        # it's important to evaluate them alphabetically for efficient comparison to the local tree
        tItems = d.entries + d.common_prefix_entries
        tItems.sort! do |a,b|
          aName = a.respond_to?('key') ? a.key : a.prefix
          bName = b.respond_to?('key') ? b.key : b.prefix
          # the full path will be returned, efficient to ignore the part we know will be in common
          aName.slice(fullPrefix.length..aName.length) <=> bName.slice(fullPrefix.length..bName.length)
        end
        # get rid of the big s3 objects asap, just save light-weight nodes and strings
        # NOTE(review): Iconv was removed from the Ruby stdlib in 1.9+; on
        # modern Rubies this requires the iconv gem — confirm target version.
        items = tItems.collect do |item|
          if item.respond_to?('key')
            key = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join
            Node.new(key, item.size, item.etag, item.last_modified)
          else
            Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.prefix).join
          end
        end
        nextPage = d.properties.is_truncated
        marker = (d.properties.next_marker)? d.properties.next_marker : ((d.entries.length > 0)? d.entries.last.key : '')
        # get this into native char set (because when we feed it back to s3 that's what it will expect)
        marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join
        tItems = nil
        d = nil # get rid of this before recursing; it's big
        item = nil
        GC.start # not sure but I think yielding before doing this is causing evil closure bloat
        items.each do |item|
          if not (item.kind_of? String)
            # this is an item
            excludePath = item.name.slice($S3SyncOriginalS3Prefix.length...item.name.length)
            if $S3SyncExclude and $S3SyncExclude.match(excludePath)
              debug("skipping S3 item #{excludePath} due to --exclude")
            else
              debug("S3 item #{item.name}")
              g.yield(S3Node.new(bucket, prefix, item))
            end
          else
            # it's a prefix (i.e. there are sub keys)
            partialPath = item.slice(prefix.length..item.length) # will have trailing slash
            excludePath = item.slice($S3SyncOriginalS3Prefix.length...item.length)
            # recurse
            if $S3SyncExclude and $S3SyncExclude.match(excludePath)
              debug("skipping prefix #{excludePath} due to --exclude")
            else
              debug("prefix found: #{partialPath}")
              s3TreeRecurse(g, bucket, prefix, partialPath) if $S3syncOptions['--recursive']
            end
          end
        end
        items = nil
      end # of while nextPage
      if $S3syncOptions['--memory']
        $stderr.puts "Ending S3 recurse"
        GC.start
        stats = ostats stats
      end
    end
    # this will yield the root node first and then recurse
    s3TreeRecurse(g, s3Bucket, s3Prefix, "")
  end

  # alias the tree objects so we don't care below which direction the transfer is going
  if sourceIsS3
    sourceTree, destinationTree = s3Tree, localTree
  else
    sourceTree, destinationTree = localTree, s3Tree
  end

  # ---------- COMPARATOR ---------- #

  # run the comparison engine and act according to what we find for each check
  nodesToDelete = Array.new # a stack. have to delete in reverse order of normal create/update processing
  filesToInvalidateOnCloudfront = Array.new

  # merge-walk both sorted streams; "rescue nil" turns StopIteration into
  # an exhausted-stream sentinel
  sourceNode = sourceTree.next rescue nil
  destinationNode = destinationTree.next rescue nil
  while sourceNode or destinationNode do
    debug("source: #{sourceNode.name}") if sourceNode
    debug("dest: #{destinationNode.name}") if destinationNode
    if (!destinationNode) or (sourceNode and (sourceNode.name < destinationNode.name))
      # source-only item: create it on the destination side
      dNode =
        if sourceNode.kind_of? LocalNode
          S3Node.new(s3Bucket, s3Prefix, sourceNode.name)
        else
          LocalNode.new(localPrefix, sourceNode.name)
        end
      puts "Create node #{sourceNode.name}" if $S3syncOptions['--verbose']
      dNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
      sourceNode = sourceNode.nil? ? nil : sourceTree.next rescue nil
    elsif (!sourceNode) or (destinationNode and (sourceNode.name > destinationNode.name))
      # destination-only item: remove it when --delete was requested
      $stderr.puts "Source does not have #{destinationNode.name}" if $S3syncOptions['--debug']
      if $S3syncOptions['--delete']
        if destinationNode.directory?
          # have to wait
          nodesToDelete.push(destinationNode)
        else
          puts "Remove node #{destinationNode.name}" if $S3syncOptions['--verbose']
          destinationNode.delete unless $S3syncOptions['--dryrun']
        end
      end
      destinationNode = destinationNode.nil? ? nil : destinationTree.next rescue nil
    elsif sourceNode.name == destinationNode.name
      # present on both sides: compare size plus md5 tag (or mtime with --no-md5)
      if (sourceNode.size != destinationNode.size) or (($S3syncOptions['--no-md5'])? (sourceNode.date > destinationNode.date) : (sourceNode.tag != destinationNode.tag))
        puts "Update node #{sourceNode.name}" if $S3syncOptions['--verbose']
        destinationNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
        filesToInvalidateOnCloudfront.push("/"+sourceNode.name)
      elsif $S3syncOptions['--debug']
        $stderr.puts "Node #{sourceNode.name} unchanged"
      end
      sourceNode = sourceNode.nil? ? nil : sourceTree.next rescue nil
      destinationNode = destinationNode.nil? ? nil : destinationTree.next rescue nil
    end
  end

  # get rid of the (now empty, except for other directories) directories
  nodesToDelete.reverse_each do |node|
    puts "Remove node #{node.name}" if $S3syncOptions['--verbose']
    node.delete unless $S3syncOptions['--dryrun']
  end

  if $S3syncOptions['--cf-invalidate'] && $S3syncOptions['--cf-dist-id']
    puts "Invalidating following assets from s3 - #{filesToInvalidateOnCloudfront.inspect}" if $S3syncOptions['--verbose']
    # NOTE(review): "false &&" makes this condition always true, so the
    # CloudFront invalidation fires even under --dryrun — confirm intent.
    unless false && $S3syncOptions['--dryrun']
      acf = RightAws::AcfInterface.new($AWS_ACCESS_KEY_ID, $AWS_SECRET_ACCESS_KEY)
      invalidate_resp = acf.create_invalidation($S3syncOptions['--cf-dist-id'], :path => filesToInvalidateOnCloudfront)
      puts invalidate_resp.inspect
    end
  end
end #main
427
+
428
+
429
# ---------- NODE ---------- #

# Base class for entries in either tree: a named item carrying the size,
# md5 tag, and timestamp used by the comparator.
class Node
  attr_reader :name, :size, :tag, :date

  def initialize(name = '', size = 0, tag = '', date = Time.now.utc)
    @name = name
    @size = size
    @tag = tag
    @date = date
  end

  # S3 "directories" are dummy objects holding a well-known marker string;
  # recognize them by the marker's md5 tag together with its byte length.
  def directory?
    (@tag == $S3syncDirTag) && (@size == $S3syncDirString.length)
  end
end
445
+
446
# ---------- S3Node ---------- #

# Wraps one S3 key as a comparison-tree node. Extended metadata
# (owner/group/permissions/symlink) is fetched lazily via a HEAD request
# and cached in @result so repeated accessors reuse one round trip.
class S3Node < Node
  @path = nil
  @bucket = nil
  @result = nil

  # bucket     - S3 bucket name
  # prefix     - key prefix being synced (stripped from @name)
  # itemOrName - either a String key name, or a listing item responding to
  #              name/size/tag/date
  def initialize(bucket, prefix, itemOrName)
    @bucket = bucket
    if itemOrName.kind_of? String
      @name = itemOrName
      @name.sub!(%r{/$}, "") # don't create directories with a slash on the end
      #6/2007. the prefix can be filled but the name empty, in the case of s3sync -r somedir somebucket:
      if (not prefix.empty? and @name.empty?)
        @name = prefix
        itemOrName = prefix
        prefix = ""
      end
      slash = prefix.empty? ? "" : "/"
      @path = prefix + slash + itemOrName
    else
      @name = (itemOrName.name.slice((prefix.length)..itemOrName.name.length) or '')
      # depending whether the prefix is / tailed, the name might need trimming
      @name.sub!(%r{^/},"") # get rid of leading slash in name if there (from above simplistic split)
      @name.sub!(%r{/$}, "") # don't create directories with a slash on the end
      @path = itemOrName.name
      @path.sub!(%r{/$}, "") # don't create directories with a slash on the end
      @size = itemOrName.size
      @tag = itemOrName.tag.gsub(/"/,'') # etag arrives quoted; compare raw hex
      @date = Time.xmlschema(itemOrName.date)
    end
    debug("s3 node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
  end

  # get this item from s3 into the provided stream
  # S3 pushes to the local item, due to how http streaming is implemented
  def to_stream(s)
    @result = S3sync.S3try(:get_stream, @bucket, @path, {}, s)
  end

  def symlink?()
    unless @result
      @result = S3sync.S3try(:head, @bucket, @path)
    end
    debug("symlink value is: #{@result.object.metadata['symlink']}")
    @result.object.metadata['symlink'] == 'true'
  end

  def owner
    unless @result
      @result = S3sync.S3try(:head, @bucket, @path)
    end
    debug("Owner of this s3 node is #{@result.object.metadata['owner']}")
    @result.object.metadata['owner'].to_i # if not there, will be nil => 0 which == root so good default
  end

  def group
    unless @result
      @result = S3sync.S3try(:head, @bucket, @path)
    end
    @result.object.metadata['group'].to_i # 0 default ok
  end

  def permissions
    # BUGFIX: lazily fetch the HEAD result exactly like owner/group/symlink?
    # do. Previously @result was dereferenced without this guard and raised
    # NoMethodError when no prior head/put had populated it.
    unless @result
      @result = S3sync.S3try(:head, @bucket, @path)
    end
    g = @result.object.metadata['permissions']
    g ? g.to_i : 600 # default to owner only
  end

  # Upload fromNode's content (it must expose :stream) to this key, carrying
  # ownership/permission/symlink metadata and any ACL/expiry/cache/gzip/mime
  # headers implied by the command-line options.
  def updateFrom(fromNode)
    if fromNode.respond_to?(:stream)
      meta = Hash.new
      meta['owner'] = fromNode.owner.to_s
      meta['group'] = fromNode.group.to_s
      meta['permissions'] = fromNode.permissions.to_s
      meta['symlink'] = 'true' if fromNode.symlink?
      begin
        theStream = fromNode.stream
        theStream = ProgressStream.new(theStream, fromNode.size) if $S3syncOptions['--progress']

        s3o = S3::S3Object.new(theStream, meta)
        debug(@path)
        headers = {'Content-Length' => (fromNode.size.respond_to?(:nonzero?) ? fromNode.size.to_s : '0')}
        headers['x-amz-acl'] = 'public-read' if $S3syncOptions['--public-read']
        headers['Expires'] = $S3syncOptions['--expires'] if $S3syncOptions['--expires']
        headers['Cache-Control'] = $S3syncOptions['--cache-control'] if $S3syncOptions['--cache-control']
        fType = @path.split('.').last
        # for configured extensions (default "gz"), mark as gzip and use the
        # next-inner extension for mime lookup (foo.css.gz -> css)
        if ($S3syncOptions['--gzip'] || "gz").split(",").include? fType
          headers['Content-Encoding'] = "gzip"
          fType = @path.split('.')[-2]
        end
        debug("File extension: #{fType}")
        if defined?($mimeTypes) and fType != '' and (mType = $mimeTypes[fType]) and mType != ''
          debug("Mime type: #{mType}")
          headers['Content-Type'] = mType
        end
        @result = S3sync.S3try(:put, @bucket, @path, s3o, headers)
        theStream.close if (theStream and not theStream.closed?)
      rescue NoMethodError
        # when --progress is used and we can't get the stream object, it doesn't report as null
        # so the above .closed? test will break
        # (interpolation rather than String#+ — Exception lost #to_str in 1.9+)
        $stderr.puts "Skipping #{@path}: #{$!}"
      rescue SystemCallError
        theStream.close if (theStream and not theStream.closed?)
        $stderr.puts "Skipping #{@path}: #{$!}"
      end
    else
      raise "Node provided as update source doesn't support :stream"
    end
  end

  def delete
    @result = S3sync.S3try(:delete, @bucket, @path)
  end
end
551
+
552
# ---------- LocalNode ---------- #

# Represents one file, directory, or symlink on the local filesystem,
# decorated with the size / md5-tag / mtime fields Node needs so it can be
# compared against the S3 tree.
class LocalNode < Node
  @path = nil

  # prefix + partialPath form the absolute path; partialPath (the portion
  # relative to the sync root) becomes the node's comparison name.
  def initialize(prefix, partialPath)
    slash = prefix.empty? ? "" : "/"
    @path = prefix + slash + partialPath
    # slash isn't at the front of this any more @name = (partialPath.slice(1..partialPath.length) or '')
    @name = partialPath or ''
    if FileTest.symlink?(@path)
      # this could use the 'file' case below, but why create an extra temp file
      linkData = File.readlink(@path)
      $stderr.puts "link to: #{linkData}" if $S3syncOptions['--debug']
      @size = linkData.length
      unless $S3syncOptions['--no-md5']
        md5 = Digest::MD5.new()
        md5 << linkData
        @tag = md5.hexdigest
      end
      @date = File.lstat(@path).mtime.utc
    elsif FileTest.file?(@path)
      @size = FileTest.size(@path)
      data = nil
      begin
        unless $S3syncOptions['--no-md5']
          data = self.stream
          md5 = Digest::MD5.new()
          while !data.eof?
            md5 << data.read(2048) # stream so it's not taking all memory
          end
          data.close
          @tag = md5.hexdigest
        end
        @date = File.stat(@path).mtime.utc
      rescue SystemCallError
        # well we're not going to have an md5 that's for sure
        @tag = nil
      end
    elsif FileTest.directory?(@path)
      # all s3 directories are dummy nodes contain the same directory string
      # so for easy comparison, set our size and tag thusly
      @size = $S3syncDirString.length
      @tag = $S3syncDirTag
      @date = File.stat(@path).mtime.utc
    end
    debug("local node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
  end

  # return a stream that will read the contents of the local item
  # local gets pulled by the S3Node update fn, due to how http streaming is implemented
  # For symlinks/dirs the "content" is a temp file holding the link target
  # or the directory marker string respectively.
  def stream
    begin
      # 1.0.8 switch order of these tests because a symlinked file will say yes to 'file?'
      if FileTest.symlink?(@path) or FileTest.directory?(@path)
        tf = Tempfile.new('s3sync')
        if FileTest.symlink?(@path)
          tf.printf('%s', File.readlink(@path))
        elsif FileTest.directory?(@path)
          tf.printf('%s', $S3syncDirString)
        end
        tf.close
        tf.open
        tf
      elsif FileTest.file?(@path)
        File.open(@path, 'rb')
      end
    rescue SystemCallError
      $stderr.puts "Could not read #{@path}: #{$!}"
      raise
    end
  end

  # lstat for links so we describe the link itself rather than its target
  def stat
    FileTest.symlink?(@path) ? File.lstat(@path) : File.stat(@path)
  end

  # the extra symlink? test means broken symlinks still count as existing
  def exist?
    FileTest.exist?(@path) or FileTest.symlink?(@path)
  end

  def owner
    self.exist? ? self.stat().uid : 0
  end

  def group
    self.exist? ? self.stat().gid : 0
  end

  # NOTE(review): returns the full st_mode (file-type bits included), not
  # only the permission bits — confirm downstream consumers expect this.
  def permissions
    self.exist? ? self.stat().mode : 600
  end

  # Pull fromNode's content (it must expose :to_stream) into a temp file,
  # then install it as a file, symlink, or directory; finally copy over
  # ownership and permission metadata.
  def updateFrom(fromNode)
    if fromNode.respond_to?(:to_stream)
      fName = @path + '.s3syncTemp'
      # handle the case where the user wants us to create dirs that don't exist in S3
      if $S3syncOptions['--make-dirs']
        # ensure target's path exists
        dirs = @path.split('/')
        # but the last one is a file name
        dirs.pop()
        current = ''
        dirs.each do |dir|
          current << dir << '/'
          begin
            Dir.mkdir(current) unless FileTest.exist?(current)
          rescue SystemCallError
            $stderr.puts "Could not mkdir #{current}: #{$!}"
          end
        end
      end
      unless fromNode.directory?
        f = File.open(fName, 'wb')
        f = ProgressStream.new(f, fromNode.size) if $S3syncOptions['--progress']

        fromNode.to_stream(f)
        f.close
      end
      # get original item out of the way
      File.unlink(@path) if File.exist?(@path)
      if fromNode.symlink?
        # the temp file holds the link target written by the source node
        linkTo = ''
        File.open(fName, 'rb'){|f| linkTo = f.read}
        debug("#{@path} will be a symlink to #{linkTo}")
        begin
          File.symlink(linkTo, @path)
        rescue NotImplementedError
          # windows doesn't do symlinks, for example
          # just bail
          File.unlink(fName) if File.exist?(fName)
          return
        rescue SystemCallError
          $stderr.puts "Could not write symlink #{@path}: #{$!}"
        end
      elsif fromNode.directory?
        # only get here when the dir doesn't exist. else they'd compare ==
        debug(@path)
        begin
          Dir.mkdir(@path) unless FileTest.exist?(@path)
        rescue SystemCallError
          $stderr.puts "Could not mkdir #{@path}: #{$!}"
        end

      else
        begin
          File.rename(fName, @path)
        rescue SystemCallError
          $stderr.puts "Could not write (rename) #{@path}: #{$!}"
        end
      end
      # clean up if the temp file is still there (as for links)
      File.unlink(fName) if File.exist?(fName)

      # update permissions
      linkCommand = fromNode.symlink? ? 'l' : ''
      begin
        File.send(linkCommand + 'chown', fromNode.owner, fromNode.group, @path)
        File.send(linkCommand + 'chmod', fromNode.permissions, @path)
      rescue NotImplementedError
        # no one has lchmod, but who really cares
      rescue SystemCallError
        $stderr.puts "Could not change owner/permissions on #{@path}: #{$!}"
      end
    else
      raise "Node provided as update source doesn't support :to_stream"
    end
  end

  def symlink?()
    FileTest.symlink?(@path)
  end

  # Remove this item from disk (directories must already be empty).
  def delete
    # don't try to delete the restore root dir
    # this is a quick fix to deal with the fact that the tree recurse has to visit the root node
    return unless @name != ''
    return unless FileTest.exist?(@path)
    begin
      if FileTest.directory?(@path)
        Dir.rmdir(@path)
      else
        File.unlink(@path)
      end
    rescue SystemCallError
      $stderr.puts "Could not delete #{@path}: #{$!}"
    end
  end
end
731
+
732
+ end #module
733
+
734
# Emit a diagnostic line to stderr, but only when --debug was requested.
def debug(str)
  return unless $S3syncOptions['--debug']
  $stderr.puts str
end
737
+
738
# Tally live objects by class via ObjectSpace, print a table to stderr
# sorted by descending count (including per-class deltas when a previous
# tally is supplied), and return the tally so a later call can diff it.
def ostats(last_stat = nil)
  stats = Hash.new(0)
  ObjectSpace.each_object { |obj| stats[obj.class] += 1 }

  stats.sort_by { |_klass, count| -count }.each do |klass, count|
    $stderr.printf "%-30s %10d", klass, count
    $stderr.printf " delta %10d", (count - last_stat[klass]) if last_stat
    $stderr.puts
  end

  stats
end
750
+
751
+ # go!
752
+ S3sync::main