s3-sync 1.2.6
- data/History.txt +184 -0
- data/README.rdoc +318 -0
- data/bin/s3cmd +338 -0
- data/bin/s3sync +731 -0
- data/lib/s3sync.rb +39 -0
- data/lib/s3sync/HTTPStreaming.rb +107 -0
- data/lib/s3sync/S3.rb +714 -0
- data/lib/s3sync/S3_s3sync_mod.rb +143 -0
- data/lib/s3sync/S3encoder.rb +50 -0
- data/lib/s3sync/s3config.rb +27 -0
- data/lib/s3sync/s3try.rb +178 -0
- data/lib/s3sync/thread_generator.rb +383 -0
- data/test/test_helper.rb +3 -0
- data/test/test_s3sync.rb +11 -0
- metadata +67 -0
data/bin/s3cmd
ADDED
@@ -0,0 +1,338 @@
#!/usr/bin/env ruby
# This software code is made available "AS IS" without warranties of any
# kind.  You may copy, display, modify and redistribute the software
# code either by itself or as incorporated into your code; provided that
# you do not remove any proprietary notices.  Your use of this software
# code is at your own risk and you waive any claim against the author
# with respect to your use of this software code.
# (c) 2007 s3sync.net
#

module S3sync

  # always look "here" for include files (thanks aktxyz)
  # $LOAD_PATH << File.join(File.expand_path(File.dirname(__FILE__)), "../lib/s3sync/")
  $LOAD_PATH << File.join(File.expand_path(File.dirname(__FILE__)), "../lib/")

  require 's3sync'
  require 'getoptlong'

  def S3sync.s3cmdMain
    # ---------- OPTIONS PROCESSING ---------- #

    $S3syncOptions = Hash.new
    optionsParser = GetoptLong.new(
      [ '--help',       '-h', GetoptLong::NO_ARGUMENT ],
      [ '--ssl',        '-s', GetoptLong::NO_ARGUMENT ],
      [ '--verbose',    '-v', GetoptLong::NO_ARGUMENT ],
      [ '--dryrun',     '-n', GetoptLong::NO_ARGUMENT ],
      [ '--debug',      '-d', GetoptLong::NO_ARGUMENT ],
      [ '--progress',         GetoptLong::NO_ARGUMENT ],
      [ '--expires-in',       GetoptLong::REQUIRED_ARGUMENT ]
    )

    def S3sync.s3cmdUsage(message = nil)
      $stderr.puts message if message
      name = $0.split('/').last
      $stderr.puts <<"ENDUSAGE"
#{name} [options] <command> [arg(s)]\t\tversion #{S3sync::VERSION}
  --help    -h        --verbose     -v     --dryrun    -n
  --ssl     -s        --debug       -d     --progress
  --expires-in=( <# of seconds> | [#d|#h|#m|#s] )

Commands:
#{name} listbuckets [headers]
#{name} createbucket <bucket> [constraint (i.e. EU)]
#{name} deletebucket <bucket> [headers]
#{name} list <bucket>[:prefix] [max/page] [delimiter] [headers]
#{name} location <bucket> [headers]
#{name} delete <bucket>:key [headers]
#{name} deleteall <bucket>[:prefix] [headers]
#{name} get|put <bucket>:key <file> [headers]
#{name} copy <bucket>:key <bucket>:key [headers]
#{name} copyall <bucket>:key <bucket>:key [headers]
#{name} headers <bucket>:key [headers]
ENDUSAGE
      exit
    end #usage

    begin
      optionsParser.each {|opt, arg| $S3syncOptions[opt] = (arg || true)}
    rescue StandardError
      s3cmdUsage # the parser already printed an error message
    end
    s3cmdUsage if $S3syncOptions['--help']
    $S3syncOptions['--verbose'] = true if $S3syncOptions['--dryrun'] or
                                          $S3syncOptions['--debug'] or
                                          $S3syncOptions['--progress']

    # change from "" to true to appease s3 port chooser
    $S3syncOptions['--ssl'] = true if $S3syncOptions['--ssl']

    if $S3syncOptions['--expires-in'] =~ /d|h|m|s/
      e = $S3syncOptions['--expires-in']
      days    = (e =~ /(\d+)d/)? (/(\d+)d/.match(e))[1].to_i : 0
      hours   = (e =~ /(\d+)h/)? (/(\d+)h/.match(e))[1].to_i : 0
      minutes = (e =~ /(\d+)m/)? (/(\d+)m/.match(e))[1].to_i : 0
      seconds = (e =~ /(\d+)s/)? (/(\d+)s/.match(e))[1].to_i : 0
      $S3syncOptions['--expires-in'] = seconds + 60 * ( minutes + 60 * ( hours + 24 * ( days ) ) )
    end

    # ---------- CONNECT ---------- #
    S3sync::s3trySetup

    # ---------- COMMAND PROCESSING ---------- #
    command, path, file = ARGV

    s3cmdUsage("You didn't set up your environment variables; see README.txt") if not($AWS_ACCESS_KEY_ID and $AWS_SECRET_ACCESS_KEY)
    s3cmdUsage("Need a command (etc)") if not command

    path = '' unless path
    path = path.dup # modifiable
    path += ':' unless path.match(':')
    bucket = (/^(.*?):/.match(path))[1]
    path.replace((/:(.*)$/.match(path))[1])

    case command

    when "delete"
      s3cmdUsage("Need a bucket") if bucket == ''
      s3cmdUsage("Need a key") if path == ''
      headers = hashPairs(ARGV[2...ARGV.length])
      $stderr.puts "delete #{bucket}:#{path} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      S3try(:delete, bucket, path) unless $S3syncOptions['--dryrun']

    when "deleteall"
      s3cmdUsage("Need a bucket") if bucket == ''
      headers = hashPairs(ARGV[2...ARGV.length])
      $stderr.puts "delete ALL entries in #{bucket}:#{path} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      more = true
      marker = nil
      while more do
        res = s3cmdList(bucket, path, nil, nil, marker)
        res.entries.each do |item|
          # the s3 commands (with my modified UTF-8 conversion) expect native char encoding input
          key = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join
          $stderr.puts "delete #{bucket}:#{key} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
          S3try(:delete, bucket, key) unless $S3syncOptions['--dryrun']
        end

        more = res.properties.is_truncated
        marker = (res.properties.next_marker)? res.properties.next_marker : ((res.entries.length > 0) ? res.entries.last.key : nil)
        # get this into local charset; when we pass it to s3 that is what's expected
        marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join if marker
      end

    when "list"
      s3cmdUsage("Need a bucket") if bucket == ''
      max, delim = ARGV[2..3]
      headers = hashPairs(ARGV[4...ARGV.length])
      $stderr.puts "list #{bucket}:#{path} #{max} #{delim} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      puts "--------------------"

      more = true
      marker = nil
      while more do
        res = s3cmdList(bucket, path, max, delim, marker, headers)
        if delim
          res.common_prefix_entries.each do |item|
            puts "dir: " + Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.prefix).join
          end
          puts "--------------------"
        end
        res.entries.each do |item|
          puts Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join
        end
        if res.properties.is_truncated
          printf "More? Y/n: "
          more = (STDIN.gets.match('^[Yy]?$'))
          marker = (res.properties.next_marker)? res.properties.next_marker : ((res.entries.length > 0) ? res.entries.last.key : nil)
          # get this into local charset; when we pass it to s3 that is what's expected
          marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join if marker
        else
          more = false
        end
      end # more

    when "listbuckets"
      headers = hashPairs(ARGV[1...ARGV.length])
      $stderr.puts "list all buckets #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      if $S3syncOptions['--expires-in']
        $stdout.puts S3url(:list_all_my_buckets, headers)
      else
        res = S3try(:list_all_my_buckets, headers)
        res.entries.each do |item|
          puts item.name
        end
      end

    when "createbucket"
      s3cmdUsage("Need a bucket") if bucket == ''
      lc = ''
      if(ARGV.length > 2)
        lc = '<CreateBucketConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01"><LocationConstraint>' + ARGV[2] + '</LocationConstraint></CreateBucketConfiguration>'
      end
      $stderr.puts "create bucket #{bucket} #{lc}" if $S3syncOptions['--verbose']
      S3try(:create_bucket, bucket, lc) unless $S3syncOptions['--dryrun']

    when "deletebucket"
      s3cmdUsage("Need a bucket") if bucket == ''
      headers = hashPairs(ARGV[2...ARGV.length])
      $stderr.puts "delete bucket #{bucket} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      S3try(:delete_bucket, bucket, headers) unless $S3syncOptions['--dryrun']

    when "location"
      s3cmdUsage("Need a bucket") if bucket == ''
      headers = hashPairs(ARGV[2...ARGV.length])
      query = Hash.new
      query['location'] = 'location'
      $stderr.puts "location request bucket #{bucket} #{query.inspect} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      S3try(:get_query_stream, bucket, '', query, headers, $stdout) unless $S3syncOptions['--dryrun']

    when "get"
      s3cmdUsage("Need a bucket") if bucket == ''
      s3cmdUsage("Need a key") if path == ''
      s3cmdUsage("Need a file") if file == ''
      headers = hashPairs(ARGV[3...ARGV.length])
      $stderr.puts "get from key #{bucket}:#{path} into #{file} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      unless $S3syncOptions['--dryrun']
        if $S3syncOptions['--expires-in']
          $stdout.puts S3url(:get, bucket, path, headers)
        else
          outStream = File.open(file, 'wb')
          outStream = ProgressStream.new(outStream) if $S3syncOptions['--progress']
          S3try(:get_stream, bucket, path, headers, outStream)
          outStream.close
        end
      end

    when "put"
      s3cmdUsage("Need a bucket") if bucket == ''
      s3cmdUsage("Need a key") if path == ''
      s3cmdUsage("Need a file") if file == ''
      headers = hashPairs(ARGV[3...ARGV.length])
      stream = File.open(file, 'rb')
      stream = ProgressStream.new(stream, File.stat(file).size) if $S3syncOptions['--progress']
      s3o = S3::S3Object.new(stream, {}) # support meta later?
      headers['Content-Length'] = FileTest.size(file).to_s
      $stderr.puts "put to key #{bucket}:#{path} from #{file} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      S3try(:put, bucket, path, s3o, headers) unless $S3syncOptions['--dryrun']
      stream.close

    when "copy"
      s3cmdUsage("Need a bucket") if bucket == ''
      s3cmdUsage("Need a key") if path == ''
      s3cmdUsage("Need a file") if file == ''

      file = file.dup # modifiable
      file += ':' unless file.match(':')
      dest_bucket = (/^(.*?):/.match(file))[1]
      file.replace((/:(.*)$/.match(file))[1])

      headers = hashPairs(ARGV[3...ARGV.length])
      if headers.length > 0
        headers["x-amz-metadata-directive"] = "REPLACE"
      else
        headers["x-amz-metadata-directive"] = "COPY"
      end
      headers["x-amz-copy-source"] = "/#{bucket}/#{URI.escape(path)}"

      $stderr.puts "copy #{bucket}:#{path} to #{dest_bucket}/#{file} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      S3try(:put, dest_bucket, file, nil, headers) unless $S3syncOptions['--dryrun']

    when "copyall"
      s3cmdUsage("Need a bucket") if bucket == ''
      s3cmdUsage("Need a path") if path == ''

      file = file.dup # modifiable
      file += ':' unless file.match(':')
      dest_bucket = (/^(.*?):/.match(file))[1]
      dest_key = file.replace((/:(.*)$/.match(file))[1])

      src_path = path

      headers = hashPairs(ARGV[2...ARGV.length])

      $stderr.puts "copy #{bucket}:#{path} to #{dest_bucket}:#{dest_key} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      more = true
      marker = nil
      while more do
        res = s3cmdList(bucket, path, nil, nil, marker)
        res.entries.each do |item|
          # the s3 commands (with my modified UTF-8 conversion) expect native char encoding input
          path = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join

          file = path.gsub(src_path, dest_key)

          headers = hashPairs(ARGV[3...ARGV.length])
          if headers.length > 0
            headers["x-amz-metadata-directive"] = "REPLACE"
          else
            headers["x-amz-metadata-directive"] = "COPY"
          end
          headers["x-amz-copy-source"] = "/#{bucket}/#{URI.escape(path)}"

          $stderr.puts "copy #{bucket}:#{path} to #{dest_bucket}/#{file} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
          S3try(:put, dest_bucket, file, nil, headers) unless $S3syncOptions['--dryrun']
        end

        more = res.properties.is_truncated
        marker = (res.properties.next_marker)? res.properties.next_marker : ((res.entries.length > 0) ? res.entries.last.key : nil)
        # get this into local charset; when we pass it to s3 that is what's expected
        marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join if marker
      end

    when "headers"
      s3cmdUsage("Need a bucket") if bucket == ''
      s3cmdUsage("Need a path") if path == ''

      headers = hashPairs(ARGV[2...ARGV.length])

      $stderr.puts "apply headers to ALL entries in #{bucket}:#{path} #{headers.inspect if headers}" if $S3syncOptions['--verbose']
      more = true
      marker = nil
      while more do
        res = s3cmdList(bucket, path, nil, nil, marker)
        res.entries.each do |item|
          # the s3 commands (with my modified UTF-8 conversion) expect native char encoding input
          key = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join

          tmpHeaders = headers.merge({
            "x-amz-copy-source" => "/#{bucket}/#{URI.escape(key)}",
            "x-amz-metadata-directive" => "REPLACE"
          })

          #
          # grab the current content-type unless its been specified explicitly
          #
          if not tmpHeaders.key?("content-type")
            currentHeaders = S3try(:head, bucket, key).object.metadata
            tmpHeaders["content-type"] = currentHeaders["content-type"]
          end

          $stderr.puts "apply headers to #{bucket}:#{key} #{tmpHeaders.inspect}" if $S3syncOptions['--verbose']
          S3try(:put, bucket, key, nil, tmpHeaders) unless $S3syncOptions['--dryrun']
        end

        more = res.properties.is_truncated
        marker = (res.properties.next_marker)? res.properties.next_marker : ((res.entries.length > 0) ? res.entries.last.key : nil)
        # get this into local charset; when we pass it to s3 that is what's expected
        marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join if marker
      end

    else
      s3cmdUsage
    end

  end #main

end #module

S3sync::s3cmdMain #go!
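For reference, a few hypothetical invocations of this command, pieced together from the usage text above; the bucket name mybucket and the file names are placeholders, and the AWS key environment variables are assumed to be set as the README describes:

  s3cmd listbuckets
  s3cmd createbucket mybucket EU
  s3cmd list mybucket:backups 100 /
  s3cmd put mybucket:backups/notes.txt notes.txt
  s3cmd --progress get mybucket:backups/notes.txt notes.txt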
data/bin/s3sync
ADDED
@@ -0,0 +1,731 @@
#!/usr/bin/env ruby
# This software code is made available "AS IS" without warranties of any
# kind.  You may copy, display, modify and redistribute the software
# code either by itself or as incorporated into your code; provided that
# you do not remove any proprietary notices.  Your use of this software
# code is at your own risk and you waive any claim against the author
# with respect to your use of this software code.
# (c) 2007 s3sync.net
#

module S3sync

  $S3SYNC_MIME_TYPES_FILE = (ENV["S3SYNC_MIME_TYPES_FILE"] or '/etc/mime.types')
  $S3SYNC_VERSION = '1.2.6'

  # always look "here" for include files (thanks aktxyz)
  $LOAD_PATH << File.join(File.expand_path(File.dirname(__FILE__)), "../lib/")

  require 'getoptlong'
  #require 'generator' # http://www.ruby-doc.org/stdlib/libdoc/generator/rdoc/classes/Generator.html
  require 's3sync/thread_generator' # memory doesn't leak with this one, at least nothing near as bad
  require 'md5'
  require 'tempfile'

  require 's3sync'

  $S3syncDirString = '{E40327BF-517A-46e8-A6C3-AF51BC263F59}'
  $S3syncDirTag = 'd66759af42f282e1ba19144df2d405d0'
  $S3syncDirFile = Tempfile.new("s3sync")
  $S3syncDirFile.puts $S3syncDirString
  $S3syncDirFile.close # not final; we need this file again to 'put' directory nodes

  if $S3SYNC_MIME_TYPES_FILE and FileTest.exist?($S3SYNC_MIME_TYPES_FILE)
    File.open($S3SYNC_MIME_TYPES_FILE, 'r') do |f|
      $mimeTypes = {}
      f.each_line do |l|
        if l =~ /^(\w\S+)\s+(\S.*)$/
          type = $1
          exts = $2.split
          exts.each do |e|
            $mimeTypes[e.to_s] = type.to_s
          end
        end
      end
    end
  end

  def S3sync.main
    # ---------- OPTIONS PROCESSING ---------- #

    $S3syncOptions = Hash.new
    optionsParser = GetoptLong.new(
      [ '--help',          '-h', GetoptLong::NO_ARGUMENT ],
      [ '--ssl',                 GetoptLong::NO_ARGUMENT ],
      [ '--recursive',     '-r', GetoptLong::NO_ARGUMENT ],
      [ '--public-read',   '-p', GetoptLong::NO_ARGUMENT ],
      [ '--delete',              GetoptLong::NO_ARGUMENT ],
      [ '--verbose',       '-v', GetoptLong::NO_ARGUMENT ],
      [ '--dryrun',        '-n', GetoptLong::NO_ARGUMENT ],
      [ '--debug',         '-d', GetoptLong::NO_ARGUMENT ],
      [ '--memory',        '-m', GetoptLong::NO_ARGUMENT ],
      [ '--progress',            GetoptLong::NO_ARGUMENT ],
      [ '--expires',             GetoptLong::REQUIRED_ARGUMENT ],
      [ '--cache-control',       GetoptLong::REQUIRED_ARGUMENT ],
      [ '--exclude',             GetoptLong::REQUIRED_ARGUMENT ],
      [ '--key',           '-k', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--secret',        '-s', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--make-dirs',           GetoptLong::NO_ARGUMENT ],
      [ '--no-md5',              GetoptLong::NO_ARGUMENT ]
    )

    def S3sync.usage(message = nil)
      $stderr.puts message if message
      name = $0.split('/').last
      $stderr.puts <<-ENDUSAGE
#{name} [options] <source> <destination>\t\tversion #{$S3SYNC_VERSION}

  --help    -h          --verbose     -v     --dryrun    -n
  --ssl                 --recursive   -r     --delete
  --public-read -p      --expires="<exp>"    --cache-control="<cc>"
  --exclude="<regexp>"  --progress           --debug   -d
  --key -k              --secret -s          --make-dirs
  --no-md5

One of <source> or <destination> must be of S3 format, the other a local path.
Reminders:

* An S3 formatted item with bucket 'mybucket' and prefix 'mypre' looks like: mybucket:mypre/some/key/name
* Local paths should always use forward slashes '/' even on Windows
* Whether you use a trailing slash on the source path makes a difference.
      ENDUSAGE
      exit
    end #usage

    begin
      optionsParser.each {|opt, arg| $S3syncOptions[opt] = (arg || true)}
    rescue StandardError
      usage # the parser already printed an error message
    end
    usage if $S3syncOptions['--help']
    $S3syncOptions['--verbose'] = true if $S3syncOptions['--dryrun'] or $S3syncOptions['--debug'] or $S3syncOptions['--progress']
    $S3syncOptions['--ssl'] = true if $S3syncOptions['--ssl'] # change from "" to true to appease s3 port chooser

    if $S3syncOptions['--key']
      $AWS_ACCESS_KEY_ID = $S3syncOptions['--key']
    end

    if $S3syncOptions['--secret']
      $AWS_SECRET_ACCESS_KEY = $S3syncOptions['--secret']
    end

    # ---------- CONNECT ---------- #
    S3sync::s3trySetup

    # ---------- PREFIX PROCESSING ---------- #
    def S3sync.s3Prefix?(pre)
      # allow for dos-like things e.g. C:\ to be treated as local even with colon
      pre.include?(':') and not pre.match('^[A-Za-z]:[\\\\/]')
    end
    sourcePrefix, destinationPrefix = ARGV
    usage("You didn't set up your environment variables; see README.txt") if not($AWS_ACCESS_KEY_ID and $AWS_SECRET_ACCESS_KEY)
    usage('Need a source and a destination') if sourcePrefix == nil or destinationPrefix == nil
    usage('Both arguments can\'t be on S3') if s3Prefix?(sourcePrefix) and s3Prefix?(destinationPrefix)
    usage('One argument must be on S3') if !s3Prefix?(sourcePrefix) and !s3Prefix?(destinationPrefix)

    # so we can modify them
    sourcePrefix, destinationPrefix = sourcePrefix.dup, destinationPrefix.dup

    # handle trailing slash for source properly
    if(sourcePrefix !~ %r{/$})
      # no slash on end of source means we need to append the last src dir to dst prefix
      # testing for empty isn't good enough here.. needs to be "empty apart from potentially having 'bucket:'"
      slash = (destinationPrefix.empty? or destinationPrefix.match(%r{:$}))? "" : "/"
      # not good enough.. sometimes this coughs up the bucket as a prefix destinationPrefix.replace(destinationPrefix + slash + sourcePrefix.split(/(?:\/|:)/).last)
      # take everything at the end after a slash or colon
      destinationPrefix.replace(destinationPrefix + slash + %r{([^/:]*)$}.match(sourcePrefix)[1])
    end
    # no trailing slash on dest, ever.
    destinationPrefix.sub!(%r{/$}, "")

    # don't repeat slashes
    sourcePrefix.squeeze!('/')
    destinationPrefix.squeeze!('/')

    # here's where we find out what direction we're going
    sourceIsS3 = s3Prefix?(sourcePrefix)
    # alias these variables to the other strings (in ruby = does not make copies of strings)
    s3Prefix = sourceIsS3 ? sourcePrefix : destinationPrefix
    localPrefix = sourceIsS3 ? destinationPrefix : sourcePrefix

    # canonicalize the S3 stuff
    s3Bucket = (/^(.*?):/.match(s3Prefix))[1]
    s3Prefix.replace((/:(.*)$/.match(s3Prefix))[1])
    debug("s3Prefix #{s3Prefix}")
    $S3SyncOriginalS3Prefix = s3Prefix.dup

    # canonicalize the local stuff
    # but that can kill a trailing slash, which we need to preserve long enough to know whether we mean "the dir" or "its contents"
    # it will get re-stripped by the local generator after expressing this knowledge
    localTrailingSlash = localPrefix.match(%r{/$})
    localPrefix.replace(File.expand_path(localPrefix))
    localPrefix += '/' if localTrailingSlash
    debug("localPrefix #{localPrefix}")
    # used for exclusion parsing
    $S3SyncOriginalLocalPrefix = localPrefix.dup

    # exclude preparation
    # we don't want to build then throw away this regexp for each node in the universe; do it once globally
    $S3SyncExclude = Regexp.new($S3syncOptions['--exclude']) if $S3syncOptions['--exclude']

    # ---------- GENERATORS ---------- #

    # a generator that will return the files/dirs of the local tree one by one
    # sorted and decorated for easy comparison with the S3 tree
    localTree = Generator.new do |g|
      def S3sync.localTreeRecurse(g, prefix, path)
        debug("localTreeRecurse #{prefix} #{path}")
        #if $S3syncOptions['--memory']
        #   $stderr.puts "Starting local recurse"
        #   stats = ostats stats
        #end
        d = nil
        begin
          slash = prefix.empty? ? "" : "/"
          d = Dir.new(prefix + slash + path)
        rescue Errno::ENOENT
          # ok the dir doesn't exist at all (this only really occurs for the root i.e. first dir)
          return nil
        rescue Errno::EACCES
          # vista won't even let us touch some stuff in our own profile
          return nil
        end
        # do some pre-processing
        # the following sleight of hand is to make the recursion match the way s3 sorts
        # take for example the directory 'foo' and the file 'foo.bar'
        # when we encounter the dir we would want to recurse into it
        # but S3 would just say 'period < slash' and sort 'foo.bar' between the dir node
        # and the contents in that 'dir'
        #
        # so the solution is to not recurse into the directory until the point where
        # it would come up "next" in the S3 list
        # We have to do these hoops on the local side, because we have very little control
        # over how S3 will return its results
        toAdd = Array.new
        d.each do |name|
          slash = path.empty? ? "" : "/"
          partialPath = path + slash + name
          slash = prefix.empty? ? "" : "/"
          fullPath = prefix + slash + partialPath
          if name == "." or name == ".."
            # skip
          else
            # add a dir node if appropriate
            debug("Test #{fullPath}")
            if ((not FileTest.symlink?(fullPath)) and FileTest.directory?(fullPath)) and $S3syncOptions['--recursive']
              debug("Adding it as a dir node")
              toAdd.push(name + '/') # always trail slash here for sorting purposes (removed below with rindex test)
            end
          end
        end
        dItems = d.collect + toAdd
        d.close
        d = toAdd = nil
        dItems.sort! #aws says we will get alpha sorted results but ruby doesn't
        dItems.each do |name|
          isDirNode = false
          if name.rindex('/') == name.length-1
            name = name.slice(0...name.length-1)
            isDirNode = true
            debug("#{name} is a dir node")
          end
          slash = path.empty? ? "" : "/"
          partialPath = path + slash + name
          slash = prefix.empty? ? "" : "/"
          fullPath = prefix + slash + partialPath
          excludePath = fullPath.slice($S3SyncOriginalLocalPrefix.length...fullPath.length)
          if name == "." or name == ".."
            # skip
          elsif $S3SyncExclude and $S3SyncExclude.match(excludePath)
            debug("skipping local item #{excludePath} because of --exclude")
          elsif isDirNode
            localTreeRecurse(g, prefix, partialPath)
          else
            # a normal looking node we should try to process
            debug("local item #{fullPath}")
            g.yield(LocalNode.new(prefix, partialPath))
          end
        end
        #if $S3syncOptions['--memory']
        #   $stderr.puts "Ending local recurse"
        #   stats = ostats stats
        #end
      end
      # a bit of a special case for local, since "foo/" and "foo" are essentially treated the same by file systems
      # so we need to think harder about what the user really meant in the command line.
      localPrefixTrim = localPrefix
      if localPrefix !~ %r{/$}
        # no trailing slash, so yield the root itself first, then recurse if appropriate
        # gork this is still not quite good enough.. if local is the dest then we don't know whether s3 will have a root dir node yielded a priori, so we can't know whether to do this. only matters for --erase though
        g.yield(LocalNode.new(localPrefixTrim, "")) # technically we should check this for exclusion, but excluding the root node is kind of senseless.. and that would be a pain to set up here
        localTreeRecurse(g, localPrefixTrim, "") if $S3syncOptions['--recursive']
      else
        # trailing slash, so ignore the root itself, and just go into the first level
        localPrefixTrim.sub!(%r{/$}, "") # strip the slash because of how we do local node slash accounting in the recurse above
        localTreeRecurse(g, localPrefixTrim, "")
      end
    end

    # a generator that will return the nodes in the S3 tree one by one
    # sorted and decorated for easy comparison with the local tree
    s3Tree = Generator.new do |g|
      def S3sync.s3TreeRecurse(g, bucket, prefix, path)
        if $S3syncOptions['--memory']
          $stderr.puts "Starting S3 recurse"
          GC.start
          stats = ostats stats
        end
        $stderr.puts "s3TreeRecurse #{bucket} #{prefix} #{path}" if $S3syncOptions['--debug']
        nextPage = true
        marker = ''
        while nextPage do
          fullPrefix = prefix + path
          debug("nextPage: #{marker}") if marker != ''
          options = {}
          options['prefix'] = fullPrefix # start at the right depth
          options['delimiter'] = '/' # only one dir at a time please
          options['max-keys'] = '200' # use manageable chunks
          options['marker'] = marker unless marker == ''
          d = S3sync.S3try(:list_bucket, bucket, options)
          $stderr.puts "S3 ERROR: #{d.http_response}" unless d.http_response.is_a? Net::HTTPSuccess
          # the 'directories' and leaf nodes are in two separate collections
          # because a dir will never have the same name as a node, we can just shove them together and sort
          # it's important to evaluate them alphabetically for efficient comparison to the local tree
          tItems = d.entries + d.common_prefix_entries
          tItems.sort! do |a,b|
            aName = a.respond_to?('key') ? a.key : a.prefix
            bName = b.respond_to?('key') ? b.key : b.prefix
            # the full path will be returned, efficient to ignore the part we know will be in common
            aName.slice(fullPrefix.length..aName.length) <=> bName.slice(fullPrefix.length..bName.length)
          end
          # get rid of the big s3 objects asap, just save light-weight nodes and strings
          items = tItems.collect do |item|
            if item.respond_to?('key')
              key = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join
              Node.new(key, item.size, item.etag, item.last_modified)
            else
              Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.prefix).join
            end
          end
          nextPage = d.properties.is_truncated
          marker = (d.properties.next_marker)? d.properties.next_marker : ((d.entries.length > 0)? d.entries.last.key : '')
          # get this into native char set (because when we feed it back to s3 that's what it will expect)
          marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join
          tItems = nil
          d = nil # get rid of this before recursing; it's big
          item = nil
          GC.start # not sure but I think yielding before doing this is causing evil closure bloat
          items.each do |item|
            if not (item.kind_of? String)
              # this is an item
              excludePath = item.name.slice($S3SyncOriginalS3Prefix.length...item.name.length)
              if $S3SyncExclude and $S3SyncExclude.match(excludePath)
                debug("skipping S3 item #{excludePath} due to --exclude")
              else
                debug("S3 item #{item.name}")
                g.yield(S3Node.new(bucket, prefix, item))
              end
            else
              # it's a prefix (i.e. there are sub keys)
              partialPath = item.slice(prefix.length..item.length) # will have trailing slash
              excludePath = item.slice($S3SyncOriginalS3Prefix.length...item.length)
              # recurse
              if $S3SyncExclude and $S3SyncExclude.match(excludePath)
                debug("skipping prefix #{excludePath} due to --exclude")
              else
                debug("prefix found: #{partialPath}")
                s3TreeRecurse(g, bucket, prefix, partialPath) if $S3syncOptions['--recursive']
              end
            end
          end
          items = nil
        end # of while nextPage
        if $S3syncOptions['--memory']
          $stderr.puts "Ending S3 recurse"
          GC.start
          stats = ostats stats
        end
      end
      # this will yield the root node first and then recurse
      s3TreeRecurse(g, s3Bucket, s3Prefix, "")
    end

    # alias the tree objects so we don't care below which direction the transfer is going
    if sourceIsS3
      sourceTree, destinationTree = s3Tree, localTree
    else
      sourceTree, destinationTree = localTree, s3Tree
    end

    # ---------- COMPARATOR ---------- #

    # run the comparison engine and act according to what we find for each check
    nodesToDelete = Array.new # a stack. have to delete in reverse order of normal create/update processing

    sourceNode = sourceTree.next? ? sourceTree.next : nil
    destinationNode = destinationTree.next? ? destinationTree.next : nil
    while sourceNode or destinationNode do
      debug("source: #{sourceNode.name}") if sourceNode
      debug("dest: #{destinationNode.name}") if destinationNode
      if (!destinationNode) or (sourceNode and (sourceNode.name < destinationNode.name))
        dNode =
          if sourceNode.kind_of? LocalNode
            S3Node.new(s3Bucket, s3Prefix, sourceNode.name)
          else
            LocalNode.new(localPrefix, sourceNode.name)
          end
        puts "Create node #{sourceNode.name}" if $S3syncOptions['--verbose']
        dNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
        sourceNode = sourceTree.next? ? sourceTree.next : nil
      elsif (!sourceNode) or (destinationNode and (sourceNode.name > destinationNode.name))
        $stderr.puts "Source does not have #{destinationNode.name}" if $S3syncOptions['--debug']
        if $S3syncOptions['--delete']
          if destinationNode.directory?
            # have to wait
            nodesToDelete.push(destinationNode)
          else
            puts "Remove node #{destinationNode.name}" if $S3syncOptions['--verbose']
            destinationNode.delete unless $S3syncOptions['--dryrun']
          end
        end
        destinationNode = destinationTree.next? ? destinationTree.next : nil
      elsif sourceNode.name == destinationNode.name
        if (sourceNode.size != destinationNode.size) or (($S3syncOptions['--no-md5'])? (sourceNode.date > destinationNode.date) : (sourceNode.tag != destinationNode.tag))
          puts "Update node #{sourceNode.name}" if $S3syncOptions['--verbose']
          destinationNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
        elsif $S3syncOptions['--debug']
          $stderr.puts "Node #{sourceNode.name} unchanged"
        end
        sourceNode = sourceTree.next? ? sourceTree.next : nil
        destinationNode = destinationTree.next? ? destinationTree.next : nil
      end
    end

    # get rid of the (now empty, except for other directories) directories
    nodesToDelete.reverse_each do |node|
      puts "Remove node #{node.name}" if $S3syncOptions['--verbose']
      node.delete unless $S3syncOptions['--dryrun']
    end
  end #main


  # ---------- NODE ---------- #
  class Node
    attr_reader :name
    attr_reader :size
    attr_reader :tag
    attr_reader :date
    def initialize(name='', size = 0, tag = '', date = Time.now.utc)
      @name = name
      @size = size
      @tag = tag
      @date = date
    end
    def directory?()
      @tag == $S3syncDirTag and @size == $S3syncDirString.length
    end
  end

  # ---------- S3Node ---------- #
  class S3Node < Node
    @path = nil
    @bucket = nil
    @result = nil
    def initialize(bucket, prefix, itemOrName)
      @bucket = bucket
      if itemOrName.kind_of? String
        @name = itemOrName
        @name.sub!(%r{/$}, "") # don't create directories with a slash on the end
        #6/2007. the prefix can be filled but the name empty, in the case of s3sync -r somedir somebucket:
        if (not prefix.empty? and @name.empty?)
          @name = prefix
          itemOrName = prefix
          prefix = ""
        end
        slash = prefix.empty? ? "" : "/"
        @path = prefix + slash + itemOrName
      else
        @name = (itemOrName.name.slice((prefix.length)..itemOrName.name.length) or '')
        # depending whether the prefix is / tailed, the name might need trimming
        @name.sub!(%r{^/},"") # get rid of leading slash in name if there (from above simplistic split)
        @name.sub!(%r{/$}, "") # don't create directories with a slash on the end
        @path = itemOrName.name
        @path.sub!(%r{/$}, "") # don't create directories with a slash on the end
        @size = itemOrName.size
        @tag = itemOrName.tag.gsub(/"/,'')
        @date = Time.xmlschema(itemOrName.date)
      end
      debug("s3 node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
    end
    # get this item from s3 into the provided stream
    # S3 pushes to the local item, due to how http streaming is implemented
    def to_stream(s)
      @result = S3sync.S3try(:get_stream, @bucket, @path, {}, s)
    end
    def symlink?()
      unless @result
        @result = S3sync.S3try(:head, @bucket, @path)
      end
      debug("symlink value is: #{@result.object.metadata['symlink']}")
      @result.object.metadata['symlink'] == 'true'
    end
    def owner
      unless @result
        @result = S3sync.S3try(:head, @bucket, @path)
      end
      debug("Owner of this s3 node is #{@result.object.metadata['owner']}")
      @result.object.metadata['owner'].to_i # if not there, will be nil => 0 which == root so good default
    end
    def group
      unless @result
        @result = S3sync.S3try(:head, @bucket, @path)
      end
      @result.object.metadata['group'].to_i # 0 default ok
    end
    def permissions
      g = @result.object.metadata['permissions']
      g ? g.to_i : 600 # default to owner only
    end
    def updateFrom(fromNode)
      if fromNode.respond_to?(:stream)
        meta = Hash.new
        meta['owner'] = fromNode.owner.to_s
        meta['group'] = fromNode.group.to_s
        meta['permissions'] = fromNode.permissions.to_s
        meta['symlink'] = 'true' if fromNode.symlink?
        begin
          theStream = fromNode.stream
          theStream = ProgressStream.new(theStream, fromNode.size) if $S3syncOptions['--progress']

          s3o = S3::S3Object.new(theStream, meta)
          debug(@path)
          headers = {'Content-Length' => (fromNode.size.respond_to?(:nonzero?) ? fromNode.size.to_s : '0')}
          headers['x-amz-acl'] = 'public-read' if $S3syncOptions['--public-read']
          headers['Expires'] = $S3syncOptions['--expires'] if $S3syncOptions['--expires']
          headers['Cache-Control'] = $S3syncOptions['--cache-control'] if $S3syncOptions['--cache-control']
          fType = @path.split('.').last
          debug("File extension: #{fType}")
          if defined?($mimeTypes) and fType != '' and (mType = $mimeTypes[fType]) and mType != ''
            debug("Mime type: #{mType}")
            headers['Content-Type'] = mType
          end
          @result = S3sync.S3try(:put, @bucket, @path, s3o, headers)
          theStream.close if (theStream and not theStream.closed?)
        rescue NoMethodError
          # when --progress is used and we can't get the stream object, it doesn't report as null
          # so the above .closed? test will break
          $stderr.puts "Skipping #{@path}: " + $!
        rescue SystemCallError
          theStream.close if (theStream and not theStream.closed?)
          $stderr.puts "Skipping #{@path}: " + $!
        end
      else
        raise "Node provided as update source doesn't support :stream"
      end
    end
    def delete
      @result = S3sync.S3try(:delete, @bucket, @path)
    end
  end

  # ---------- LocalNode ---------- #

  class LocalNode < Node
    @path = nil
    def initialize(prefix, partialPath)
      slash = prefix.empty? ? "" : "/"
      @path = prefix + slash + partialPath
      # slash isn't at the front of this any more @name = (partialPath.slice(1..partialPath.length) or '')
      @name = partialPath or ''
      if FileTest.symlink?(@path)
        # this could use the 'file' case below, but why create an extra temp file
        linkData = File.readlink(@path)
        $stderr.puts "link to: #{linkData}" if $S3syncOptions['--debug']
        @size = linkData.length
        unless $S3syncOptions['--no-md5']
          md5 = Digest::MD5.new()
          md5 << linkData
          @tag = md5.hexdigest
        end
        @date = File.lstat(@path).mtime.utc
      elsif FileTest.file?(@path)
        @size = FileTest.size(@path)
        data = nil
        begin
          unless $S3syncOptions['--no-md5']
            data = self.stream
            md5 = Digest::MD5.new()
            while !data.eof?
              md5 << data.read(2048) # stream so it's not taking all memory
            end
            data.close
            @tag = md5.hexdigest
          end
          @date = File.stat(@path).mtime.utc
        rescue SystemCallError
          # well we're not going to have an md5 that's for sure
          @tag = nil
        end
      elsif FileTest.directory?(@path)
        # all s3 directories are dummy nodes contain the same directory string
        # so for easy comparison, set our size and tag thusly
        @size = $S3syncDirString.length
        @tag = $S3syncDirTag
        @date = File.stat(@path).mtime.utc
      end
      debug("local node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
    end
    # return a stream that will read the contents of the local item
    # local gets pulled by the S3Node update fn, due to how http streaming is implemented
    def stream
      begin
        # 1.0.8 switch order of these tests because a symlinked file will say yes to 'file?'
        if FileTest.symlink?(@path) or FileTest.directory?(@path)
          tf = Tempfile.new('s3sync')
          if FileTest.symlink?(@path)
            tf.printf('%s', File.readlink(@path))
          elsif FileTest.directory?(@path)
            tf.printf('%s', $S3syncDirString)
          end
          tf.close
          tf.open
          tf
        elsif FileTest.file?(@path)
          File.open(@path, 'rb')
        end
      rescue SystemCallError
        $stderr.puts "Could not read #{@path}: #{$!}"
        raise
      end
    end
    def stat
      FileTest.symlink?(@path) ? File.lstat(@path) : File.stat(@path)
    end
    def exist?
      FileTest.exist?(@path) or FileTest.symlink?(@path)
    end
    def owner
      self.exist? ? self.stat().uid : 0
    end
    def group
      self.exist? ? self.stat().gid : 0
    end
    def permissions
      self.exist? ? self.stat().mode : 600
    end
    def updateFrom(fromNode)
      if fromNode.respond_to?(:to_stream)
        fName = @path + '.s3syncTemp'
        # handle the case where the user wants us to create dirs that don't exist in S3
        if $S3syncOptions['--make-dirs']
          # ensure target's path exists
          dirs = @path.split('/')
          # but the last one is a file name
          dirs.pop()
          current = ''
          dirs.each do |dir|
            current << dir << '/'
            begin
              Dir.mkdir(current) unless FileTest.exist?(current)
            rescue SystemCallError
              $stderr.puts "Could not mkdir #{current}: #{$!}"
            end
          end
        end
        unless fromNode.directory?
          f = File.open(fName, 'wb')
          f = ProgressStream.new(f, fromNode.size) if $S3syncOptions['--progress']

          fromNode.to_stream(f)
          f.close
        end
        # get original item out of the way
        File.unlink(@path) if File.exist?(@path)
        if fromNode.symlink?
          linkTo = ''
          File.open(fName, 'rb'){|f| linkTo = f.read}
          debug("#{@path} will be a symlink to #{linkTo}")
          begin
            File.symlink(linkTo, @path)
          rescue NotImplementedError
            # windows doesn't do symlinks, for example
            # just bail
            File.unlink(fName) if File.exist?(fName)
            return
          rescue SystemCallError
            $stderr.puts "Could not write symlink #{@path}: #{$!}"
          end
        elsif fromNode.directory?
          # only get here when the dir doesn't exist. else they'd compare ==
          debug(@path)
          begin
            Dir.mkdir(@path) unless FileTest.exist?(@path)
          rescue SystemCallError
            $stderr.puts "Could not mkdir #{@path}: #{$!}"
          end

        else
          begin
            File.rename(fName, @path)
          rescue SystemCallError
            $stderr.puts "Could not write (rename) #{@path}: #{$!}"
          end
        end
        # clean up if the temp file is still there (as for links)
        File.unlink(fName) if File.exist?(fName)

        # update permissions
        linkCommand = fromNode.symlink? ? 'l' : ''
        begin
          File.send(linkCommand + 'chown', fromNode.owner, fromNode.group, @path)
          File.send(linkCommand + 'chmod', fromNode.permissions, @path)
        rescue NotImplementedError
          # no one has lchmod, but who really cares
        rescue SystemCallError
          $stderr.puts "Could not change owner/permissions on #{@path}: #{$!}"
        end
      else
        raise "Node provided as update source doesn't support :to_stream"
      end
    end
    def symlink?()
      FileTest.symlink?(@path)
    end
    def delete
      # don't try to delete the restore root dir
      # this is a quick fix to deal with the fact that the tree recurse has to visit the root node
      return unless @name != ''
      return unless FileTest.exist?(@path)
      begin
        if FileTest.directory?(@path)
          Dir.rmdir(@path)
        else
          File.unlink(@path)
        end
      rescue SystemCallError
        $stderr.puts "Could not delete #{@path}: #{$!}"
      end
    end
  end

end #module

def debug(str)
  $stderr.puts str if $S3syncOptions['--debug']
end

def ostats(last_stat = nil)
  stats = Hash.new(0)
  ObjectSpace.each_object {|o| stats[o.class] += 1}

  stats.sort {|(k1,v1),(k2,v2)| v2 <=> v1}.each do |k,v|
    $stderr.printf "%-30s %10d", k, v
    $stderr.printf " delta %10d", (v - last_stat[k]) if last_stat
    $stderr.puts
  end

  stats
end

# go!
S3sync::main
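For reference, a couple of hypothetical invocations of this tool, following the usage text and the trailing-slash reminder above (the bucket name mybucket and the local paths are placeholders; credentials come from the environment or from --key/--secret):

  s3sync -r /home/me/docs/ mybucket:docs          # trailing slash on source: sync the contents of docs/ under the prefix
  s3sync -r --delete --dryrun mybucket:docs/ /home/me/restore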