s3_meta_sync 0.3.6 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4c9d56dad09b4d11098f9e57172fe7776c5d17c8
4
- data.tar.gz: 9e93c8e403705699f0aa3d0d78d96d1817d53861
3
+ metadata.gz: 417959a794098b61bbd76e514822749834515e30
4
+ data.tar.gz: b4d407672cd4084340394cfcdd79bb5bba6caead
5
5
  SHA512:
6
- metadata.gz: 4c79cc1844b2a247ea0fc1afc2d0e6db75836448bb048d7fd258882dcf8200ac6661afac214efc9e03b7c97eaa9ef5379c6c2a96efd33ae186fadc2ee6eb4925
7
- data.tar.gz: 5120a5e78050ee13b38b12ef79b05f1ea29f5fb4e120675eeadfa6378719bb18ff493fb8ab0ee1c0e73588d1e91a4d4e48fd8ca8cc3ef2300a04433d3d0aa714
6
+ metadata.gz: ada88904ff3f3035975bfbb123bb5ab7d90a71a1c704c203995290ef7507ccae63875b8836148330c774d33d8d2a501160a26d36d0fd8c21a9eade8c6ad702aa
7
+ data.tar.gz: 4f93506f40bc7ab0f7b8381ccc42a7b334174464fc6e6dfc7f4df10af8d8f32db48f0665345e80f0486e66bee427a8c7a2387d096a385835736c55b7ef703406
checksums.yaml.gz.sig CHANGED
Binary file
@@ -0,0 +1,251 @@
1
+ require "open-uri"
2
+ require "yaml"
3
+ require "digest/md5"
4
+ require "fileutils"
5
+ require "tmpdir"
6
+
7
+ require "aws/s3"
8
+
9
+ if RUBY_VERSION < "2.0.0"
10
+ # need to require these or upload in multiple threads will fail on systems with high load
11
+ require "aws/s3/s3_object"
12
+ require "aws/core/response"
13
+ require "aws/s3/object_collection"
14
+ end
15
+
16
+ require "s3_meta_sync/zip"
17
+
18
+ module S3MetaSync
19
+ class Syncer
20
+ def initialize(config)
21
+ @config = config
22
+ end
23
+
24
+ def sync(source, destination)
25
+ raise if source.end_with?("/") or destination.end_with?("/")
26
+
27
+ if destination.include?(":")
28
+ @bucket, destination = destination.split(":")
29
+ upload(source, destination)
30
+ else
31
+ @bucket, source = source.split(":")
32
+ download(source, destination)
33
+ end
34
+ end
35
+
36
+ private
37
+
38
+ def upload(source, destination)
39
+ corrupted = consume_corrupted_files(source)
40
+ remote_meta = begin
41
+ download_meta(destination)
42
+ rescue RemoteWithoutMeta
43
+ log "Remote has no .s3-meta-sync, uploading everything", true
44
+ {files: {}}
45
+ end
46
+ generate_meta(source)
47
+ local_files = read_meta(source)[:files]
48
+ remote_files = remote_meta[:files]
49
+ upload = if @config[:zip] == remote_meta[:zip]
50
+ local_files.select { |path, md5| remote_files[path] != md5 || corrupted.include?(path) }
51
+ else
52
+ local_files
53
+ end.map(&:first)
54
+ delete = remote_files.keys - local_files.keys
55
+ log "Uploading: #{upload.size} Deleting: #{delete.size}", true
56
+
57
+ upload_files(source, destination, upload)
58
+ delete_remote_files(destination, delete)
59
+ upload_file(source, META_FILE, destination)
60
+ end
61
+
62
+ def download(source, destination)
63
+ raise if @config[:zip]
64
+ remote_meta = download_meta(source)
65
+ generate_meta(destination)
66
+ local_files = read_meta(destination)[:files]
67
+ download = remote_meta[:files].select { |path, md5| local_files[path] != md5 }.map(&:first)
68
+ delete = local_files.keys - remote_meta[:files].keys
69
+
70
+ log "Downloading: #{download.size} Deleting: #{delete.size}", true
71
+
72
+ unless download.empty? && delete.empty?
73
+ Dir.mktmpdir do |staging_area|
74
+ copy_content(destination, staging_area)
75
+ download_files(source, staging_area, download, remote_meta[:zip])
76
+ delete_local_files(staging_area, delete)
77
+ download_file(source, META_FILE, staging_area, false)
78
+ verify_integrity!(staging_area, destination)
79
+ delete_empty_folders(staging_area)
80
+ self.class.swap_in_directory(destination, staging_area)
81
+ FileUtils.mkdir(staging_area)
82
+ end
83
+ end
84
+ end
85
+
86
+ def copy_content(destination, dir)
87
+ system "cp -R #{destination}/* #{dir} 2>/dev/null"
88
+ end
89
+
90
+ # almost atomic when destination and temp dir are not on the same device
91
+ def self.swap_in_directory(destination, dir)
92
+ next_dir = "#{destination}-next"
93
+ delete = "#{destination}-delete"
94
+
95
+ # clean up potential leftovers from last run
96
+ FileUtils.remove_dir(next_dir) if File.exist?(next_dir)
97
+ FileUtils.remove_dir(delete) if File.exist?(delete)
98
+
99
+ # move onto the same device
100
+ FileUtils.mv(dir, next_dir)
101
+
102
+ # copy permissions
103
+ FileUtils.chmod_R(File.stat(destination).mode, next_dir)
104
+
105
+ # swap
106
+ FileUtils.mv(destination, delete)
107
+ FileUtils.mv(next_dir, destination)
108
+
109
+ # cleanup old
110
+ FileUtils.remove_dir(delete)
111
+ end
112
+
113
+ def verify_integrity!(staging_area, destination)
114
+ file = "#{staging_area}/#{META_FILE}"
115
+ remote = YAML.load_file(file)[:files]
116
+ actual = meta_data(staging_area)[:files]
117
+
118
+ if remote != actual
119
+ corrupted = actual.select { |file, md5| remote[file] && remote[file] != md5 }.map(&:first)
120
+ File.write("#{destination}/#{CORRUPT_FILES_LOG}", corrupted.join("\n"))
121
+ log "corrupted files downloaded:\n#{corrupted.join("\n")}", true
122
+ raise RemoteCorrupt
123
+ end
124
+ end
125
+
126
+ def consume_corrupted_files(source)
127
+ log = "#{source}/#{CORRUPT_FILES_LOG}"
128
+ if File.exist?(log)
129
+ corrupted = File.read(log).split("\n")
130
+ log "force uploading #{corrupted.size} corrupted files", true
131
+ File.unlink log
132
+ corrupted
133
+ else
134
+ []
135
+ end
136
+ end
137
+
138
+ def upload_file(source, path, destination)
139
+ log "Uploading #{path}"
140
+ content = File.read("#{source}/#{path}")
141
+ content = Zip.zip(content) if @config[:zip]
142
+ s3.objects["#{destination}/#{path}"].write content, :acl => :public_read
143
+ end
144
+
145
+ def delete_remote_files(remote, paths)
146
+ paths.each { |path| log "Deleting #{@bucket}:#{remote}/#{path}" }
147
+ s3.objects.delete paths.map { |path| "#{remote}/#{path}" }
148
+ end
149
+
150
+ def delete_local_files(local, paths)
151
+ paths = paths.map { |path| "#{local}/#{path}" }
152
+ paths.each { |path| log "Deleting #{path}" }
153
+ File.delete(*paths)
154
+ end
155
+
156
+ def s3
157
+ @s3 ||= ::AWS::S3.new(
158
+ access_key_id: @config[:key],
159
+ secret_access_key: @config[:secret]
160
+ ).buckets[@bucket]
161
+ end
162
+
163
+ def generate_meta(source)
164
+ file = "#{source}/#{META_FILE}"
165
+ FileUtils.mkdir_p(File.dirname(file))
166
+ File.write(file, meta_data(source).to_yaml)
167
+ end
168
+
169
+ def meta_data(source)
170
+ return {} unless File.directory?(source)
171
+ files = Dir.chdir(source) do
172
+ files = Dir["**/*"].select { |f| File.file?(f) }
173
+ Hash[files.map { |file| [file, Digest::MD5.file(file).to_s] }]
174
+ end
175
+ {files: files}
176
+ end
177
+
178
+ def read_meta(source)
179
+ file = "#{source}/#{META_FILE}"
180
+ File.exist?(file) ? YAML.load(File.read(file)) : {}
181
+ end
182
+
183
+ def download_meta(destination)
184
+ content = download_content("#{destination}/#{META_FILE}")
185
+ result = YAML.load(content)
186
+ result.key?(:files) ? result : {files: result} # support new an old format
187
+ rescue
188
+ raise RemoteWithoutMeta
189
+ end
190
+
191
+ def download_file(source, path, destination, zip)
192
+ content = download_content("#{source}/#{path}")
193
+ content = Zip.unzip(content) if zip
194
+ file = "#{destination}/#{path}"
195
+ FileUtils.mkdir_p(File.dirname(file))
196
+ File.write(file, content, :encoding => content.encoding)
197
+ end
198
+
199
+ def download_content(path)
200
+ log "Downloading #{path}"
201
+ url = "https://s3#{"-#{region}" if region}.amazonaws.com/#{@bucket}/#{path}"
202
+ options = (@config[:ssl_none] ? {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE} : {})
203
+ open(url, options).read
204
+ rescue OpenURI::HTTPError
205
+ raise "Unable to download #{url} -- #{$!}"
206
+ rescue OpenSSL::SSL::SSLError
207
+ retries ||= 0
208
+ retries += 1
209
+ if retries == 1
210
+ log "SSL error downloading #{path}, retrying"
211
+ retry
212
+ else
213
+ raise
214
+ end
215
+ end
216
+
217
+ def delete_empty_folders(destination)
218
+ `find #{destination} -depth -empty -delete`
219
+ end
220
+
221
+ def download_files(source, destination, paths, zip)
222
+ in_multiple_threads(paths) do |path|
223
+ download_file(source, path, destination, zip)
224
+ end
225
+ end
226
+
227
+ def upload_files(source, destination, paths)
228
+ in_multiple_threads(paths) { |path| upload_file(source, path, destination) }
229
+ end
230
+
231
+ def region
232
+ @config[:region] unless @config[:region].to_s.empty?
233
+ end
234
+
235
+ def in_multiple_threads(data)
236
+ threads = [@config[:parallel] || 10, data.size].min
237
+ data = data.dup
238
+ (0...threads).to_a.map do
239
+ Thread.new do
240
+ while slice = data.shift
241
+ yield slice
242
+ end
243
+ end
244
+ end.each(&:join)
245
+ end
246
+
247
+ def log(text, important=false)
248
+ $stderr.puts text if @config[:verbose] or important
249
+ end
250
+ end
251
+ end
@@ -1,3 +1,3 @@
1
1
  module S3MetaSync
2
- VERSION = "0.3.6"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -0,0 +1,20 @@
1
+ require "zlib"
2
+ require "stringio"
3
+
4
+ module S3MetaSync
5
+ module Zip
6
+ class << self
7
+ def zip(string)
8
+ io = StringIO.new("w")
9
+ w_gz = Zlib::GzipWriter.new(io)
10
+ w_gz.write(string)
11
+ w_gz.close
12
+ io.string
13
+ end
14
+
15
+ def unzip(string)
16
+ Zlib::GzipReader.new(StringIO.new(string, "rb")).read
17
+ end
18
+ end
19
+ end
20
+ end
data/lib/s3_meta_sync.rb CHANGED
@@ -1,19 +1,6 @@
1
- require "s3_meta_sync/version"
2
- require "open-uri"
3
- require "yaml"
4
- require "digest/md5"
5
1
  require "optparse"
6
- require "fileutils"
7
- require "tmpdir"
8
-
9
- require "aws/s3"
10
-
11
- if RUBY_VERSION < "2.0.0"
12
- # need to require these or upload in multiple threads will fail on systems with high load
13
- require "aws/s3/s3_object"
14
- require "aws/core/response"
15
- require "aws/s3/object_collection"
16
- end
2
+ require "s3_meta_sync/version"
3
+ require "s3_meta_sync/syncer"
17
4
 
18
5
  module S3MetaSync
19
6
  RemoteWithoutMeta = Class.new(Exception)
@@ -21,223 +8,6 @@ module S3MetaSync
21
8
  META_FILE = ".s3-meta-sync"
22
9
  CORRUPT_FILES_LOG = "s3-meta-sync-corrupted.log"
23
10
 
24
- class Syncer
25
- def initialize(config)
26
- @config = config
27
- end
28
-
29
- def sync(source, destination)
30
- raise if source.end_with?("/") or destination.end_with?("/")
31
-
32
- if destination.include?(":")
33
- @bucket, destination = destination.split(":")
34
- upload(source, destination)
35
- else
36
- @bucket, source = source.split(":")
37
- download(source, destination)
38
- end
39
- end
40
-
41
- private
42
-
43
- def upload(source, destination)
44
- corrupted = consume_corrupted_files(source)
45
- remote_info = begin
46
- download_meta(destination)
47
- rescue RemoteWithoutMeta
48
- log "Remote has no .s3-meta-sync, uploading everything", true
49
- {}
50
- end
51
- generate_meta(source)
52
- local_info = read_meta(source)
53
- upload = local_info.select { |path, md5| remote_info[path] != md5 || corrupted.include?(path) }.map(&:first)
54
- delete = remote_info.keys - local_info.keys
55
- log "Uploading: #{upload.size} Deleting: #{delete.size}", true
56
-
57
- upload_files(source, destination, upload)
58
- delete_remote_files(destination, delete)
59
- upload_file(source, META_FILE, destination)
60
- end
61
-
62
- def download(source, destination)
63
- remote_info = download_meta(source)
64
- generate_meta(destination)
65
- local_info = read_meta(destination)
66
- download = remote_info.select { |path, md5| local_info[path] != md5 }.map(&:first)
67
- delete = local_info.keys - remote_info.keys
68
-
69
- log "Downloading: #{download.size} Deleting: #{delete.size}", true
70
-
71
- unless download.empty? && delete.empty?
72
- Dir.mktmpdir do |staging_area|
73
- copy_content(destination, staging_area)
74
- download_files(source, staging_area, download)
75
- delete_local_files(staging_area, delete)
76
- download_file(source, META_FILE, staging_area)
77
- verify_integrity!(staging_area, destination)
78
- delete_empty_folders(staging_area)
79
- self.class.swap_in_directory(destination, staging_area)
80
- FileUtils.mkdir(staging_area)
81
- end
82
- end
83
- end
84
-
85
- def copy_content(destination, dir)
86
- system "cp -R #{destination}/* #{dir} 2>/dev/null"
87
- end
88
-
89
- # almost atomic when destination and temp dir are not on the same device
90
- def self.swap_in_directory(destination, dir)
91
- next_dir = "#{destination}-next"
92
- delete = "#{destination}-delete"
93
-
94
- # clean up potential leftovers from last run
95
- FileUtils.remove_dir(next_dir) if File.exist?(next_dir)
96
- FileUtils.remove_dir(delete) if File.exist?(delete)
97
-
98
- # move onto the same device
99
- FileUtils.mv(dir, next_dir)
100
-
101
- # copy permissions
102
- FileUtils.chmod_R(File.stat(destination).mode, next_dir)
103
-
104
- # swap
105
- FileUtils.mv(destination, delete)
106
- FileUtils.mv(next_dir, destination)
107
-
108
- # cleanup old
109
- FileUtils.remove_dir(delete)
110
- end
111
-
112
- def verify_integrity!(staging_area, destination)
113
- file = "#{staging_area}/#{META_FILE}"
114
- remote = YAML.load_file(file)
115
- actual = meta_data(staging_area)
116
-
117
- if remote != actual
118
- corrupted = actual.select { |file, md5| remote[file] && remote[file] != md5 }.map(&:first)
119
- File.write("#{destination}/#{CORRUPT_FILES_LOG}", corrupted.join("\n"))
120
- log "corrupted files downloaded:\n#{corrupted.join("\n")}", true
121
- raise RemoteCorrupt
122
- end
123
- end
124
-
125
- def consume_corrupted_files(source)
126
- log = "#{source}/#{CORRUPT_FILES_LOG}"
127
- if File.exist?(log)
128
- corrupted = File.read(log).split("\n")
129
- log "force uploading #{corrupted.size} corrupted files", true
130
- File.unlink log
131
- corrupted
132
- else
133
- []
134
- end
135
- end
136
-
137
- def upload_file(source, path, destination)
138
- log "Uploading #{path}"
139
- s3.objects["#{destination}/#{path}"].write File.read("#{source}/#{path}"), :acl => :public_read
140
- end
141
-
142
- def delete_remote_files(remote, paths)
143
- paths.each { |path| log "Deleting #{@bucket}:#{remote}/#{path}" }
144
- s3.objects.delete paths.map { |path| "#{remote}/#{path}" }
145
- end
146
-
147
- def delete_local_files(local, paths)
148
- paths = paths.map { |path| "#{local}/#{path}" }
149
- paths.each { |path| log "Deleting #{path}" }
150
- File.delete(*paths)
151
- end
152
-
153
- def s3
154
- @s3 ||= ::AWS::S3.new(:access_key_id => @config[:key], :secret_access_key => @config[:secret]).buckets[@bucket]
155
- end
156
-
157
- def generate_meta(source)
158
- file = "#{source}/#{META_FILE}"
159
- FileUtils.mkdir_p(File.dirname(file))
160
- File.write(file, meta_data(source).to_yaml)
161
- end
162
-
163
- def meta_data(source)
164
- return {} unless File.directory?(source)
165
- Dir.chdir(source) do
166
- files = Dir["**/*"].select { |f| File.file?(f) }
167
- Hash[files.map { |file| [file, Digest::MD5.file(file).to_s] }]
168
- end
169
- end
170
-
171
- def read_meta(source)
172
- file = "#{source}/#{META_FILE}"
173
- File.exist?(file) ? YAML.load(File.read(file)) : {}
174
- end
175
-
176
- def download_meta(destination)
177
- content = download_content("#{destination}/#{META_FILE}")
178
- YAML.load(content)
179
- rescue
180
- raise RemoteWithoutMeta
181
- end
182
-
183
- def download_file(source, path, destination)
184
- content = download_content("#{source}/#{path}")
185
- file = "#{destination}/#{path}"
186
- FileUtils.mkdir_p(File.dirname(file))
187
- File.write(file, content, :encoding => content.encoding)
188
- end
189
-
190
- def download_content(path)
191
- log "Downloading #{path}"
192
- url = "https://s3#{"-#{region}" if region}.amazonaws.com/#{@bucket}/#{path}"
193
- options = (@config[:ssl_none] ? {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE} : {})
194
- open(url, options).read
195
- rescue OpenURI::HTTPError
196
- raise "Unable to download #{url} -- #{$!}"
197
- rescue OpenSSL::SSL::SSLError
198
- retries ||= 0
199
- retries += 1
200
- if retries == 1
201
- log "SSL error downloading #{path}, retrying"
202
- retry
203
- else
204
- raise
205
- end
206
- end
207
-
208
- def delete_empty_folders(destination)
209
- `find #{destination} -depth -empty -delete`
210
- end
211
-
212
- def download_files(source, destination, paths)
213
- in_multiple_threads(paths) { |path| download_file(source, path, destination) }
214
- end
215
-
216
- def upload_files(source, destination, paths)
217
- in_multiple_threads(paths) { |path| upload_file(source, path, destination) }
218
- end
219
-
220
- def region
221
- @config[:region] unless @config[:region].to_s.empty?
222
- end
223
-
224
- def in_multiple_threads(data)
225
- threads = [@config[:parallel] || 10, data.size].min
226
- data = data.dup
227
- (0...threads).to_a.map do
228
- Thread.new do
229
- while slice = data.shift
230
- yield slice
231
- end
232
- end
233
- end.each(&:join)
234
- end
235
-
236
- def log(text, important=false)
237
- $stderr.puts text if @config[:verbose] or important
238
- end
239
- end
240
-
241
11
  class << self
242
12
  def run(argv)
243
13
  source, dest, options = parse_options(argv)
@@ -247,8 +17,9 @@ module S3MetaSync
247
17
 
248
18
  def parse_options(argv)
249
19
  options = {
250
- :key => ENV["AWS_ACCESS_KEY_ID"],
251
- :secret => ENV["AWS_SECRET_ACCESS_KEY"]
20
+ key: ENV["AWS_ACCESS_KEY_ID"],
21
+ secret: ENV["AWS_SECRET_ACCESS_KEY"],
22
+ zip: false,
252
23
  }
253
24
  OptionParser.new do |opts|
254
25
  opts.banner = <<-BANNER.gsub(/^ {10}/, "")
@@ -268,7 +39,8 @@ module S3MetaSync
268
39
  opts.on("-s", "--secret SECRET", "AWS secret key") { |c| options[:secret] = c }
269
40
  opts.on("-r", "--region REGION", "AWS region if not us-standard") { |c| options[:region] = c }
270
41
  opts.on("-p", "--parallel COUNT", Integer, "Use COUNT threads for download/upload default: 10") { |c| options[:parallel] = c }
271
- opts.on("--ssl-none", "Do not verify ssl certs") { |c| options[:ssl_none] = true }
42
+ opts.on("--ssl-none", "Do not verify ssl certs") { options[:ssl_none] = true }
43
+ opts.on("-z", "--zip", "Zip when uploading to save bandwidth") { options[:zip] = true }
272
44
  opts.on("-V", "--verbose", "Verbose mode"){ options[:verbose] = true }
273
45
  opts.on("-h", "--help", "Show this.") { puts opts; exit }
274
46
  opts.on("-v", "--version", "Show Version"){ puts VERSION; exit}
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: s3_meta_sync
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Grosser
@@ -30,7 +30,7 @@ cert_chain:
30
30
  F5etKHZg0j3eHO31/i2HnswY04lqGImUu6aM5EnijFTB7PPW2KwKKM4+kKDYFdlw
31
31
  /0WV1Ng2/Y6qsHwmqGg2VlYj2h4=
32
32
  -----END CERTIFICATE-----
33
- date: 2014-11-19 00:00:00.000000000 Z
33
+ date: 2014-12-09 00:00:00.000000000 Z
34
34
  dependencies:
35
35
  - !ruby/object:Gem::Dependency
36
36
  name: aws-sdk
@@ -55,7 +55,9 @@ extra_rdoc_files: []
55
55
  files:
56
56
  - bin/s3-meta-sync
57
57
  - lib/s3_meta_sync.rb
58
+ - lib/s3_meta_sync/syncer.rb
58
59
  - lib/s3_meta_sync/version.rb
60
+ - lib/s3_meta_sync/zip.rb
59
61
  homepage: http://github.com/grosser/s3_meta_sync
60
62
  licenses:
61
63
  - MIT
metadata.gz.sig CHANGED
Binary file