s3_meta_sync 0.3.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4c9d56dad09b4d11098f9e57172fe7776c5d17c8
4
- data.tar.gz: 9e93c8e403705699f0aa3d0d78d96d1817d53861
3
+ metadata.gz: 417959a794098b61bbd76e514822749834515e30
4
+ data.tar.gz: b4d407672cd4084340394cfcdd79bb5bba6caead
5
5
  SHA512:
6
- metadata.gz: 4c79cc1844b2a247ea0fc1afc2d0e6db75836448bb048d7fd258882dcf8200ac6661afac214efc9e03b7c97eaa9ef5379c6c2a96efd33ae186fadc2ee6eb4925
7
- data.tar.gz: 5120a5e78050ee13b38b12ef79b05f1ea29f5fb4e120675eeadfa6378719bb18ff493fb8ab0ee1c0e73588d1e91a4d4e48fd8ca8cc3ef2300a04433d3d0aa714
6
+ metadata.gz: ada88904ff3f3035975bfbb123bb5ab7d90a71a1c704c203995290ef7507ccae63875b8836148330c774d33d8d2a501160a26d36d0fd8c21a9eade8c6ad702aa
7
+ data.tar.gz: 4f93506f40bc7ab0f7b8381ccc42a7b334174464fc6e6dfc7f4df10af8d8f32db48f0665345e80f0486e66bee427a8c7a2387d096a385835736c55b7ef703406
checksums.yaml.gz.sig CHANGED
Binary file
@@ -0,0 +1,251 @@
1
+ require "open-uri"
2
+ require "yaml"
3
+ require "digest/md5"
4
+ require "fileutils"
5
+ require "tmpdir"
6
+
7
+ require "aws/s3"
8
+
9
+ if RUBY_VERSION < "2.0.0"
10
+ # need to require these or upload in multiple threads will fail on systems with high load
11
+ require "aws/s3/s3_object"
12
+ require "aws/core/response"
13
+ require "aws/s3/object_collection"
14
+ end
15
+
16
+ require "s3_meta_sync/zip"
17
+
18
module S3MetaSync
  # Synchronizes a local directory with a folder in an S3 bucket.
  #
  # Both sides carry a metadata file (META_FILE) holding a path => MD5 map.
  # Comparing the two maps decides which files are transferred or deleted.
  #
  # Config keys read by this class: :key, :secret, :region, :parallel,
  # :ssl_none, :zip and :verbose.
  class Syncer
    # @param config [Hash] options, see the class comment for the keys used
    def initialize(config)
      @config = config
    end

    # Uploads when destination looks like "bucket:folder", otherwise treats
    # source as "bucket:folder" and downloads it into destination.
    #
    # @param source [String] local directory or "bucket:folder"
    # @param destination [String] "bucket:folder" or local directory
    # @raise [RuntimeError] when either argument ends with a slash
    def sync(source, destination)
      if source.end_with?("/") || destination.end_with?("/")
        raise "source and destination must not end with /"
      end

      # limit the split so that colons inside the folder part are kept
      if destination.include?(":")
        @bucket, destination = destination.split(":", 2)
        upload(source, destination)
      else
        @bucket, source = source.split(":", 2)
        download(source, destination)
      end
    end

    # Replaces destination with dir.
    #
    # Almost atomic even when destination and the temp dir are not on the same
    # device: dir is first moved next to destination, so the swap itself only
    # moves siblings of destination.
    #
    # @param destination [String] existing directory that gets replaced
    # @param dir [String] directory holding the new content; it is moved away
    def self.swap_in_directory(destination, dir)
      next_dir = "#{destination}-next"
      delete = "#{destination}-delete"

      # clean up potential leftovers from last run
      FileUtils.remove_dir(next_dir) if File.exist?(next_dir)
      FileUtils.remove_dir(delete) if File.exist?(delete)

      # move onto the same device
      FileUtils.mv(dir, next_dir)

      # copy permissions (only the permission bits, stat mode also carries the file type)
      FileUtils.chmod_R(File.stat(destination).mode & 07777, next_dir)

      # swap
      FileUtils.mv(destination, delete)
      FileUtils.mv(next_dir, destination)

      # cleanup old
      FileUtils.remove_dir(delete)
    end

    private

    # Uploads new or changed files, deletes remote files that no longer exist
    # locally and finally uploads the fresh metadata file.
    def upload(source, destination)
      corrupted = consume_corrupted_files(source)
      remote_meta = begin
        download_meta(destination)
      rescue RemoteWithoutMeta
        log "Remote has no .s3-meta-sync, uploading everything", true
        {files: {}}
      end
      generate_meta(source)
      local_files = read_meta(source)[:files]
      remote_files = remote_meta[:files]

      # Compare by truthiness: the remote flag is nil when absent while the
      # command line default is false; both mean "not zipped".
      same_compression = (zip? == !!remote_meta[:zip])
      candidates =
        if same_compression
          local_files.select { |path, md5| remote_files[path] != md5 || corrupted.include?(path) }
        else
          local_files # compression changed, every remote file has to be rewritten
        end
      changed = candidates.keys
      removed = remote_files.keys - local_files.keys
      log "Uploading: #{changed.size} Deleting: #{removed.size}", true

      upload_files(source, destination, changed)
      delete_remote_files(destination, removed)
      upload_file(source, META_FILE, destination)
    end

    # Downloads changed files into a staging area, verifies them against the
    # remote metadata and then swaps the staging area in for destination.
    def download(source, destination)
      raise "zip is only supported when uploading" if @config[:zip]
      remote_meta = download_meta(source)
      generate_meta(destination)
      local_files = read_meta(destination)[:files]
      changed = remote_meta[:files].select { |path, md5| local_files[path] != md5 }.keys
      removed = local_files.keys - remote_meta[:files].keys

      log "Downloading: #{changed.size} Deleting: #{removed.size}", true
      return if changed.empty? && removed.empty?

      Dir.mktmpdir do |staging_area|
        copy_content(destination, staging_area)
        download_files(source, staging_area, changed, remote_meta[:zip])
        delete_local_files(staging_area, removed)
        download_file(source, META_FILE, staging_area, false)
        verify_integrity!(staging_area, destination)
        delete_empty_folders(staging_area)
        self.class.swap_in_directory(destination, staging_area)
        # the staging area was moved away, recreate it so mktmpdir can clean up
        FileUtils.mkdir(staging_area)
      end
    end

    # Copies the non-hidden top level entries of destination (recursively)
    # into dir, without going through a shell.
    def copy_content(destination, dir)
      entries = Dir[File.join(destination, "*")]
      FileUtils.cp_r(entries, dir, :dereference_root => false) unless entries.empty?
    end

    # Compares the downloaded metadata with the MD5s of the staged files.
    # On mismatch the differing paths are written to CORRUPT_FILES_LOG inside
    # destination, so that the next upload from there re-sends them.
    #
    # @raise [RemoteCorrupt] when staged files do not match the metadata
    def verify_integrity!(staging_area, destination)
      meta = load_meta(File.read("#{staging_area}/#{META_FILE}"))
      remote = normalize_meta(meta)[:files] # metadata may still be in the old flat format
      actual = meta_data(staging_area)[:files]
      return if remote == actual

      corrupted = actual.select { |path, md5| remote[path] && remote[path] != md5 }.keys
      File.write("#{destination}/#{CORRUPT_FILES_LOG}", corrupted.join("\n"))
      log "corrupted files downloaded:\n#{corrupted.join("\n")}", true
      raise RemoteCorrupt
    end

    # Reads and removes the corrupted-files log written by verify_integrity!.
    #
    # @return [Array<String>] paths that must be uploaded again, may be empty
    def consume_corrupted_files(source)
      log_file = "#{source}/#{CORRUPT_FILES_LOG}"
      return [] unless File.exist?(log_file)

      corrupted = File.read(log_file).split("\n")
      log "force uploading #{corrupted.size} corrupted files", true
      File.unlink log_file
      corrupted
    end

    # Uploads one file, gzipped when zip is enabled. The metadata file itself
    # is never zipped because it is always read back without unzipping.
    def upload_file(source, path, destination)
      log "Uploading #{path}"
      content = File.read("#{source}/#{path}")
      content = Zip.zip(content) if @config[:zip] && path != META_FILE
      s3.objects["#{destination}/#{path}"].write content, :acl => :public_read
    end

    def delete_remote_files(remote, paths)
      paths.each { |path| log "Deleting #{@bucket}:#{remote}/#{path}" }
      s3.objects.delete paths.map { |path| "#{remote}/#{path}" }
    end

    def delete_local_files(local, paths)
      paths = paths.map { |path| "#{local}/#{path}" }
      paths.each { |path| log "Deleting #{path}" }
      File.delete(*paths)
    end

    # Memoized handle on the bucket selected by #sync.
    def s3
      @s3 ||= ::AWS::S3.new(
        access_key_id: @config[:key],
        secret_access_key: @config[:secret]
      ).buckets[@bucket]
    end

    # Writes the metadata file for source, creating the directory if needed.
    # The zip flag is recorded when enabled so later runs can see how the
    # remote files were stored.
    def generate_meta(source)
      file = "#{source}/#{META_FILE}"
      FileUtils.mkdir_p(File.dirname(file))
      meta = meta_data(source)
      meta[:zip] = true if @config[:zip]
      File.write(file, meta.to_yaml)
    end

    # @return [Hash] {files: {relative path => MD5 hex digest}} for all
    #   non-hidden files below source, or {} when source is not a directory
    def meta_data(source)
      return {} unless File.directory?(source)
      files = Dir.chdir(source) do
        paths = Dir["**/*"].select { |f| File.file?(f) }
        Hash[paths.map { |path| [path, Digest::MD5.file(path).to_s] }]
      end
      {files: files}
    end

    # @return [Hash] parsed local metadata, {} when there is no metadata file
    def read_meta(source)
      file = "#{source}/#{META_FILE}"
      File.exist?(file) ? load_meta(File.read(file)) : {}
    end

    # @return [Hash] remote metadata in the {files: {...}} format
    # @raise [RemoteWithoutMeta] when it cannot be downloaded or parsed
    def download_meta(destination)
      content = download_content("#{destination}/#{META_FILE}")
      normalize_meta(load_meta(content))
    rescue
      raise RemoteWithoutMeta
    end

    # Parses metadata YAML. Where plain YAML.load is the restricted loader
    # (Psych 4+) it would reject the Symbol keys this class writes, so the
    # safe loader is used with Symbol explicitly permitted.
    def load_meta(yaml)
      if YAML.respond_to?(:unsafe_load)
        YAML.safe_load(yaml, permitted_classes: [Symbol])
      else
        YAML.load(yaml)
      end
    end

    # support new and old format (old: flat path => md5 hash)
    def normalize_meta(meta)
      meta.key?(:files) ? meta : {files: meta}
    end

    def download_file(source, path, destination, zip)
      content = download_content("#{source}/#{path}")
      content = Zip.unzip(content) if zip
      file = "#{destination}/#{path}"
      FileUtils.mkdir_p(File.dirname(file))
      File.write(file, content, :encoding => content.encoding)
    end

    # Fetches path from the bucket over https, retrying once on SSL errors.
    #
    # @raise [RuntimeError] on HTTP errors
    def download_content(path)
      log "Downloading #{path}"
      url = "https://s3#{"-#{region}" if region}.amazonaws.com/#{@bucket}/#{path}"
      options = (@config[:ssl_none] ? {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE} : {})
      attempts = 0
      begin
        attempts += 1
        # Kernel#open no longer opens URLs on Ruby 3, older rubies have no URI.open
        io = URI.respond_to?(:open) ? URI.open(url, options) : open(url, options)
        io.read
      rescue OpenURI::HTTPError => e
        raise "Unable to download #{url} -- #{e}"
      rescue OpenSSL::SSL::SSLError
        raise if attempts > 1
        log "SSL error downloading #{path}, retrying"
        retry
      end
    end

    # Removes empty directories below (and including) destination.
    # Restricted to directories so that verified empty files are kept, and
    # called without a shell so unusual paths are passed through untouched.
    def delete_empty_folders(destination)
      system("find", destination, "-depth", "-type", "d", "-empty", "-delete")
    end

    def download_files(source, destination, paths, zip)
      in_multiple_threads(paths) do |path|
        download_file(source, path, destination, zip)
      end
    end

    def upload_files(source, destination, paths)
      in_multiple_threads(paths) { |path| upload_file(source, path, destination) }
    end

    # @return [String, nil] configured region, nil when blank
    def region
      @config[:region] unless @config[:region].to_s.empty?
    end

    def zip?
      !!@config[:zip]
    end

    # Yields every element of data from a pool of worker threads.
    # Uses at least one worker, so a :parallel of 0 cannot silently skip work.
    def in_multiple_threads(data)
      pending = data.dup
      workers = [[@config[:parallel] || 10, 1].max, pending.size].min
      Array.new(workers) do
        Thread.new do
          while (item = pending.shift)
            yield item
          end
        end
      end.each(&:join)
    end

    def log(text, important=false)
      $stderr.puts text if @config[:verbose] || important
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module S3MetaSync
2
- VERSION = "0.3.6"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -0,0 +1,20 @@
1
+ require "zlib"
2
+ require "stringio"
3
+
4
module S3MetaSync
  # In-memory gzip helpers used for zipped uploads and downloads.
  module Zip
    class << self
      # Gzips a string.
      #
      # @param string [String] raw content
      # @return [String] gzip-compressed content (binary encoding)
      def zip(string)
        # Start from an empty binary buffer: the first argument of
        # StringIO.new is the buffer's initial content, not an open mode.
        io = StringIO.new(String.new)
        gz = Zlib::GzipWriter.new(io)
        gz.write(string)
        gz.close # flushes the gzip trailer; the buffer stays readable via #string
        io.string
      end

      # Reverses #zip.
      #
      # @param string [String] gzip-compressed content
      # @return [String] decompressed content
      # @raise [Zlib::GzipFile::Error] when string is not valid gzip data
      def unzip(string)
        gz = Zlib::GzipReader.new(StringIO.new(string, "rb"))
        begin
          gz.read
        ensure
          gz.close
        end
      end
    end
  end
end
data/lib/s3_meta_sync.rb CHANGED
@@ -1,19 +1,6 @@
1
- require "s3_meta_sync/version"
2
- require "open-uri"
3
- require "yaml"
4
- require "digest/md5"
5
1
  require "optparse"
6
- require "fileutils"
7
- require "tmpdir"
8
-
9
- require "aws/s3"
10
-
11
- if RUBY_VERSION < "2.0.0"
12
- # need to require these or upload in multiple threads will fail on systems with high load
13
- require "aws/s3/s3_object"
14
- require "aws/core/response"
15
- require "aws/s3/object_collection"
16
- end
2
+ require "s3_meta_sync/version"
3
+ require "s3_meta_sync/syncer"
17
4
 
18
5
  module S3MetaSync
19
6
  RemoteWithoutMeta = Class.new(Exception)
@@ -21,223 +8,6 @@ module S3MetaSync
21
8
  META_FILE = ".s3-meta-sync"
22
9
  CORRUPT_FILES_LOG = "s3-meta-sync-corrupted.log"
23
10
 
24
- class Syncer
25
- def initialize(config)
26
- @config = config
27
- end
28
-
29
- def sync(source, destination)
30
- raise if source.end_with?("/") or destination.end_with?("/")
31
-
32
- if destination.include?(":")
33
- @bucket, destination = destination.split(":")
34
- upload(source, destination)
35
- else
36
- @bucket, source = source.split(":")
37
- download(source, destination)
38
- end
39
- end
40
-
41
- private
42
-
43
- def upload(source, destination)
44
- corrupted = consume_corrupted_files(source)
45
- remote_info = begin
46
- download_meta(destination)
47
- rescue RemoteWithoutMeta
48
- log "Remote has no .s3-meta-sync, uploading everything", true
49
- {}
50
- end
51
- generate_meta(source)
52
- local_info = read_meta(source)
53
- upload = local_info.select { |path, md5| remote_info[path] != md5 || corrupted.include?(path) }.map(&:first)
54
- delete = remote_info.keys - local_info.keys
55
- log "Uploading: #{upload.size} Deleting: #{delete.size}", true
56
-
57
- upload_files(source, destination, upload)
58
- delete_remote_files(destination, delete)
59
- upload_file(source, META_FILE, destination)
60
- end
61
-
62
- def download(source, destination)
63
- remote_info = download_meta(source)
64
- generate_meta(destination)
65
- local_info = read_meta(destination)
66
- download = remote_info.select { |path, md5| local_info[path] != md5 }.map(&:first)
67
- delete = local_info.keys - remote_info.keys
68
-
69
- log "Downloading: #{download.size} Deleting: #{delete.size}", true
70
-
71
- unless download.empty? && delete.empty?
72
- Dir.mktmpdir do |staging_area|
73
- copy_content(destination, staging_area)
74
- download_files(source, staging_area, download)
75
- delete_local_files(staging_area, delete)
76
- download_file(source, META_FILE, staging_area)
77
- verify_integrity!(staging_area, destination)
78
- delete_empty_folders(staging_area)
79
- self.class.swap_in_directory(destination, staging_area)
80
- FileUtils.mkdir(staging_area)
81
- end
82
- end
83
- end
84
-
85
- def copy_content(destination, dir)
86
- system "cp -R #{destination}/* #{dir} 2>/dev/null"
87
- end
88
-
89
- # almost atomic when destination and temp dir are not on the same device
90
- def self.swap_in_directory(destination, dir)
91
- next_dir = "#{destination}-next"
92
- delete = "#{destination}-delete"
93
-
94
- # clean up potential leftovers from last run
95
- FileUtils.remove_dir(next_dir) if File.exist?(next_dir)
96
- FileUtils.remove_dir(delete) if File.exist?(delete)
97
-
98
- # move onto the same device
99
- FileUtils.mv(dir, next_dir)
100
-
101
- # copy permissions
102
- FileUtils.chmod_R(File.stat(destination).mode, next_dir)
103
-
104
- # swap
105
- FileUtils.mv(destination, delete)
106
- FileUtils.mv(next_dir, destination)
107
-
108
- # cleanup old
109
- FileUtils.remove_dir(delete)
110
- end
111
-
112
- def verify_integrity!(staging_area, destination)
113
- file = "#{staging_area}/#{META_FILE}"
114
- remote = YAML.load_file(file)
115
- actual = meta_data(staging_area)
116
-
117
- if remote != actual
118
- corrupted = actual.select { |file, md5| remote[file] && remote[file] != md5 }.map(&:first)
119
- File.write("#{destination}/#{CORRUPT_FILES_LOG}", corrupted.join("\n"))
120
- log "corrupted files downloaded:\n#{corrupted.join("\n")}", true
121
- raise RemoteCorrupt
122
- end
123
- end
124
-
125
- def consume_corrupted_files(source)
126
- log = "#{source}/#{CORRUPT_FILES_LOG}"
127
- if File.exist?(log)
128
- corrupted = File.read(log).split("\n")
129
- log "force uploading #{corrupted.size} corrupted files", true
130
- File.unlink log
131
- corrupted
132
- else
133
- []
134
- end
135
- end
136
-
137
- def upload_file(source, path, destination)
138
- log "Uploading #{path}"
139
- s3.objects["#{destination}/#{path}"].write File.read("#{source}/#{path}"), :acl => :public_read
140
- end
141
-
142
- def delete_remote_files(remote, paths)
143
- paths.each { |path| log "Deleting #{@bucket}:#{remote}/#{path}" }
144
- s3.objects.delete paths.map { |path| "#{remote}/#{path}" }
145
- end
146
-
147
- def delete_local_files(local, paths)
148
- paths = paths.map { |path| "#{local}/#{path}" }
149
- paths.each { |path| log "Deleting #{path}" }
150
- File.delete(*paths)
151
- end
152
-
153
- def s3
154
- @s3 ||= ::AWS::S3.new(:access_key_id => @config[:key], :secret_access_key => @config[:secret]).buckets[@bucket]
155
- end
156
-
157
- def generate_meta(source)
158
- file = "#{source}/#{META_FILE}"
159
- FileUtils.mkdir_p(File.dirname(file))
160
- File.write(file, meta_data(source).to_yaml)
161
- end
162
-
163
- def meta_data(source)
164
- return {} unless File.directory?(source)
165
- Dir.chdir(source) do
166
- files = Dir["**/*"].select { |f| File.file?(f) }
167
- Hash[files.map { |file| [file, Digest::MD5.file(file).to_s] }]
168
- end
169
- end
170
-
171
- def read_meta(source)
172
- file = "#{source}/#{META_FILE}"
173
- File.exist?(file) ? YAML.load(File.read(file)) : {}
174
- end
175
-
176
- def download_meta(destination)
177
- content = download_content("#{destination}/#{META_FILE}")
178
- YAML.load(content)
179
- rescue
180
- raise RemoteWithoutMeta
181
- end
182
-
183
- def download_file(source, path, destination)
184
- content = download_content("#{source}/#{path}")
185
- file = "#{destination}/#{path}"
186
- FileUtils.mkdir_p(File.dirname(file))
187
- File.write(file, content, :encoding => content.encoding)
188
- end
189
-
190
- def download_content(path)
191
- log "Downloading #{path}"
192
- url = "https://s3#{"-#{region}" if region}.amazonaws.com/#{@bucket}/#{path}"
193
- options = (@config[:ssl_none] ? {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE} : {})
194
- open(url, options).read
195
- rescue OpenURI::HTTPError
196
- raise "Unable to download #{url} -- #{$!}"
197
- rescue OpenSSL::SSL::SSLError
198
- retries ||= 0
199
- retries += 1
200
- if retries == 1
201
- log "SSL error downloading #{path}, retrying"
202
- retry
203
- else
204
- raise
205
- end
206
- end
207
-
208
- def delete_empty_folders(destination)
209
- `find #{destination} -depth -empty -delete`
210
- end
211
-
212
- def download_files(source, destination, paths)
213
- in_multiple_threads(paths) { |path| download_file(source, path, destination) }
214
- end
215
-
216
- def upload_files(source, destination, paths)
217
- in_multiple_threads(paths) { |path| upload_file(source, path, destination) }
218
- end
219
-
220
- def region
221
- @config[:region] unless @config[:region].to_s.empty?
222
- end
223
-
224
- def in_multiple_threads(data)
225
- threads = [@config[:parallel] || 10, data.size].min
226
- data = data.dup
227
- (0...threads).to_a.map do
228
- Thread.new do
229
- while slice = data.shift
230
- yield slice
231
- end
232
- end
233
- end.each(&:join)
234
- end
235
-
236
- def log(text, important=false)
237
- $stderr.puts text if @config[:verbose] or important
238
- end
239
- end
240
-
241
11
  class << self
242
12
  def run(argv)
243
13
  source, dest, options = parse_options(argv)
@@ -247,8 +17,9 @@ module S3MetaSync
247
17
 
248
18
  def parse_options(argv)
249
19
  options = {
250
- :key => ENV["AWS_ACCESS_KEY_ID"],
251
- :secret => ENV["AWS_SECRET_ACCESS_KEY"]
20
+ key: ENV["AWS_ACCESS_KEY_ID"],
21
+ secret: ENV["AWS_SECRET_ACCESS_KEY"],
22
+ zip: false,
252
23
  }
253
24
  OptionParser.new do |opts|
254
25
  opts.banner = <<-BANNER.gsub(/^ {10}/, "")
@@ -268,7 +39,8 @@ module S3MetaSync
268
39
  opts.on("-s", "--secret SECRET", "AWS secret key") { |c| options[:secret] = c }
269
40
  opts.on("-r", "--region REGION", "AWS region if not us-standard") { |c| options[:region] = c }
270
41
  opts.on("-p", "--parallel COUNT", Integer, "Use COUNT threads for download/upload default: 10") { |c| options[:parallel] = c }
271
- opts.on("--ssl-none", "Do not verify ssl certs") { |c| options[:ssl_none] = true }
42
+ opts.on("--ssl-none", "Do not verify ssl certs") { options[:ssl_none] = true }
43
+ opts.on("-z", "--zip", "Zip when uploading to save bandwidth") { options[:zip] = true }
272
44
  opts.on("-V", "--verbose", "Verbose mode"){ options[:verbose] = true }
273
45
  opts.on("-h", "--help", "Show this.") { puts opts; exit }
274
46
  opts.on("-v", "--version", "Show Version"){ puts VERSION; exit}
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: s3_meta_sync
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Grosser
@@ -30,7 +30,7 @@ cert_chain:
30
30
  F5etKHZg0j3eHO31/i2HnswY04lqGImUu6aM5EnijFTB7PPW2KwKKM4+kKDYFdlw
31
31
  /0WV1Ng2/Y6qsHwmqGg2VlYj2h4=
32
32
  -----END CERTIFICATE-----
33
- date: 2014-11-19 00:00:00.000000000 Z
33
+ date: 2014-12-09 00:00:00.000000000 Z
34
34
  dependencies:
35
35
  - !ruby/object:Gem::Dependency
36
36
  name: aws-sdk
@@ -55,7 +55,9 @@ extra_rdoc_files: []
55
55
  files:
56
56
  - bin/s3-meta-sync
57
57
  - lib/s3_meta_sync.rb
58
+ - lib/s3_meta_sync/syncer.rb
58
59
  - lib/s3_meta_sync/version.rb
60
+ - lib/s3_meta_sync/zip.rb
59
61
  homepage: http://github.com/grosser/s3_meta_sync
60
62
  licenses:
61
63
  - MIT
metadata.gz.sig CHANGED
Binary file