s3_meta_sync 0.13.0 → 0.15.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/s3_meta_sync.rb +2 -0
- data/lib/s3_meta_sync/syncer.rb +98 -30
- data/lib/s3_meta_sync/version.rb +3 -1
- data/lib/s3_meta_sync/zip.rb +3 -1
- metadata +20 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 0bb60ae142b42d7c3055e17872c6a512c37aade47ac5773f9e321c002c6f0a7e
|
4
|
+
data.tar.gz: 8cde3dd1297a970208c2acc42a21488a979ff3248806694648699606b1d76913
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be936302e585df271ada643c3f3ba13d54014957659ee3f8a06f9d3b35f4d15df542b32367dd7f31d5086e56ad84ea92753754a3db39f5042bf6f8da3fb0d750
|
7
|
+
data.tar.gz: 584a30fe6eaee8ce3b91f021a7a7afe4ab10620635a540598a3f8c3d833229a0cf1cfdb1099a6bd761dc09e838bff4df517073ddcadfeb17a6fd5b0e122e0c4e
|
data/lib/s3_meta_sync.rb
CHANGED
data/lib/s3_meta_sync/syncer.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "net/http"
|
2
4
|
require "open-uri"
|
3
5
|
require "yaml"
|
@@ -5,17 +7,24 @@ require "digest/md5"
|
|
5
7
|
require "fileutils"
|
6
8
|
require "tmpdir"
|
7
9
|
require "openssl"
|
10
|
+
require "mime/types"
|
8
11
|
|
9
|
-
require "aws-sdk-
|
12
|
+
require "aws-sdk-s3"
|
10
13
|
require "s3_meta_sync/zip"
|
11
14
|
|
12
15
|
module S3MetaSync
|
13
16
|
class Syncer
|
14
|
-
DEFAULT_REGION =
|
17
|
+
DEFAULT_REGION = "us-east-1"
|
15
18
|
STAGING_AREA_PREFIX = "s3ms_"
|
16
19
|
|
20
|
+
AWS_PUBLIC_ACCESS = "public-read"
|
21
|
+
AWS_PRIVATE_ACCESS = "private"
|
22
|
+
|
17
23
|
def initialize(config)
|
18
|
-
@config =
|
24
|
+
@config = {
|
25
|
+
acl: AWS_PUBLIC_ACCESS,
|
26
|
+
region: DEFAULT_REGION
|
27
|
+
}.merge(config)
|
19
28
|
end
|
20
29
|
|
21
30
|
def sync(source, destination)
|
@@ -93,7 +102,7 @@ module S3MetaSync
|
|
93
102
|
# Sometimes SIGTERM causes Dir.mktmpdir to not properly delete the temp folder
|
94
103
|
# Remove 1 day old folders
|
95
104
|
def delete_old_temp_folders
|
96
|
-
path = File.join(Dir.tmpdir, STAGING_AREA_PREFIX +
|
105
|
+
path = File.join(Dir.tmpdir, STAGING_AREA_PREFIX + "*")
|
97
106
|
|
98
107
|
day = 24 * 60 * 60
|
99
108
|
dirs = Dir.glob(path)
|
@@ -161,10 +170,12 @@ module S3MetaSync
|
|
161
170
|
content = Zip.zip(content) if @config[:zip] && path != META_FILE
|
162
171
|
|
163
172
|
object = {
|
173
|
+
acl: @config[:acl],
|
164
174
|
bucket: @bucket,
|
165
175
|
body: content,
|
166
|
-
|
167
|
-
|
176
|
+
content_encoding: content.encoding.to_s,
|
177
|
+
content_type: MIME::Types.of(path).first.to_s,
|
178
|
+
key: "#{destination}/#{path}"
|
168
179
|
}
|
169
180
|
|
170
181
|
object[:server_side_encryption] = @config[:server_side_encryption] if @config[:server_side_encryption]
|
@@ -175,11 +186,15 @@ module S3MetaSync
|
|
175
186
|
def delete_remote_files(remote, paths)
|
176
187
|
paths.each { |path| log "Deleting #{@bucket}:#{remote}/#{path}" }
|
177
188
|
if paths.any?
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
189
|
+
# keys are limited to 1000 per request: http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Bucket.html#delete_objects-instance_method
|
190
|
+
paths.each_slice(1000) do |sliced_paths|
|
191
|
+
log "Sending request for #{sliced_paths.size} keys"
|
192
|
+
s3.delete_objects(
|
193
|
+
delete: { objects: sliced_paths.map { |path| {key: "#{remote}/#{path}"} } },
|
194
|
+
request_payer: "requester",
|
195
|
+
bucket: @bucket
|
196
|
+
)
|
197
|
+
end
|
183
198
|
end
|
184
199
|
end
|
185
200
|
|
@@ -191,11 +206,19 @@ module S3MetaSync
|
|
191
206
|
end
|
192
207
|
|
193
208
|
def s3
|
194
|
-
@s3 ||=
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
209
|
+
@s3 ||= begin
|
210
|
+
config = { region: @config[:region] }
|
211
|
+
|
212
|
+
if @config[:credentials_path]
|
213
|
+
config[:credentials] = Aws::SharedCredentials.new(path: @config[:credentials_path], profile_name: "default")
|
214
|
+
else
|
215
|
+
config[:access_key_id] = @config[:key]
|
216
|
+
config[:secret_access_key] = @config[:secret]
|
217
|
+
config[:session_token] = @config[:session_token] if @config[:session_token]
|
218
|
+
end
|
219
|
+
|
220
|
+
Aws::S3::Client.new(config)
|
221
|
+
end
|
199
222
|
end
|
200
223
|
|
201
224
|
def generate_meta(source)
|
@@ -226,21 +249,50 @@ module S3MetaSync
|
|
226
249
|
|
227
250
|
def read_meta(source)
|
228
251
|
file = "#{source}/#{META_FILE}"
|
229
|
-
|
252
|
+
if File.exist?(file)
|
253
|
+
content = File.read(file)
|
254
|
+
parse_yaml_content(content) if content.size > 0
|
255
|
+
end
|
230
256
|
end
|
231
257
|
|
232
258
|
def download_meta(destination)
|
259
|
+
if private?
|
260
|
+
private_access_download_meta(destination)
|
261
|
+
else
|
262
|
+
public_access_download_meta(destination)
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
def private_access_download_meta(destination)
|
267
|
+
content = private_content_download(destination, META_FILE).string
|
268
|
+
|
269
|
+
raise S3MetaSync::RemoteWithoutMeta if content.empty? # if missing, upload everything
|
270
|
+
|
271
|
+
parse_yaml_content(content)
|
272
|
+
rescue Aws::S3::Errors::NoSuchKey, Aws::S3::Errors::AccessDenied # if requesting a file that doesn't exist AccessDenied is raised
|
273
|
+
retries ||= 0
|
274
|
+
|
275
|
+
raise S3MetaSync::RemoteWithoutMeta if retries >= 1
|
276
|
+
|
277
|
+
retries += 1
|
278
|
+
sleep 1 # maybe the remote meta was just updated ... give aws a second chance ...
|
279
|
+
retry
|
280
|
+
end
|
281
|
+
|
282
|
+
def public_access_download_meta(destination)
|
233
283
|
content = download_content("#{destination}/#{META_FILE}") { |io| io.read }
|
284
|
+
|
285
|
+
raise OpenURI::HTTPError.new("Content is empty", nil) if content.size == 0
|
286
|
+
|
234
287
|
parse_yaml_content(content)
|
235
288
|
rescue OpenURI::HTTPError
|
236
289
|
retries ||= 0
|
290
|
+
|
291
|
+
raise S3MetaSync::RemoteWithoutMeta if retries >= 1
|
292
|
+
|
237
293
|
retries += 1
|
238
|
-
|
239
|
-
|
240
|
-
retry
|
241
|
-
else
|
242
|
-
raise RemoteWithoutMeta
|
243
|
-
end
|
294
|
+
sleep 1 # maybe the remote meta was just updated ... give aws a second chance ...
|
295
|
+
retry
|
244
296
|
end
|
245
297
|
|
246
298
|
def parse_yaml_content(content)
|
@@ -249,16 +301,29 @@ module S3MetaSync
|
|
249
301
|
end
|
250
302
|
|
251
303
|
def download_file(source, path, destination, zip)
|
252
|
-
download =
|
253
|
-
|
254
|
-
|
255
|
-
|
304
|
+
download = if private?
|
305
|
+
private_content_download(source, path)
|
306
|
+
else
|
307
|
+
public_content_download(source, path)
|
308
|
+
end
|
309
|
+
|
310
|
+
download = S3MetaSync::Zip.unzip(download) if zip
|
311
|
+
FileUtils.mkdir_p(File.dirname("#{destination}/#{path}"))
|
256
312
|
|
257
313
|
# consumes less ram then File.write(path, content), possibly also faster
|
258
|
-
File.open(path,
|
314
|
+
File.open("#{destination}/#{path}", "wb") { |f| IO.copy_stream(download, f) }
|
259
315
|
download.close
|
260
316
|
end
|
261
317
|
|
318
|
+
def private_content_download(source, path)
|
319
|
+
obj = s3.get_object(bucket: @bucket, key: "#{source}/#{path}")
|
320
|
+
obj.body
|
321
|
+
end
|
322
|
+
|
323
|
+
def public_content_download(source, path)
|
324
|
+
download_content("#{source}/#{path}") # warning: using block form consumes more ram
|
325
|
+
end
|
326
|
+
|
262
327
|
def download_content(path)
|
263
328
|
log "Downloading #{path}"
|
264
329
|
url =
|
@@ -283,8 +348,7 @@ module S3MetaSync
|
|
283
348
|
log "#{e.class} error downloading #{url}, retrying #{http_error_retries}/#{max_retries}"
|
284
349
|
retry
|
285
350
|
else
|
286
|
-
|
287
|
-
raise
|
351
|
+
raise $!, "#{$!.message} -- while trying to download #{url}", $!.backtrace
|
288
352
|
end
|
289
353
|
rescue OpenSSL::SSL::SSLError
|
290
354
|
ssl_error_retries ||= 0
|
@@ -336,5 +400,9 @@ module S3MetaSync
|
|
336
400
|
def log(text, important=false)
|
337
401
|
$stderr.puts text if @config[:verbose] or important
|
338
402
|
end
|
403
|
+
|
404
|
+
def private?
|
405
|
+
@config[:acl] == AWS_PRIVATE_ACCESS
|
406
|
+
end
|
339
407
|
end
|
340
408
|
end
|
data/lib/s3_meta_sync/version.rb
CHANGED
data/lib/s3_meta_sync/zip.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "zlib"
|
2
4
|
require "stringio"
|
3
5
|
|
@@ -5,7 +7,7 @@ module S3MetaSync
|
|
5
7
|
module Zip
|
6
8
|
class << self
|
7
9
|
def zip(string)
|
8
|
-
io = StringIO.new("w")
|
10
|
+
io = StringIO.new("w".dup)
|
9
11
|
w_gz = Zlib::GzipWriter.new(io)
|
10
12
|
w_gz.write(string)
|
11
13
|
w_gz.close
|
metadata
CHANGED
@@ -1,29 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: s3_meta_sync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.15.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Grosser
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: aws-sdk-
|
14
|
+
name: aws-sdk-s3
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mime-types
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
description:
|
28
42
|
email: michael@grosser.it
|
29
43
|
executables:
|
@@ -55,8 +69,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
55
69
|
- !ruby/object:Gem::Version
|
56
70
|
version: '0'
|
57
71
|
requirements: []
|
58
|
-
|
59
|
-
rubygems_version: 2.6.11
|
72
|
+
rubygems_version: 3.1.4
|
60
73
|
signing_key:
|
61
74
|
specification_version: 4
|
62
75
|
summary: Sync folders with s3 using a metadata file and md5 diffs
|