s3_meta_sync 0.13.0 → 0.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/s3_meta_sync.rb +2 -0
- data/lib/s3_meta_sync/syncer.rb +98 -30
- data/lib/s3_meta_sync/version.rb +3 -1
- data/lib/s3_meta_sync/zip.rb +3 -1
- metadata +20 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 0bb60ae142b42d7c3055e17872c6a512c37aade47ac5773f9e321c002c6f0a7e
|
4
|
+
data.tar.gz: 8cde3dd1297a970208c2acc42a21488a979ff3248806694648699606b1d76913
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be936302e585df271ada643c3f3ba13d54014957659ee3f8a06f9d3b35f4d15df542b32367dd7f31d5086e56ad84ea92753754a3db39f5042bf6f8da3fb0d750
|
7
|
+
data.tar.gz: 584a30fe6eaee8ce3b91f021a7a7afe4ab10620635a540598a3f8c3d833229a0cf1cfdb1099a6bd761dc09e838bff4df517073ddcadfeb17a6fd5b0e122e0c4e
|
data/lib/s3_meta_sync.rb
CHANGED
data/lib/s3_meta_sync/syncer.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "net/http"
|
2
4
|
require "open-uri"
|
3
5
|
require "yaml"
|
@@ -5,17 +7,24 @@ require "digest/md5"
|
|
5
7
|
require "fileutils"
|
6
8
|
require "tmpdir"
|
7
9
|
require "openssl"
|
10
|
+
require "mime/types"
|
8
11
|
|
9
|
-
require "aws-sdk-
|
12
|
+
require "aws-sdk-s3"
|
10
13
|
require "s3_meta_sync/zip"
|
11
14
|
|
12
15
|
module S3MetaSync
|
13
16
|
class Syncer
|
14
|
-
DEFAULT_REGION =
|
17
|
+
DEFAULT_REGION = "us-east-1"
|
15
18
|
STAGING_AREA_PREFIX = "s3ms_"
|
16
19
|
|
20
|
+
AWS_PUBLIC_ACCESS = "public-read"
|
21
|
+
AWS_PRIVATE_ACCESS = "private"
|
22
|
+
|
17
23
|
def initialize(config)
|
18
|
-
@config =
|
24
|
+
@config = {
|
25
|
+
acl: AWS_PUBLIC_ACCESS,
|
26
|
+
region: DEFAULT_REGION
|
27
|
+
}.merge(config)
|
19
28
|
end
|
20
29
|
|
21
30
|
def sync(source, destination)
|
@@ -93,7 +102,7 @@ module S3MetaSync
|
|
93
102
|
# Sometimes SIGTERM causes Dir.mktmpdir to not properly delete the temp folder
|
94
103
|
# Remove 1 day old folders
|
95
104
|
def delete_old_temp_folders
|
96
|
-
path = File.join(Dir.tmpdir, STAGING_AREA_PREFIX +
|
105
|
+
path = File.join(Dir.tmpdir, STAGING_AREA_PREFIX + "*")
|
97
106
|
|
98
107
|
day = 24 * 60 * 60
|
99
108
|
dirs = Dir.glob(path)
|
@@ -161,10 +170,12 @@ module S3MetaSync
|
|
161
170
|
content = Zip.zip(content) if @config[:zip] && path != META_FILE
|
162
171
|
|
163
172
|
object = {
|
173
|
+
acl: @config[:acl],
|
164
174
|
bucket: @bucket,
|
165
175
|
body: content,
|
166
|
-
|
167
|
-
|
176
|
+
content_encoding: content.encoding.to_s,
|
177
|
+
content_type: MIME::Types.of(path).first.to_s,
|
178
|
+
key: "#{destination}/#{path}"
|
168
179
|
}
|
169
180
|
|
170
181
|
object[:server_side_encryption] = @config[:server_side_encryption] if @config[:server_side_encryption]
|
@@ -175,11 +186,15 @@ module S3MetaSync
|
|
175
186
|
def delete_remote_files(remote, paths)
|
176
187
|
paths.each { |path| log "Deleting #{@bucket}:#{remote}/#{path}" }
|
177
188
|
if paths.any?
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
189
|
+
# keys are limited to 1000 per request: http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Bucket.html#delete_objects-instance_method
|
190
|
+
paths.each_slice(1000) do |sliced_paths|
|
191
|
+
log "Sending request for #{sliced_paths.size} keys"
|
192
|
+
s3.delete_objects(
|
193
|
+
delete: { objects: sliced_paths.map { |path| {key: "#{remote}/#{path}"} } },
|
194
|
+
request_payer: "requester",
|
195
|
+
bucket: @bucket
|
196
|
+
)
|
197
|
+
end
|
183
198
|
end
|
184
199
|
end
|
185
200
|
|
@@ -191,11 +206,19 @@ module S3MetaSync
|
|
191
206
|
end
|
192
207
|
|
193
208
|
def s3
|
194
|
-
@s3 ||=
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
209
|
+
@s3 ||= begin
|
210
|
+
config = { region: @config[:region] }
|
211
|
+
|
212
|
+
if @config[:credentials_path]
|
213
|
+
config[:credentials] = Aws::SharedCredentials.new(path: @config[:credentials_path], profile_name: "default")
|
214
|
+
else
|
215
|
+
config[:access_key_id] = @config[:key]
|
216
|
+
config[:secret_access_key] = @config[:secret]
|
217
|
+
config[:session_token] = @config[:session_token] if @config[:session_token]
|
218
|
+
end
|
219
|
+
|
220
|
+
Aws::S3::Client.new(config)
|
221
|
+
end
|
199
222
|
end
|
200
223
|
|
201
224
|
def generate_meta(source)
|
@@ -226,21 +249,50 @@ module S3MetaSync
|
|
226
249
|
|
227
250
|
def read_meta(source)
|
228
251
|
file = "#{source}/#{META_FILE}"
|
229
|
-
|
252
|
+
if File.exist?(file)
|
253
|
+
content = File.read(file)
|
254
|
+
parse_yaml_content(content) if content.size > 0
|
255
|
+
end
|
230
256
|
end
|
231
257
|
|
232
258
|
def download_meta(destination)
|
259
|
+
if private?
|
260
|
+
private_access_download_meta(destination)
|
261
|
+
else
|
262
|
+
public_access_download_meta(destination)
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
def private_access_download_meta(destination)
|
267
|
+
content = private_content_download(destination, META_FILE).string
|
268
|
+
|
269
|
+
raise S3MetaSync::RemoteWithoutMeta if content.empty? # if missing, upload everything
|
270
|
+
|
271
|
+
parse_yaml_content(content)
|
272
|
+
rescue Aws::S3::Errors::NoSuchKey, Aws::S3::Errors::AccessDenied # if requesting a file that doesn't exist AccessDenied is raised
|
273
|
+
retries ||= 0
|
274
|
+
|
275
|
+
raise S3MetaSync::RemoteWithoutMeta if retries >= 1
|
276
|
+
|
277
|
+
retries += 1
|
278
|
+
sleep 1 # maybe the remote meta was just updated ... give aws a second chance ...
|
279
|
+
retry
|
280
|
+
end
|
281
|
+
|
282
|
+
def public_access_download_meta(destination)
|
233
283
|
content = download_content("#{destination}/#{META_FILE}") { |io| io.read }
|
284
|
+
|
285
|
+
raise OpenURI::HTTPError.new("Content is empty", nil) if content.size == 0
|
286
|
+
|
234
287
|
parse_yaml_content(content)
|
235
288
|
rescue OpenURI::HTTPError
|
236
289
|
retries ||= 0
|
290
|
+
|
291
|
+
raise S3MetaSync::RemoteWithoutMeta if retries >= 1
|
292
|
+
|
237
293
|
retries += 1
|
238
|
-
|
239
|
-
|
240
|
-
retry
|
241
|
-
else
|
242
|
-
raise RemoteWithoutMeta
|
243
|
-
end
|
294
|
+
sleep 1 # maybe the remote meta was just updated ... give aws a second chance ...
|
295
|
+
retry
|
244
296
|
end
|
245
297
|
|
246
298
|
def parse_yaml_content(content)
|
@@ -249,16 +301,29 @@ module S3MetaSync
|
|
249
301
|
end
|
250
302
|
|
251
303
|
def download_file(source, path, destination, zip)
|
252
|
-
download =
|
253
|
-
|
254
|
-
|
255
|
-
|
304
|
+
download = if private?
|
305
|
+
private_content_download(source, path)
|
306
|
+
else
|
307
|
+
public_content_download(source, path)
|
308
|
+
end
|
309
|
+
|
310
|
+
download = S3MetaSync::Zip.unzip(download) if zip
|
311
|
+
FileUtils.mkdir_p(File.dirname("#{destination}/#{path}"))
|
256
312
|
|
257
313
|
# consumes less ram than File.write(path, content), possibly also faster
|
258
|
-
File.open(path,
|
314
|
+
File.open("#{destination}/#{path}", "wb") { |f| IO.copy_stream(download, f) }
|
259
315
|
download.close
|
260
316
|
end
|
261
317
|
|
318
|
+
def private_content_download(source, path)
|
319
|
+
obj = s3.get_object(bucket: @bucket, key: "#{source}/#{path}")
|
320
|
+
obj.body
|
321
|
+
end
|
322
|
+
|
323
|
+
def public_content_download(source, path)
|
324
|
+
download_content("#{source}/#{path}") # warning: using block form consumes more ram
|
325
|
+
end
|
326
|
+
|
262
327
|
def download_content(path)
|
263
328
|
log "Downloading #{path}"
|
264
329
|
url =
|
@@ -283,8 +348,7 @@ module S3MetaSync
|
|
283
348
|
log "#{e.class} error downloading #{url}, retrying #{http_error_retries}/#{max_retries}"
|
284
349
|
retry
|
285
350
|
else
|
286
|
-
|
287
|
-
raise
|
351
|
+
raise $!, "#{$!.message} -- while trying to download #{url}", $!.backtrace
|
288
352
|
end
|
289
353
|
rescue OpenSSL::SSL::SSLError
|
290
354
|
ssl_error_retries ||= 0
|
@@ -336,5 +400,9 @@ module S3MetaSync
|
|
336
400
|
def log(text, important=false)
|
337
401
|
$stderr.puts text if @config[:verbose] or important
|
338
402
|
end
|
403
|
+
|
404
|
+
def private?
|
405
|
+
@config[:acl] == AWS_PRIVATE_ACCESS
|
406
|
+
end
|
339
407
|
end
|
340
408
|
end
|
data/lib/s3_meta_sync/version.rb
CHANGED
data/lib/s3_meta_sync/zip.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "zlib"
|
2
4
|
require "stringio"
|
3
5
|
|
@@ -5,7 +7,7 @@ module S3MetaSync
|
|
5
7
|
module Zip
|
6
8
|
class << self
|
7
9
|
def zip(string)
|
8
|
-
io = StringIO.new("w")
|
10
|
+
io = StringIO.new("w".dup)
|
9
11
|
w_gz = Zlib::GzipWriter.new(io)
|
10
12
|
w_gz.write(string)
|
11
13
|
w_gz.close
|
metadata
CHANGED
@@ -1,29 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: s3_meta_sync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.15.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Grosser
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: aws-sdk-
|
14
|
+
name: aws-sdk-s3
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mime-types
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
description:
|
28
42
|
email: michael@grosser.it
|
29
43
|
executables:
|
@@ -55,8 +69,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
55
69
|
- !ruby/object:Gem::Version
|
56
70
|
version: '0'
|
57
71
|
requirements: []
|
58
|
-
|
59
|
-
rubygems_version: 2.6.11
|
72
|
+
rubygems_version: 3.1.4
|
60
73
|
signing_key:
|
61
74
|
specification_version: 4
|
62
75
|
summary: Sync folders with s3 using a metadata file and md5 diffs
|