s3_meta_sync 0.13.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 5a615961aa602927cb18e0e70d56be412f6dee9e
4
- data.tar.gz: 745648bd849c20d6ad82f2617fe5363d1f5218a9
2
+ SHA256:
3
+ metadata.gz: 0bb60ae142b42d7c3055e17872c6a512c37aade47ac5773f9e321c002c6f0a7e
4
+ data.tar.gz: 8cde3dd1297a970208c2acc42a21488a979ff3248806694648699606b1d76913
5
5
  SHA512:
6
- metadata.gz: 523b3f6b26a1d690be90cb2af4762560a8ddee28da87bf222d82089b789f9ea824bc06ec574fd42a013f8f4732eafeb60e58d3b30061719e91d2472c3c104f25
7
- data.tar.gz: 351f466424af9a242b9f29b323bbcba66d6347581a1da412bb0a53da4f33c89d1cecaecd58a08ff8678ceed8264afc9ef957874a8d26de492e022496b07494de
6
+ metadata.gz: be936302e585df271ada643c3f3ba13d54014957659ee3f8a06f9d3b35f4d15df542b32367dd7f31d5086e56ad84ea92753754a3db39f5042bf6f8da3fb0d750
7
+ data.tar.gz: 584a30fe6eaee8ce3b91f021a7a7afe4ab10620635a540598a3f8c3d833229a0cf1cfdb1099a6bd761dc09e838bff4df517073ddcadfeb17a6fd5b0e122e0c4e
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "optparse"
2
4
  require "s3_meta_sync/version"
3
5
  require "s3_meta_sync/syncer"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "net/http"
2
4
  require "open-uri"
3
5
  require "yaml"
@@ -5,17 +7,24 @@ require "digest/md5"
5
7
  require "fileutils"
6
8
  require "tmpdir"
7
9
  require "openssl"
10
+ require "mime/types"
8
11
 
9
- require "aws-sdk-core"
12
+ require "aws-sdk-s3"
10
13
  require "s3_meta_sync/zip"
11
14
 
12
15
  module S3MetaSync
13
16
  class Syncer
14
- DEFAULT_REGION = 'us-east-1'
17
+ DEFAULT_REGION = "us-east-1"
15
18
  STAGING_AREA_PREFIX = "s3ms_"
16
19
 
20
+ AWS_PUBLIC_ACCESS = "public-read"
21
+ AWS_PRIVATE_ACCESS = "private"
22
+
17
23
  def initialize(config)
18
- @config = config
24
+ @config = {
25
+ acl: AWS_PUBLIC_ACCESS,
26
+ region: DEFAULT_REGION
27
+ }.merge(config)
19
28
  end
20
29
 
21
30
  def sync(source, destination)
@@ -93,7 +102,7 @@ module S3MetaSync
93
102
  # Sometimes SIGTERM causes Dir.mktmpdir to not properly delete the temp folder
94
103
  # Remove 1 day old folders
95
104
  def delete_old_temp_folders
96
- path = File.join(Dir.tmpdir, STAGING_AREA_PREFIX + '*')
105
+ path = File.join(Dir.tmpdir, STAGING_AREA_PREFIX + "*")
97
106
 
98
107
  day = 24 * 60 * 60
99
108
  dirs = Dir.glob(path)
@@ -161,10 +170,12 @@ module S3MetaSync
161
170
  content = Zip.zip(content) if @config[:zip] && path != META_FILE
162
171
 
163
172
  object = {
173
+ acl: @config[:acl],
164
174
  bucket: @bucket,
165
175
  body: content,
166
- key: "#{destination}/#{path}",
167
- acl: 'public-read'
176
+ content_encoding: content.encoding.to_s,
177
+ content_type: MIME::Types.of(path).first.to_s,
178
+ key: "#{destination}/#{path}"
168
179
  }
169
180
 
170
181
  object[:server_side_encryption] = @config[:server_side_encryption] if @config[:server_side_encryption]
@@ -175,11 +186,15 @@ module S3MetaSync
175
186
  def delete_remote_files(remote, paths)
176
187
  paths.each { |path| log "Deleting #{@bucket}:#{remote}/#{path}" }
177
188
  if paths.any?
178
- s3.delete_objects(
179
- delete: { objects: paths.map { |path| {key: "#{remote}/#{path}"} } },
180
- request_payer: "requester",
181
- bucket: @bucket
182
- )
189
+ # keys are limited to 1000 per request: http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Bucket.html#delete_objects-instance_method
190
+ paths.each_slice(1000) do |sliced_paths|
191
+ log "Sending request for #{sliced_paths.size} keys"
192
+ s3.delete_objects(
193
+ delete: { objects: sliced_paths.map { |path| {key: "#{remote}/#{path}"} } },
194
+ request_payer: "requester",
195
+ bucket: @bucket
196
+ )
197
+ end
183
198
  end
184
199
  end
185
200
 
@@ -191,11 +206,19 @@ module S3MetaSync
191
206
  end
192
207
 
193
208
  def s3
194
- @s3 ||= ::Aws::S3::Client.new(
195
- access_key_id: @config[:key],
196
- secret_access_key: @config[:secret],
197
- region: @config[:region] || 'us-west-2'
198
- )
209
+ @s3 ||= begin
210
+ config = { region: @config[:region] }
211
+
212
+ if @config[:credentials_path]
213
+ config[:credentials] = Aws::SharedCredentials.new(path: @config[:credentials_path], profile_name: "default")
214
+ else
215
+ config[:access_key_id] = @config[:key]
216
+ config[:secret_access_key] = @config[:secret]
217
+ config[:session_token] = @config[:session_token] if @config[:session_token]
218
+ end
219
+
220
+ Aws::S3::Client.new(config)
221
+ end
199
222
  end
200
223
 
201
224
  def generate_meta(source)
@@ -226,21 +249,50 @@ module S3MetaSync
226
249
 
227
250
  def read_meta(source)
228
251
  file = "#{source}/#{META_FILE}"
229
- parse_yaml_content(File.read(file)) if File.exist?(file)
252
+ if File.exist?(file)
253
+ content = File.read(file)
254
+ parse_yaml_content(content) if content.size > 0
255
+ end
230
256
  end
231
257
 
232
258
  def download_meta(destination)
259
+ if private?
260
+ private_access_download_meta(destination)
261
+ else
262
+ public_access_download_meta(destination)
263
+ end
264
+ end
265
+
266
+ def private_access_download_meta(destination)
267
+ content = private_content_download(destination, META_FILE).string
268
+
269
+ raise S3MetaSync::RemoteWithoutMeta if content.empty? # if missing, upload everything
270
+
271
+ parse_yaml_content(content)
272
+ rescue Aws::S3::Errors::NoSuchKey, Aws::S3::Errors::AccessDenied # if requesting a file that doesn't exist AccessDenied is raised
273
+ retries ||= 0
274
+
275
+ raise S3MetaSync::RemoteWithoutMeta if retries >= 1
276
+
277
+ retries += 1
278
+ sleep 1 # maybe the remote meta was just updated ... give aws a second chance ...
279
+ retry
280
+ end
281
+
282
+ def public_access_download_meta(destination)
233
283
  content = download_content("#{destination}/#{META_FILE}") { |io| io.read }
284
+
285
+ raise OpenURI::HTTPError.new("Content is empty", nil) if content.size == 0
286
+
234
287
  parse_yaml_content(content)
235
288
  rescue OpenURI::HTTPError
236
289
  retries ||= 0
290
+
291
+ raise S3MetaSync::RemoteWithoutMeta if retries >= 1
292
+
237
293
  retries += 1
238
- if retries <= 1
239
- sleep 1 # maybe the remote meta was just updated ... give aws a second chance ...
240
- retry
241
- else
242
- raise RemoteWithoutMeta
243
- end
294
+ sleep 1 # maybe the remote meta was just updated ... give aws a second chance ...
295
+ retry
244
296
  end
245
297
 
246
298
  def parse_yaml_content(content)
@@ -249,16 +301,29 @@ module S3MetaSync
249
301
  end
250
302
 
251
303
  def download_file(source, path, destination, zip)
252
- download = download_content("#{source}/#{path}") # warning: using block form consumes more ram
253
- download = Zip.unzip(download) if zip
254
- path = "#{destination}/#{path}"
255
- FileUtils.mkdir_p(File.dirname(path))
304
+ download = if private?
305
+ private_content_download(source, path)
306
+ else
307
+ public_content_download(source, path)
308
+ end
309
+
310
+ download = S3MetaSync::Zip.unzip(download) if zip
311
+ FileUtils.mkdir_p(File.dirname("#{destination}/#{path}"))
256
312
 
257
313
  # consumes less ram then File.write(path, content), possibly also faster
258
- File.open(path, 'wb') { |f| IO.copy_stream(download, f) }
314
+ File.open("#{destination}/#{path}", "wb") { |f| IO.copy_stream(download, f) }
259
315
  download.close
260
316
  end
261
317
 
318
+ def private_content_download(source, path)
319
+ obj = s3.get_object(bucket: @bucket, key: "#{source}/#{path}")
320
+ obj.body
321
+ end
322
+
323
+ def public_content_download(source, path)
324
+ download_content("#{source}/#{path}") # warning: using block form consumes more ram
325
+ end
326
+
262
327
  def download_content(path)
263
328
  log "Downloading #{path}"
264
329
  url =
@@ -283,8 +348,7 @@ module S3MetaSync
283
348
  log "#{e.class} error downloading #{url}, retrying #{http_error_retries}/#{max_retries}"
284
349
  retry
285
350
  else
286
- $!.message << " -- while trying to download #{url}"
287
- raise
351
+ raise $!, "#{$!.message} -- while trying to download #{url}", $!.backtrace
288
352
  end
289
353
  rescue OpenSSL::SSL::SSLError
290
354
  ssl_error_retries ||= 0
@@ -336,5 +400,9 @@ module S3MetaSync
336
400
  def log(text, important=false)
337
401
  $stderr.puts text if @config[:verbose] or important
338
402
  end
403
+
404
+ def private?
405
+ @config[:acl] == AWS_PRIVATE_ACCESS
406
+ end
339
407
  end
340
408
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module S3MetaSync
2
- VERSION = "0.13.0"
4
+ VERSION = "0.15.1"
3
5
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "zlib"
2
4
  require "stringio"
3
5
 
@@ -5,7 +7,7 @@ module S3MetaSync
5
7
  module Zip
6
8
  class << self
7
9
  def zip(string)
8
- io = StringIO.new("w")
10
+ io = StringIO.new("w".dup)
9
11
  w_gz = Zlib::GzipWriter.new(io)
10
12
  w_gz.write(string)
11
13
  w_gz.close
metadata CHANGED
@@ -1,29 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: s3_meta_sync
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.15.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Grosser
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-05 00:00:00.000000000 Z
11
+ date: 2020-08-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: aws-sdk-core
14
+ name: aws-sdk-s3
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.0'
19
+ version: '1.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.0'
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: mime-types
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  description:
28
42
  email: michael@grosser.it
29
43
  executables:
@@ -55,8 +69,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
55
69
  - !ruby/object:Gem::Version
56
70
  version: '0'
57
71
  requirements: []
58
- rubyforge_project:
59
- rubygems_version: 2.6.11
72
+ rubygems_version: 3.1.4
60
73
  signing_key:
61
74
  specification_version: 4
62
75
  summary: Sync folders with s3 using a metadata file and md5 diffs