logstash-output-s3 3.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -0
  3. data/lib/logstash/outputs/s3.rb +188 -308
  4. data/lib/logstash/outputs/s3/file_repository.rb +120 -0
  5. data/lib/logstash/outputs/s3/patch.rb +22 -0
  6. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  7. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  8. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  9. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  10. data/lib/logstash/outputs/s3/temporary_file_factory.rb +123 -0
  11. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  12. data/lib/logstash/outputs/s3/uploader.rb +59 -0
  13. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  14. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +49 -0
  15. data/logstash-output-s3.gemspec +2 -2
  16. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  17. data/spec/integration/gzip_file_spec.rb +62 -0
  18. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  19. data/spec/integration/restore_from_crash_spec.rb +39 -0
  20. data/spec/integration/size_rotation_spec.rb +59 -0
  21. data/spec/integration/stress_test_spec.rb +60 -0
  22. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  23. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +60 -0
  24. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  25. data/spec/outputs/s3/file_repository_spec.rb +146 -0
  26. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  27. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  28. data/spec/outputs/s3/temporary_file_factory_spec.rb +85 -0
  29. data/spec/outputs/s3/temporary_file_spec.rb +40 -0
  30. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  31. data/spec/outputs/s3/uploader_spec.rb +57 -0
  32. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  33. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +38 -0
  34. data/spec/outputs/s3_spec.rb +52 -335
  35. data/spec/spec_helper.rb +6 -0
  36. data/spec/supports/helpers.rb +33 -9
  37. metadata +65 -4
  38. data/spec/integration/s3_spec.rb +0 -97
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+ module LogStash
3
+ module Outputs
4
+ class S3
5
+ class WritableDirectoryValidator
6
+ def self.valid?(path)
7
+ begin
8
+ FileUtils.mkdir_p(path) unless Dir.exist?(path)
9
+ ::File.writable?(path)
10
+ rescue
11
+ false
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+ require "stud/temporary"
3
+ require "socket"
4
+ require "fileutils"
5
+
6
+ module LogStash
7
+ module Outputs
8
+ class S3
9
+ class WriteBucketPermissionValidator
10
+ def self.valid?(bucket_resource)
11
+ begin
12
+ upload_test_file(bucket_resource)
13
+ true
14
+ rescue
15
+ false
16
+ end
17
+ end
18
+
19
+ private
20
+ def self.upload_test_file(bucket_resource)
21
+ generated_at = Time.now
22
+
23
+ key = "logstash-programmatic-access-test-object-#{generated_at}"
24
+ content = "Logstash permission check on #{generated_at}, by #{Socket.gethostname}"
25
+
26
+ begin
27
+ f = Stud::Temporary.file
28
+ f.write(content)
29
+ f.fsync
30
+ f.close
31
+
32
+ obj = bucket_resource.object(key)
33
+ obj.upload_file(f)
34
+
35
+ begin
36
+ obj.delete
37
+ rescue
38
+ # Try to remove the files on the remote bucket,
39
+ # but don't raise any errors if that doesn't work,
40
+ # since we only really need `putobject`.
41
+ end
42
+ ensure
43
+ FileUtils.rm_rf(f.path)
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -1,7 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
-
3
2
  s.name = 'logstash-output-s3'
4
- s.version = '3.2.0'
3
+ s.version = '4.0.0'
5
4
  s.licenses = ['Apache-2.0']
6
5
  s.summary = "This plugin was created for store the logstash's events into Amazon Simple Storage Service (Amazon S3)"
7
6
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -22,6 +21,7 @@ Gem::Specification.new do |s|
22
21
  # Gem dependencies
23
22
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
24
23
  s.add_runtime_dependency 'logstash-mixin-aws'
24
+ s.add_runtime_dependency "concurrent-ruby"
25
25
  s.add_runtime_dependency 'stud', '~> 0.0.22'
26
26
  s.add_development_dependency 'logstash-devutils'
27
27
  s.add_development_dependency 'logstash-input-generator'
@@ -0,0 +1,92 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Dynamic Prefix", :integration => true do
8
+ include_context "setup plugin"
9
+
10
+ let(:options) { main_options.merge({ "rotation_strategy" => "size" }) }
11
+ let(:sandbox) { "test" }
12
+
13
+ before do
14
+ clean_remote_files(sandbox)
15
+ subject.register
16
+ subject.multi_receive_encoded(batch)
17
+ subject.close
18
+ end
19
+
20
+ context "With field string" do
21
+ let(:prefix) { "/#{sandbox}/%{server}/%{language}" }
22
+ let(:batch) do
23
+ b = {}
24
+ e1 = LogStash::Event.new({ "server" => "es1", "language" => "ruby"})
25
+ b[e1] = "es1-ruby"
26
+ e2 = LogStash::Event.new({ "server" => "es2", "language" => "java"})
27
+ b[e2] = "es2-ruby"
28
+ b
29
+ end
30
+
31
+ it "creates a specific quantity of files" do
32
+ expect(bucket_resource.objects(:prefix => sandbox).count).to eq(batch.size)
33
+ end
34
+
35
+ it "creates specific keys" do
36
+ re = Regexp.union(/^es1\/ruby\/ls.s3.sashimi/, /^es2\/java\/ls.s3.sashimi/)
37
+
38
+ bucket_resource.objects(:prefix => sandbox) do |obj|
39
+ expect(obj.key).to match(re)
40
+ end
41
+ end
42
+
43
+ it "Persists all events" do
44
+ download_directory = Stud::Temporary.pathname
45
+
46
+ FileUtils.rm_rf(download_directory)
47
+ FileUtils.mkdir_p(download_directory)
48
+
49
+ counter = 0
50
+ bucket_resource.objects(:prefix => sandbox).each do |object|
51
+ target = File.join(download_directory, "#{counter}.txt")
52
+ object.get(:response_target => target)
53
+ counter += 1
54
+ end
55
+ expect(Dir.glob(File.join(download_directory, "**", "*.txt")).inject(0) { |sum, f| sum + IO.readlines(f).size }).to eq(batch.size)
56
+ end
57
+ end
58
+
59
+ context "with unsupported char" do
60
+ let(:prefix) { "/#{sandbox}/%{server}/%{language}" }
61
+ let(:batch) do
62
+ b = {}
63
+ e1 = LogStash::Event.new({ "server" => "e>s1", "language" => "ruby"})
64
+ b[e1] = "es2-ruby"
65
+ b
66
+ end
67
+
68
+ it "convert them to underscore" do
69
+ re = Regexp.union(/^e_s1\/ruby\/ls.s3.sashimi/)
70
+
71
+ bucket_resource.objects(:prefix => sandbox) do |obj|
72
+ expect(obj.key).to match(re)
73
+ end
74
+ end
75
+ end
76
+
77
+ context "with dates" do
78
+ let(:prefix) { "/#{sandbox}/%{+YYYY-MM-d}" }
79
+
80
+ let(:batch) do
81
+ b = {}
82
+ e1 = LogStash::Event.new({ "server" => "e>s1", "language" => "ruby"})
83
+ b[e1] = "es2-ruby"
84
+ b
85
+ end
86
+
87
+ it "creates dated path" do
88
+ re = /^#{sandbox}\/\d{4}-\d{2}-\d{1,2}\/ls\.s3\./
89
+ expect(bucket_resource.objects(:prefix => sandbox).first.key).to match(re)
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,62 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Gzip File Time rotation with constant write", :integration => true do
8
+ include_context "setup plugin"
9
+
10
+ let(:time_file) { 0.004 }
11
+ let(:options) { main_options.merge({ "encoding" => "gzip",
12
+ "rotation_strategy" => "time" }) }
13
+ let(:number_of_events) { 5000 }
14
+ let(:batch_size) { 125 }
15
+ let(:event_encoded) { "Hello world" }
16
+ let(:batch) do
17
+ b = {}
18
+ number_of_events.times do
19
+ event = LogStash::Event.new({ "message" => event_encoded })
20
+ b[event] = "#{event_encoded}\n"
21
+ end
22
+ b
23
+ end
24
+ let(:minimum_number_of_time_rotation) { 3 }
25
+ let(:batch_step) { (number_of_events / minimum_number_of_time_rotation).ceil }
26
+
27
+ before do
28
+ clean_remote_files(prefix)
29
+ subject.register
30
+
31
+ # simulate batch read/write
32
+ batch.each_slice(batch_step) do |batch_time|
33
+ batch_time.each_slice(batch_size) do |smaller_batch|
34
+ subject.multi_receive_encoded(smaller_batch)
35
+ end
36
+ sleep(1)
37
+ end
38
+
39
+ subject.close
40
+ end
41
+
42
+ it "creates multiples files" do
43
+ # using close will upload the current file
44
+ expect(bucket_resource.objects(:prefix => prefix).count).to be_between(minimum_number_of_time_rotation, minimum_number_of_time_rotation + 1).inclusive
45
+ end
46
+
47
+ it "Persists all events" do
48
+ download_directory = Stud::Temporary.pathname
49
+
50
+ FileUtils.rm_rf(download_directory)
51
+ FileUtils.mkdir_p(download_directory)
52
+
53
+ counter = 0
54
+ bucket_resource.objects(:prefix => prefix).each do |object|
55
+ target = File.join(download_directory, "#{counter}.gz")
56
+ object.get(:response_target => target)
57
+ counter += 1
58
+ end
59
+
60
+ expect(Dir.glob(File.join(download_directory, "**", "*.gz")).inject(0) { |sum, f| sum + Zlib::GzipReader.new(File.open(f)).readlines.size }).to eq(number_of_events)
61
+ end
62
+ end
@@ -0,0 +1,63 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Gzip Size rotation", :integration => true do
8
+ include_context "setup plugin"
9
+
10
+ let(:document_size) { 20 * 1024 } # in bytes
11
+
12
+ let(:options) do
13
+ main_options.merge({
14
+ "encoding" => "gzip",
15
+ "size_file" => document_size,
16
+ "rotation_strategy" => "size" })
17
+ end
18
+
19
+ let(:number_of_events) { 1_000_000 }
20
+ let(:batch_size) { 125 }
21
+ let(:event_encoded) { "Hello world" * 20 }
22
+ let(:batch) do
23
+ b = {}
24
+ batch_size.times do
25
+ event = LogStash::Event.new({ "message" => event_encoded })
26
+ b[event] = "#{event_encoded}\n"
27
+ end
28
+ b
29
+ end
30
+ let(:number_of_files) { number_of_events / 50000 }
31
+
32
+ before do
33
+ clean_remote_files(prefix)
34
+ subject.register
35
+ (number_of_events/batch_size).times do
36
+ subject.multi_receive_encoded(batch)
37
+ end
38
+ subject.close
39
+ end
40
+
41
+ it "Rotates the files based on size" do
42
+ f = bucket_resource.objects(:prefix => prefix).first
43
+ expect(f.size).to be_between(document_size, document_size * 2).inclusive
44
+ end
45
+
46
+ it "Persists all events" do
47
+ download_directory = Stud::Temporary.pathname
48
+
49
+ FileUtils.rm_rf(download_directory)
50
+ FileUtils.mkdir_p(download_directory)
51
+
52
+ counter = 0
53
+ bucket_resource.objects(:prefix => prefix).each do |object|
54
+ target = File.join(download_directory, "#{counter}.txt.gz")
55
+ object.get(:response_target => target)
56
+ counter += 1
57
+ end
58
+
59
+ expect(Dir.glob(File.join(download_directory, "**", "*.gz")).inject(0) do |sum, f|
60
+ sum + Zlib::GzipReader.new(File.open(f)).readlines.size
61
+ end).to eq(number_of_events)
62
+ end
63
+ end
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Restore from crash", :integration => true do
8
+ include_context "setup plugin"
9
+
10
+ let(:options) { main_options.merge({ "restore" => true }) }
11
+
12
+ let(:number_of_files) { 5 }
13
+ let(:dummy_content) { "foobar\n" * 100 }
14
+
15
+ before do
16
+ clean_remote_files(prefix)
17
+ # Use the S3 factory to create multiple files with dummy content
18
+ factory = LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)
19
+
20
+ # Creating a factory always creates a file
21
+ factory.current.write(dummy_content)
22
+ factory.current.fsync
23
+
24
+ (number_of_files - 1).times do
25
+ factory.rotate!
26
+ factory.current.write(dummy_content)
27
+ factory.current.fsync
28
+ end
29
+ end
30
+
31
+ it "uploads the file to the bucket" do
32
+ subject.register
33
+ try(20) do
34
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(number_of_files)
35
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
36
+ end
37
+ end
38
+ end
39
+
@@ -0,0 +1,59 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Size rotation", :integration => true do
8
+ include_context "setup plugin"
9
+
10
+ let(:event_size) { "Hello world".bytesize }
11
+ let(:size_file) { batch_size * event_size * 2 }
12
+ let(:options) { main_options.merge({ "rotation_strategy" => "size", "size_file" => size_file }) }
13
+ let(:number_of_events) { 5000 }
14
+ let(:batch_size) { 125 }
15
+ let(:event_encoded) { "Hello world" }
16
+ let(:batch) do
17
+ b = {}
18
+ number_of_events.times do
19
+ event = LogStash::Event.new({ "message" => event_encoded })
20
+ b[event] = "#{event_encoded}\n"
21
+ end
22
+ b
23
+ end
24
+ let(:number_of_files) { number_of_events * event_size / size_file }
25
+
26
+ before do
27
+ clean_remote_files(prefix)
28
+ subject.register
29
+ batch.each_slice(batch_size) do |smaller_batch|
30
+ subject.multi_receive_encoded(smaller_batch)
31
+ end
32
+ subject.close
33
+ end
34
+
35
+ it "creates a specific quantity of files" do
36
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(number_of_files)
37
+ end
38
+
39
+ it "Rotates the files based on size" do
40
+ bucket_resource.objects(:prefix => prefix).each do |f|
41
+ expect(f.size).to be_between(size_file, size_file * 2).inclusive
42
+ end
43
+ end
44
+
45
+ it "Persists all events" do
46
+ download_directory = Stud::Temporary.pathname
47
+
48
+ FileUtils.rm_rf(download_directory)
49
+ FileUtils.mkdir_p(download_directory)
50
+
51
+ counter = 0
52
+ bucket_resource.objects(:prefix => prefix).each do |object|
53
+ target = File.join(download_directory, "#{counter}.txt")
54
+ object.get(:response_target => target)
55
+ counter += 1
56
+ end
57
+ expect(Dir.glob(File.join(download_directory, "**", "*.txt")).inject(0) { |sum, f| sum + IO.readlines(f).size }).to eq(number_of_events)
58
+ end
59
+ end
@@ -0,0 +1,60 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Upload current file on shutdown", :integration => true, :slow => true do
8
+ include_context "setup plugin"
9
+ let(:stress_time) { ENV["RUNTIME"] || 1 * 60 }
10
+ let(:options) { main_options }
11
+
12
+ let(:time_file) { 15 }
13
+ let(:batch_size) { 125 }
14
+ let(:event_encoded) { "Hello world" }
15
+ let(:batch) do
16
+ b = {}
17
+ batch_size.times do
18
+ event = LogStash::Event.new({ "message" => event_encoded })
19
+ b[event] = "#{event_encoded}\n"
20
+ end
21
+ b
22
+ end
23
+ let(:workers) { 3 }
24
+
25
+ it "Persists all events" do
26
+ started_at = Time.now
27
+ events_sent = {}
28
+
29
+ clean_remote_files(prefix)
30
+ subject.register
31
+
32
+ workers.times do
33
+ Thread.new do
34
+ events_sent[Thread.current] = 0
35
+
36
+ while Time.now - started_at < stress_time
37
+ subject.multi_receive_encoded(batch)
38
+ events_sent[Thread.current] += batch_size
39
+ end
40
+ end
41
+ end
42
+
43
+ sleep(1) while Time.now - started_at < stress_time
44
+
45
+ subject.close
46
+
47
+ download_directory = Stud::Temporary.pathname
48
+
49
+ FileUtils.rm_rf(download_directory)
50
+ FileUtils.mkdir_p(download_directory)
51
+
52
+ counter = 0
53
+ bucket_resource.objects(:prefix => prefix).each do |object|
54
+ target = File.join(download_directory, "#{counter}.txt")
55
+ object.get(:response_target => target)
56
+ counter += 1
57
+ end
58
+ expect(Dir.glob(File.join(download_directory, "**", "*.txt")).inject(0) { |sum, f| sum + IO.readlines(f).size }).to eq(events_sent.values.inject(0, :+))
59
+ end
60
+ end