logstash-output-s3 3.2.0 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -0
  3. data/lib/logstash/outputs/s3.rb +188 -308
  4. data/lib/logstash/outputs/s3/file_repository.rb +120 -0
  5. data/lib/logstash/outputs/s3/patch.rb +22 -0
  6. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  7. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  8. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  9. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  10. data/lib/logstash/outputs/s3/temporary_file_factory.rb +123 -0
  11. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  12. data/lib/logstash/outputs/s3/uploader.rb +59 -0
  13. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  14. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +49 -0
  15. data/logstash-output-s3.gemspec +2 -2
  16. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  17. data/spec/integration/gzip_file_spec.rb +62 -0
  18. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  19. data/spec/integration/restore_from_crash_spec.rb +39 -0
  20. data/spec/integration/size_rotation_spec.rb +59 -0
  21. data/spec/integration/stress_test_spec.rb +60 -0
  22. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  23. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +60 -0
  24. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  25. data/spec/outputs/s3/file_repository_spec.rb +146 -0
  26. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  27. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  28. data/spec/outputs/s3/temporary_file_factory_spec.rb +85 -0
  29. data/spec/outputs/s3/temporary_file_spec.rb +40 -0
  30. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  31. data/spec/outputs/s3/uploader_spec.rb +57 -0
  32. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  33. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +38 -0
  34. data/spec/outputs/s3_spec.rb +52 -335
  35. data/spec/spec_helper.rb +6 -0
  36. data/spec/supports/helpers.rb +33 -9
  37. metadata +65 -4
  38. data/spec/integration/s3_spec.rb +0 -97
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+ module LogStash
3
+ module Outputs
4
+ class S3
5
+ class WritableDirectoryValidator
6
+ def self.valid?(path)
7
+ begin
8
+ FileUtils.mkdir_p(path) unless Dir.exist?(path)
9
+ ::File.writable?(path)
10
+ rescue
11
+ false
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+ require "stud/temporary"
3
+ require "socket"
4
+ require "fileutils"
5
+
6
+ module LogStash
7
+ module Outputs
8
+ class S3
9
+ class WriteBucketPermissionValidator
10
+ def self.valid?(bucket_resource)
11
+ begin
12
+ upload_test_file(bucket_resource)
13
+ true
14
+ rescue
15
+ false
16
+ end
17
+ end
18
+
19
+ private
20
+ def self.upload_test_file(bucket_resource)
21
+ generated_at = Time.now
22
+
23
+ key = "logstash-programmatic-access-test-object-#{generated_at}"
24
+ content = "Logstash permission check on #{generated_at}, by #{Socket.gethostname}"
25
+
26
+ begin
27
+ f = Stud::Temporary.file
28
+ f.write(content)
29
+ f.fsync
30
+ f.close
31
+
32
+ obj = bucket_resource.object(key)
33
+ obj.upload_file(f)
34
+
35
+ begin
36
+ obj.delete
37
+ rescue
38
+ # Try to remove the files on the remote bucket,
39
+ # but don't raise any errors if that doesn't work.
40
+ # since we only really need `putobject`.
41
+ end
42
+ ensure
43
+ FileUtils.rm_rf(f.path)
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -1,7 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
-
3
2
  s.name = 'logstash-output-s3'
4
- s.version = '3.2.0'
3
+ s.version = '4.0.0'
5
4
  s.licenses = ['Apache-2.0']
6
5
  s.summary = "This plugin was created for store the logstash's events into Amazon Simple Storage Service (Amazon S3)"
7
6
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -22,6 +21,7 @@ Gem::Specification.new do |s|
22
21
  # Gem dependencies
23
22
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
24
23
  s.add_runtime_dependency 'logstash-mixin-aws'
24
+ s.add_runtime_dependency "concurrent-ruby"
25
25
  s.add_runtime_dependency 'stud', '~> 0.0.22'
26
26
  s.add_development_dependency 'logstash-devutils'
27
27
  s.add_development_dependency 'logstash-input-generator'
@@ -0,0 +1,92 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Dynamic Prefix", :integration => true do
8
+ include_context "setup plugin"
9
+
10
+ let(:options) { main_options.merge({ "rotation_strategy" => "size" }) }
11
+ let(:sandbox) { "test" }
12
+
13
+ before do
14
+ clean_remote_files(sandbox)
15
+ subject.register
16
+ subject.multi_receive_encoded(batch)
17
+ subject.close
18
+ end
19
+
20
+ context "With field string" do
21
+ let(:prefix) { "/#{sandbox}/%{server}/%{language}" }
22
+ let(:batch) do
23
+ b = {}
24
+ e1 = LogStash::Event.new({ "server" => "es1", "language" => "ruby"})
25
+ b[e1] = "es1-ruby"
26
+ e2 = LogStash::Event.new({ "server" => "es2", "language" => "java"})
27
+ b[e2] = "es2-ruby"
28
+ b
29
+ end
30
+
31
+ it "creates a specific quantity of files" do
32
+ expect(bucket_resource.objects(:prefix => sandbox).count).to eq(batch.size)
33
+ end
34
+
35
+ it "creates specific keys" do
36
+ re = Regexp.union(/^es1\/ruby\/ls.s3.sashimi/, /^es2\/java\/ls.s3.sashimi/)
37
+
38
+ bucket_resource.objects(:prefix => sandbox) do |obj|
39
+ expect(obj.key).to match(re)
40
+ end
41
+ end
42
+
43
+ it "Persists all events" do
44
+ download_directory = Stud::Temporary.pathname
45
+
46
+ FileUtils.rm_rf(download_directory)
47
+ FileUtils.mkdir_p(download_directory)
48
+
49
+ counter = 0
50
+ bucket_resource.objects(:prefix => sandbox).each do |object|
51
+ target = File.join(download_directory, "#{counter}.txt")
52
+ object.get(:response_target => target)
53
+ counter += 1
54
+ end
55
+ expect(Dir.glob(File.join(download_directory, "**", "*.txt")).inject(0) { |sum, f| sum + IO.readlines(f).size }).to eq(batch.size)
56
+ end
57
+ end
58
+
59
+ context "with unsupported char" do
60
+ let(:prefix) { "/#{sandbox}/%{server}/%{language}" }
61
+ let(:batch) do
62
+ b = {}
63
+ e1 = LogStash::Event.new({ "server" => "e>s1", "language" => "ruby"})
64
+ b[e1] = "es2-ruby"
65
+ b
66
+ end
67
+
68
+ it "convert them to underscore" do
69
+ re = Regexp.union(/^e_s1\/ruby\/ls.s3.sashimi/)
70
+
71
+ bucket_resource.objects(:prefix => sandbox) do |obj|
72
+ expect(obj.key).to match(re)
73
+ end
74
+ end
75
+ end
76
+
77
+ context "with dates" do
78
+ let(:prefix) { "/#{sandbox}/%{+YYYY-MM-d}" }
79
+
80
+ let(:batch) do
81
+ b = {}
82
+ e1 = LogStash::Event.new({ "server" => "e>s1", "language" => "ruby"})
83
+ b[e1] = "es2-ruby"
84
+ b
85
+ end
86
+
87
+ it "creates dated path" do
88
+ re = /^#{sandbox}\/\d{4}-\d{2}-\d{1,2}\/ls\.s3\./
89
+ expect(bucket_resource.objects(:prefix => sandbox).first.key).to match(re)
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,62 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Gzip File Time rotation with constant write", :integration => true do
8
+ include_context "setup plugin"
9
+
10
+ let(:time_file) { 0.004 }
11
+ let(:options) { main_options.merge({ "encoding" => "gzip",
12
+ "rotation_strategy" => "time" }) }
13
+ let(:number_of_events) { 5000 }
14
+ let(:batch_size) { 125 }
15
+ let(:event_encoded) { "Hello world" }
16
+ let(:batch) do
17
+ b = {}
18
+ number_of_events.times do
19
+ event = LogStash::Event.new({ "message" => event_encoded })
20
+ b[event] = "#{event_encoded}\n"
21
+ end
22
+ b
23
+ end
24
+ let(:minimum_number_of_time_rotation) { 3 }
25
+ let(:batch_step) { (number_of_events / minimum_number_of_time_rotation).ceil }
26
+
27
+ before do
28
+ clean_remote_files(prefix)
29
+ subject.register
30
+
31
+ # simulate batch read/write
32
+ batch.each_slice(batch_step) do |batch_time|
33
+ batch_time.each_slice(batch_size) do |smaller_batch|
34
+ subject.multi_receive_encoded(smaller_batch)
35
+ end
36
+ sleep(1)
37
+ end
38
+
39
+ subject.close
40
+ end
41
+
42
+ it "creates multiples files" do
43
+ # using close will upload the current file
44
+ expect(bucket_resource.objects(:prefix => prefix).count).to be_between(minimum_number_of_time_rotation, minimum_number_of_time_rotation + 1).inclusive
45
+ end
46
+
47
+ it "Persists all events" do
48
+ download_directory = Stud::Temporary.pathname
49
+
50
+ FileUtils.rm_rf(download_directory)
51
+ FileUtils.mkdir_p(download_directory)
52
+
53
+ counter = 0
54
+ bucket_resource.objects(:prefix => prefix).each do |object|
55
+ target = File.join(download_directory, "#{counter}.gz")
56
+ object.get(:response_target => target)
57
+ counter += 1
58
+ end
59
+
60
+ expect(Dir.glob(File.join(download_directory, "**", "*.gz")).inject(0) { |sum, f| sum + Zlib::GzipReader.new(File.open(f)).readlines.size }).to eq(number_of_events)
61
+ end
62
+ end
@@ -0,0 +1,63 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Gzip Size rotation", :integration => true do
8
+ include_context "setup plugin"
9
+
10
+ let(:document_size) { 20 * 1024 } # in bytes
11
+
12
+ let(:options) do
13
+ main_options.merge({
14
+ "encoding" => "gzip",
15
+ "size_file" => document_size,
16
+ "rotation_strategy" => "size" })
17
+ end
18
+
19
+ let(:number_of_events) { 1_000_000 }
20
+ let(:batch_size) { 125 }
21
+ let(:event_encoded) { "Hello world" * 20 }
22
+ let(:batch) do
23
+ b = {}
24
+ batch_size.times do
25
+ event = LogStash::Event.new({ "message" => event_encoded })
26
+ b[event] = "#{event_encoded}\n"
27
+ end
28
+ b
29
+ end
30
+ let(:number_of_files) { number_of_events / 50000 }
31
+
32
+ before do
33
+ clean_remote_files(prefix)
34
+ subject.register
35
+ (number_of_events/batch_size).times do
36
+ subject.multi_receive_encoded(batch)
37
+ end
38
+ subject.close
39
+ end
40
+
41
+ it "Rotates the files based on size" do
42
+ f = bucket_resource.objects(:prefix => prefix).first
43
+ expect(f.size).to be_between(document_size, document_size * 2).inclusive
44
+ end
45
+
46
+ it "Persists all events" do
47
+ download_directory = Stud::Temporary.pathname
48
+
49
+ FileUtils.rm_rf(download_directory)
50
+ FileUtils.mkdir_p(download_directory)
51
+
52
+ counter = 0
53
+ bucket_resource.objects(:prefix => prefix).each do |object|
54
+ target = File.join(download_directory, "#{counter}.txt.gz")
55
+ object.get(:response_target => target)
56
+ counter += 1
57
+ end
58
+
59
+ expect(Dir.glob(File.join(download_directory, "**", "*.gz")).inject(0) do |sum, f|
60
+ sum + Zlib::GzipReader.new(File.open(f)).readlines.size
61
+ end).to eq(number_of_events)
62
+ end
63
+ end
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Restore from crash", :integration => true do
8
+ include_context "setup plugin"
9
+
10
+ let(:options) { main_options.merge({ "restore" => true }) }
11
+
12
+ let(:number_of_files) { 5 }
13
+ let(:dummy_content) { "foobar\n" * 100 }
14
+
15
+ before do
16
+ clean_remote_files(prefix)
17
+ # Use the S3 factory to create mutliples files with dummy content
18
+ factory = LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)
19
+
20
+ # Creating a factory always create a file
21
+ factory.current.write(dummy_content)
22
+ factory.current.fsync
23
+
24
+ (number_of_files - 1).times do
25
+ factory.rotate!
26
+ factory.current.write(dummy_content)
27
+ factory.current.fsync
28
+ end
29
+ end
30
+
31
+ it "uploads the file to the bucket" do
32
+ subject.register
33
+ try(20) do
34
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(number_of_files)
35
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
36
+ end
37
+ end
38
+ end
39
+
@@ -0,0 +1,59 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Size rotation", :integration => true do
8
+ include_context "setup plugin"
9
+
10
+ let(:event_size) { "Hello world".bytesize }
11
+ let(:size_file) { batch_size * event_size * 2 }
12
+ let(:options) { main_options.merge({ "rotation_strategy" => "size", "size_file" => size_file }) }
13
+ let(:number_of_events) { 5000 }
14
+ let(:batch_size) { 125 }
15
+ let(:event_encoded) { "Hello world" }
16
+ let(:batch) do
17
+ b = {}
18
+ number_of_events.times do
19
+ event = LogStash::Event.new({ "message" => event_encoded })
20
+ b[event] = "#{event_encoded}\n"
21
+ end
22
+ b
23
+ end
24
+ let(:number_of_files) { number_of_events * event_size / size_file }
25
+
26
+ before do
27
+ clean_remote_files(prefix)
28
+ subject.register
29
+ batch.each_slice(batch_size) do |smaller_batch|
30
+ subject.multi_receive_encoded(smaller_batch)
31
+ end
32
+ subject.close
33
+ end
34
+
35
+ it "creates a specific quantity of files" do
36
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(number_of_files)
37
+ end
38
+
39
+ it "Rotates the files based on size" do
40
+ bucket_resource.objects(:prefix => prefix).each do |f|
41
+ expect(f.size).to be_between(size_file, size_file * 2).inclusive
42
+ end
43
+ end
44
+
45
+ it "Persists all events" do
46
+ download_directory = Stud::Temporary.pathname
47
+
48
+ FileUtils.rm_rf(download_directory)
49
+ FileUtils.mkdir_p(download_directory)
50
+
51
+ counter = 0
52
+ bucket_resource.objects(:prefix => prefix).each do |object|
53
+ target = File.join(download_directory, "#{counter}.txt")
54
+ object.get(:response_target => target)
55
+ counter += 1
56
+ end
57
+ expect(Dir.glob(File.join(download_directory, "**", "*.txt")).inject(0) { |sum, f| sum + IO.readlines(f).size }).to eq(number_of_events)
58
+ end
59
+ end
@@ -0,0 +1,60 @@
1
+ # encoding: utf-8
2
+ require_relative "../spec_helper"
3
+ require "logstash/outputs/s3"
4
+ require "logstash/codecs/line"
5
+ require "stud/temporary"
6
+
7
+ describe "Upload current file on shutdown", :integration => true, :slow => true do
8
+ include_context "setup plugin"
9
+ let(:stress_time) { ENV["RUNTIME"] || 1 * 60 }
10
+ let(:options) { main_options }
11
+
12
+ let(:time_file) { 15 }
13
+ let(:batch_size) { 125 }
14
+ let(:event_encoded) { "Hello world" }
15
+ let(:batch) do
16
+ b = {}
17
+ batch_size.times do
18
+ event = LogStash::Event.new({ "message" => event_encoded })
19
+ b[event] = "#{event_encoded}\n"
20
+ end
21
+ b
22
+ end
23
+ let(:workers) { 3 }
24
+
25
+ it "Persists all events" do
26
+ started_at = Time.now
27
+ events_sent = {}
28
+
29
+ clean_remote_files(prefix)
30
+ subject.register
31
+
32
+ workers.times do
33
+ Thread.new do
34
+ events_sent[Thread.current] = 0
35
+
36
+ while Time.now - started_at < stress_time
37
+ subject.multi_receive_encoded(batch)
38
+ events_sent[Thread.current] += batch_size
39
+ end
40
+ end
41
+ end
42
+
43
+ sleep(1) while Time.now - started_at < stress_time
44
+
45
+ subject.close
46
+
47
+ download_directory = Stud::Temporary.pathname
48
+
49
+ FileUtils.rm_rf(download_directory)
50
+ FileUtils.mkdir_p(download_directory)
51
+
52
+ counter = 0
53
+ bucket_resource.objects(:prefix => prefix).each do |object|
54
+ target = File.join(download_directory, "#{counter}.txt")
55
+ object.get(:response_target => target)
56
+ counter += 1
57
+ end
58
+ expect(Dir.glob(File.join(download_directory, "**", "*.txt")).inject(0) { |sum, f| sum + IO.readlines(f).size }).to eq(events_sent.values.inject(0, :+))
59
+ end
60
+ end