logstash-output-s3 4.3.7 → 4.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 238e7be91fe40e4fcb80736f3e8d62e76b6f8108be35e5a0c054cb19bb239428
4
- data.tar.gz: eb0c70181aa21d20b794cd05d2e458b322b10f6e343ae41d96a1e6f49cf85858
3
+ metadata.gz: 7fe328033b222f10871103a51430bfe6f6a269f15460f70e72e423b6b135927e
4
+ data.tar.gz: e845c48187f640a948624f7da5bf3b6cec3eee3b25c81f0024014494a71363e5
5
5
  SHA512:
6
- metadata.gz: 1194c4ee3defe1104fcc1c68d914c20e2ca2548ba6405960c75d4b398a7cd9949287c3410ead913ff82b5183164c4bd5bf858fe836f94f63654c582efab78ab5
7
- data.tar.gz: cb135c3b28297db0f5ad90a4f7ab4ba6b8c112574815e38bdccdec685f5caa28ed7636f68569586f71205d651118dad7cc7dd0194786efb59bfb8113fa7d0afb
6
+ metadata.gz: 9c5a89d3551c5d199b0b289c17b0a36138c4efbb4c4191610167039aab4692588a0c9fc17a158af788ca704d9fbd454a9f2de501b2aa34d17f5aee5820345829
7
+ data.tar.gz: 989eef2c121767e315199177f0d672bea2b2ff18fcb6a5a9acf21e8b813adadbcd35ab0ce221eff4a633bdaf735e9267b7943a3fbefb788dee42bdc4d1df293a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 4.4.0
2
+ - Logstash recovers corrupted gzip and uploads to S3 [#249](https://github.com/logstash-plugins/logstash-output-s3/pull/249)
3
+
1
4
  ## 4.3.7
2
5
  - Refactor: avoid usage of CHM (JRuby 9.3.4 work-around) [#248](https://github.com/logstash-plugins/logstash-output-s3/pull/248)
3
6
 
data/README.md CHANGED
@@ -19,7 +19,7 @@ Need help? Try #logstash on freenode IRC or the https://discuss.elastic.co/c/log
19
19
 
20
20
  ## Developing
21
21
 
22
- ### 1. Plugin Developement and Testing
22
+ ### 1. Plugin Development and Testing
23
23
 
24
24
  #### Code
25
25
  - To get started, you'll need JRuby with the Bundler gem installed.
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 4.4.0
@@ -7,7 +7,7 @@ module LogStash
7
7
 
8
8
  def initialize(size_file)
9
9
  if size_file <= 0
10
- raise LogStash::ConfigurationError, "`size_file` need to be greather than 0"
10
+ raise LogStash::ConfigurationError, "`size_file` need to be greater than 0"
11
11
  end
12
12
 
13
13
  @size_file = size_file
@@ -2,15 +2,23 @@
2
2
  require "thread"
3
3
  require "forwardable"
4
4
  require "fileutils"
5
+ require "logstash-output-s3_jars"
5
6
 
6
7
  module LogStash
7
8
  module Outputs
8
9
  class S3
9
- # Wrap the actual file descriptor into an utility classe
10
- # It make it more OOP and easier to reason with the paths.
10
+
11
+ java_import 'org.logstash.plugins.outputs.s3.GzipUtil'
12
+
13
+ # Wrap the actual file descriptor into an utility class
14
+ # Make it more OOP and easier to reason with the paths.
11
15
  class TemporaryFile
12
16
  extend Forwardable
13
17
 
18
+ GZIP_EXTENSION = "txt.gz"
19
+ TXT_EXTENSION = "txt"
20
+ RECOVERED_FILE_NAME_TAG = "-recovered"
21
+
14
22
  def_delegators :@fd, :path, :write, :close, :fsync
15
23
 
16
24
  attr_reader :fd
@@ -33,8 +41,10 @@ module LogStash
33
41
  def size
34
42
  # Use the fd size to get the accurate result,
35
43
  # so we dont have to deal with fsync
36
- # if the file is close we will use the File::size
44
+ # if the file is closed, fd.size raises an IOError, so we fall back to File::size
37
45
  begin
46
+ # fd is nil when LS tries to recover gzip file but fails
47
+ return 0 unless @fd != nil
38
48
  @fd.size
39
49
  rescue IOError
40
50
  ::File.size(path)
@@ -45,7 +55,7 @@ module LogStash
45
55
  @key.gsub(/^\//, "")
46
56
  end
47
57
 
48
- # Each temporary file is made inside a directory named with an UUID,
58
+ # Each temporary file is created inside a directory named with an UUID,
49
59
  # instead of deleting the file directly and having the risk of deleting other files
50
60
  # we delete the root of the UUID, using a UUID also remove the risk of deleting unwanted file, it acts as
51
61
  # a sandbox.
@@ -58,13 +68,46 @@ module LogStash
58
68
  size == 0
59
69
  end
60
70
 
71
+ # false when @fd is nil, i.e. Logstash could not restore a corrupted file and no recovered file exists
72
+ def recoverable?
73
+ !@fd.nil?
74
+ end
75
+
61
76
  def self.create_from_existing_file(file_path, temporary_folder)
62
77
  key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
63
78
 
79
+ # recover gzip file and compress back before uploading to S3
80
+ if file_path.end_with?("." + GZIP_EXTENSION)
81
+ file_path = self.recover(file_path)
82
+ end
64
83
  TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
65
- ::File.open(file_path, "r"),
84
+ ::File.exist?(file_path) ? ::File.open(file_path, "r") : nil, # for the nil case, file size will be 0 and upload will be ignored.
66
85
  ::File.join(temporary_folder, key_parts.slice(0, 1)))
67
86
  end
87
+
88
+ def self.gzip_extension
89
+ GZIP_EXTENSION
90
+ end
91
+
92
+ def self.text_extension
93
+ TXT_EXTENSION
94
+ end
95
+
96
+ def self.recovery_file_name_tag
97
+ RECOVERED_FILE_NAME_TAG
98
+ end
99
+
100
+ private
101
+ def self.recover(file_path)
102
+ full_gzip_extension = "." + GZIP_EXTENSION
103
+ recovered_txt_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + "." + TXT_EXTENSION)
104
+ recovered_gzip_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + full_gzip_extension)
105
+ GzipUtil.recover(file_path, recovered_txt_file_path)
106
+ if ::File.exist?(recovered_txt_file_path) && !::File.zero?(recovered_txt_file_path)
107
+ GzipUtil.compress(recovered_txt_file_path, recovered_gzip_file_path)
108
+ end
109
+ recovered_gzip_file_path
110
+ end
68
111
  end
69
112
  end
70
113
  end
@@ -19,9 +19,6 @@ module LogStash
19
19
  # I do not have to mess around to check if the other directory have file in it before destroying them.
20
20
  class TemporaryFileFactory
21
21
  FILE_MODE = "a"
22
- GZIP_ENCODING = "gzip"
23
- GZIP_EXTENSION = "txt.gz"
24
- TXT_EXTENSION = "txt"
25
22
  STRFTIME = "%Y-%m-%dT%H.%M"
26
23
 
27
24
  attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
@@ -48,7 +45,7 @@ module LogStash
48
45
 
49
46
  private
50
47
  def extension
51
- gzip? ? GZIP_EXTENSION : TXT_EXTENSION
48
+ gzip? ? TemporaryFile.gzip_extension : TemporaryFile.text_extension
52
49
  end
53
50
 
54
51
  def gzip?
@@ -31,6 +31,7 @@ module LogStash
31
31
  end
32
32
  end
33
33
 
34
+ # uploads a TemporaryFile to S3
34
35
  def upload(file, options = {})
35
36
  upload_options = options.fetch(:upload_options, {})
36
37
 
@@ -68,6 +69,7 @@ module LogStash
68
69
  @workers_pool.shutdown
69
70
  @workers_pool.wait_for_termination(nil) # block until its done
70
71
  end
72
+
71
73
  end
72
74
  end
73
75
  end
@@ -97,6 +97,7 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
97
97
  :fallback_policy => :caller_runs
98
98
  })
99
99
 
100
+ GZIP_ENCODING = "gzip"
100
101
 
101
102
  config_name "s3"
102
103
  default :codec, "line"
@@ -181,7 +182,7 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
181
182
  config :tags, :validate => :array, :default => []
182
183
 
183
184
  # Specify the content encoding. Supports ("gzip"). Defaults to "none"
184
- config :encoding, :validate => ["none", "gzip"], :default => "none"
185
+ config :encoding, :validate => ["none", GZIP_ENCODING], :default => "none"
185
186
 
186
187
  # Define the strategy to use to decide when we need to rotate the file and push it to S3,
187
188
  # The default strategy is to check for both size and time, the first one to match will rotate the file.
@@ -315,7 +316,7 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
315
316
  :server_side_encryption => @server_side_encryption ? @server_side_encryption_algorithm : nil,
316
317
  :ssekms_key_id => @server_side_encryption_algorithm == "aws:kms" ? @ssekms_key_id : nil,
317
318
  :storage_class => @storage_class,
318
- :content_encoding => @encoding == "gzip" ? "gzip" : nil,
319
+ :content_encoding => @encoding == GZIP_ENCODING ? GZIP_ENCODING : nil,
319
320
  :multipart_threshold => @upload_multipart_threshold
320
321
  }
321
322
  end
@@ -397,16 +398,48 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
397
398
  @crash_uploader = Uploader.new(bucket_resource, @logger, CRASH_RECOVERY_THREADPOOL)
398
399
 
399
400
  temp_folder_path = Pathname.new(@temporary_directory)
400
- Dir.glob(::File.join(@temporary_directory, "**/*"))
401
- .select { |file| ::File.file?(file) }
402
- .each do |file|
403
- temp_file = TemporaryFile.create_from_existing_file(file, temp_folder_path)
404
- if temp_file.size > 0
405
- @logger.debug? && @logger.debug("Recovering from crash and uploading", :path => temp_file.path)
406
- @crash_uploader.upload_async(temp_file, :on_complete => method(:clean_temporary_file), :upload_options => upload_options)
401
+ files = Dir.glob(::File.join(@temporary_directory, "**/*"))
402
+ .select { |file_path| ::File.file?(file_path) }
403
+ under_recovery_files = get_under_recovery_files(files)
404
+
405
+ files.each do |file_path|
406
+ # when encoding is GZIP, if file is already recovering or recovered and uploading to S3, log and skip
407
+ if under_recovery_files.include?(file_path)
408
+ unless file_path.include?(TemporaryFile.gzip_extension)
409
+ @logger.warn("The #{file_path} file either under recover process or failed to recover before.")
410
+ end
407
411
  else
408
- clean_temporary_file(temp_file)
412
+ temp_file = TemporaryFile.create_from_existing_file(file_path, temp_folder_path)
413
+ # do not remove or upload if Logstash tries to recover file but fails
414
+ if temp_file.recoverable?
415
+ if temp_file.size > 0
416
+ @logger.debug? && @logger.debug("Recovering from crash and uploading", :path => temp_file.path)
417
+ @crash_uploader.upload_async(temp_file,
418
+ :on_complete => method(:clean_temporary_file),
419
+ :upload_options => upload_options)
420
+ else
421
+ clean_temporary_file(temp_file)
422
+ end
423
+ end
424
+ end
425
+ end
426
+ end
427
+
428
+ # collects the files that belong to a gzip recovery (recovery-tagged files and
429
+ # their original corrupted gzip files) into a skip list that the restore loop ignores
430
+ def get_under_recovery_files(files)
431
+ skip_files = Set.new
432
+ return skip_files unless @encoding == GZIP_ENCODING
433
+
434
+ files.each do |file_path|
435
+ if file_path.include?(TemporaryFile.recovery_file_name_tag)
436
+ skip_files << file_path
437
+ if file_path.include?(TemporaryFile.gzip_extension)
438
+ # also include the original corrupted gzip file
439
+ skip_files << file_path.gsub(TemporaryFile.recovery_file_name_tag, "")
440
+ end
409
441
  end
410
442
  end
443
+ skip_files
411
444
  end
412
445
  end
@@ -0,0 +1,4 @@
1
+ # AUTOGENERATED BY THE GRADLE SCRIPT. DO NOT EDIT.
2
+
3
+ require 'jar_dependencies'
4
+ require_jar('org.logstash.plugins.outputs.s3', 'logstash-output-s3', '4.4.0')
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+ require "jars/installer"
3
+ require "fileutils"
4
+
5
+ task :vendor do
6
+ exit(1) unless system './gradlew vendor'
7
+ version = File.read("VERSION").strip
8
+ end
9
+
10
+ desc "clean"
11
+ task :clean do
12
+ ["build", "vendor/jar-dependencies", "Gemfile.lock"].each do |p|
13
+ FileUtils.rm_rf(p)
14
+ end
15
+ end
@@ -1,13 +1,13 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-output-s3'
3
- s.version = '4.3.7'
3
+ s.version = '4.4.0'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = "Sends Logstash events to the Amazon Simple Storage Service"
6
6
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
7
7
  s.authors = ["Elastic"]
8
8
  s.email = 'info@elastic.co'
9
9
  s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html"
10
- s.require_paths = ["lib"]
10
+ s.require_paths = ["lib", "vendor/jar-dependencies"]
11
11
 
12
12
  # Files
13
13
  s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]
@@ -7,18 +7,17 @@ require "stud/temporary"
7
7
  describe "Restore from crash", :integration => true do
8
8
  include_context "setup plugin"
9
9
 
10
- let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write" }) }
11
-
12
10
  let(:number_of_files) { 5 }
13
11
  let(:dummy_content) { "foobar\n" * 100 }
14
- let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)}
15
12
 
16
13
  before do
17
14
  clean_remote_files(prefix)
18
15
  end
19
16
 
20
-
21
17
  context 'with a non-empty tempfile' do
18
+ let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write" }) }
19
+ let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)}
20
+
22
21
  before do
23
22
  # Creating a factory always create a file
24
23
  factory.current.write(dummy_content)
@@ -41,6 +40,9 @@ describe "Restore from crash", :integration => true do
41
40
  end
42
41
 
43
42
  context 'with an empty tempfile' do
43
+ let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write" }) }
44
+ let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)}
45
+
44
46
  before do
45
47
  factory.current
46
48
  factory.rotate!
@@ -63,5 +65,68 @@ describe "Restore from crash", :integration => true do
63
65
  expect(bucket_resource.objects(:prefix => prefix).count).to eq(0)
64
66
  end
65
67
  end
68
+
69
+ context "#gzip encoding" do
70
+ let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write", "encoding" => "gzip" }) }
71
+ let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "gzip", temporary_directory)}
72
+ describe "with empty recovered file" do
73
+ before do
74
+ # Creating a factory always create a file
75
+ factory.current.write('')
76
+ factory.current.fsync
77
+ factory.current.close
78
+ end
79
+
80
+ it 'should not upload and not remove temp file' do
81
+ subject.register
82
+ try(20) do
83
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(0)
84
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(1)
85
+ end
86
+ end
87
+ end
88
+
89
+ describe "with healthy recovered, size is greater than zero file" do
90
+ before do
91
+ # Creating a factory always create a file
92
+ factory.current.write(dummy_content)
93
+ factory.current.fsync
94
+ factory.current.close
95
+
96
+ (number_of_files - 1).times do
97
+ factory.rotate!
98
+ factory.current.write(dummy_content)
99
+ factory.current.fsync
100
+ factory.current.close
101
+ end
102
+ end
103
+
104
+ it 'should recover, upload to S3 and remove temp file' do
105
+ subject.register
106
+ try(20) do
107
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(number_of_files)
108
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
109
+ expect(bucket_resource.objects(:prefix => prefix).first.acl.grants.collect(&:permission)).to include("READ", "WRITE")
110
+ end
111
+ end
112
+ end
113
+
114
+ describe "with failure when recovering" do
115
+ before do
116
+ # Creating a factory always create a file
117
+ factory.current.write(dummy_content)
118
+ factory.current.fsync
119
+ end
120
+
121
+ it 'should not upload to S3 and not remove temp file' do
122
+ subject.register
123
+ try(20) do
124
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(0)
125
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(1)
126
+ end
127
+ end
128
+ end
129
+ end
130
+
66
131
  end
67
132
 
@@ -25,11 +25,11 @@ describe LogStash::Outputs::S3::SizeRotationPolicy do
25
25
  end
26
26
 
27
27
  it "raises an exception if the `size_file` is 0" do
28
- expect { described_class.new(0) }.to raise_error(LogStash::ConfigurationError, /need to be greather than 0/)
28
+ expect { described_class.new(0) }.to raise_error(LogStash::ConfigurationError, /need to be greater than 0/)
29
29
  end
30
30
 
31
31
  it "raises an exception if the `size_file` is < 0" do
32
- expect { described_class.new(-100) }.to raise_error(LogStash::ConfigurationError, /need to be greather than 0/)
32
+ expect { described_class.new(-100) }.to raise_error(LogStash::ConfigurationError, /need to be greater than 0/)
33
33
  end
34
34
 
35
35
  context "#needs_periodic?" do
@@ -5,6 +5,7 @@ shared_context "setup plugin" do
5
5
  let(:bucket) { ENV["AWS_LOGSTASH_TEST_BUCKET"] }
6
6
  let(:access_key_id) { ENV["AWS_ACCESS_KEY_ID"] }
7
7
  let(:secret_access_key) { ENV["AWS_SECRET_ACCESS_KEY"] }
8
+ let(:session_token) { ENV["AWS_SESSION_TOKEN"] }
8
9
  let(:size_file) { 100 }
9
10
  let(:time_file) { 100 }
10
11
  let(:tags) { [] }
@@ -18,6 +19,7 @@ shared_context "setup plugin" do
18
19
  "temporary_directory" => temporary_directory,
19
20
  "access_key_id" => access_key_id,
20
21
  "secret_access_key" => secret_access_key,
22
+ "session_token" => session_token,
21
23
  "size_file" => size_file,
22
24
  "time_file" => time_file,
23
25
  "region" => region,
@@ -25,7 +27,7 @@ shared_context "setup plugin" do
25
27
  }
26
28
  end
27
29
 
28
- let(:client_credentials) { Aws::Credentials.new(access_key_id, secret_access_key) }
30
+ let(:client_credentials) { Aws::Credentials.new(access_key_id, secret_access_key, session_token) }
29
31
  let(:bucket_resource) { Aws::S3::Bucket.new(bucket, { :credentials => client_credentials, :region => region }) }
30
32
 
31
33
  subject { LogStash::Outputs::S3.new(options) }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-output-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.3.7
4
+ version: 4.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-16 00:00:00.000000000 Z
11
+ date: 2022-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -129,7 +129,9 @@ files:
129
129
  - LICENSE
130
130
  - NOTICE.TXT
131
131
  - README.md
132
+ - VERSION
132
133
  - docs/index.asciidoc
134
+ - lib/logstash-output-s3_jars.rb
133
135
  - lib/logstash/outputs/s3.rb
134
136
  - lib/logstash/outputs/s3/file_repository.rb
135
137
  - lib/logstash/outputs/s3/patch.rb
@@ -142,6 +144,7 @@ files:
142
144
  - lib/logstash/outputs/s3/uploader.rb
143
145
  - lib/logstash/outputs/s3/writable_directory_validator.rb
144
146
  - lib/logstash/outputs/s3/write_bucket_permission_validator.rb
147
+ - lib/tasks/build.rake
145
148
  - logstash-output-s3.gemspec
146
149
  - spec/integration/dynamic_prefix_spec.rb
147
150
  - spec/integration/gzip_file_spec.rb
@@ -164,6 +167,7 @@ files:
164
167
  - spec/outputs/s3_spec.rb
165
168
  - spec/spec_helper.rb
166
169
  - spec/supports/helpers.rb
170
+ - vendor/jar-dependencies/org/logstash/plugins/outputs/s3/logstash-output-s3/4.4.0/logstash-output-s3-4.4.0.jar
167
171
  homepage: http://www.elastic.co/guide/en/logstash/current/index.html
168
172
  licenses:
169
173
  - Apache-2.0
@@ -174,6 +178,7 @@ post_install_message:
174
178
  rdoc_options: []
175
179
  require_paths:
176
180
  - lib
181
+ - vendor/jar-dependencies
177
182
  required_ruby_version: !ruby/object:Gem::Requirement
178
183
  requirements:
179
184
  - - ">="