logstash-output-s3 4.3.7 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 238e7be91fe40e4fcb80736f3e8d62e76b6f8108be35e5a0c054cb19bb239428
4
- data.tar.gz: eb0c70181aa21d20b794cd05d2e458b322b10f6e343ae41d96a1e6f49cf85858
3
+ metadata.gz: 7fe328033b222f10871103a51430bfe6f6a269f15460f70e72e423b6b135927e
4
+ data.tar.gz: e845c48187f640a948624f7da5bf3b6cec3eee3b25c81f0024014494a71363e5
5
5
  SHA512:
6
- metadata.gz: 1194c4ee3defe1104fcc1c68d914c20e2ca2548ba6405960c75d4b398a7cd9949287c3410ead913ff82b5183164c4bd5bf858fe836f94f63654c582efab78ab5
7
- data.tar.gz: cb135c3b28297db0f5ad90a4f7ab4ba6b8c112574815e38bdccdec685f5caa28ed7636f68569586f71205d651118dad7cc7dd0194786efb59bfb8113fa7d0afb
6
+ metadata.gz: 9c5a89d3551c5d199b0b289c17b0a36138c4efbb4c4191610167039aab4692588a0c9fc17a158af788ca704d9fbd454a9f2de501b2aa34d17f5aee5820345829
7
+ data.tar.gz: 989eef2c121767e315199177f0d672bea2b2ff18fcb6a5a9acf21e8b813adadbcd35ab0ce221eff4a633bdaf735e9267b7943a3fbefb788dee42bdc4d1df293a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 4.4.0
2
+ - Logstash recovers corrupted gzip and uploads to S3 [#249](https://github.com/logstash-plugins/logstash-output-s3/pull/249)
3
+
1
4
  ## 4.3.7
2
5
  - Refactor: avoid usage of CHM (JRuby 9.3.4 work-around) [#248](https://github.com/logstash-plugins/logstash-output-s3/pull/248)
3
6
 
data/README.md CHANGED
@@ -19,7 +19,7 @@ Need help? Try #logstash on freenode IRC or the https://discuss.elastic.co/c/log
19
19
 
20
20
  ## Developing
21
21
 
22
- ### 1. Plugin Developement and Testing
22
+ ### 1. Plugin Development and Testing
23
23
 
24
24
  #### Code
25
25
  - To get started, you'll need JRuby with the Bundler gem installed.
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 4.4.0
@@ -7,7 +7,7 @@ module LogStash
7
7
 
8
8
  def initialize(size_file)
9
9
  if size_file <= 0
10
- raise LogStash::ConfigurationError, "`size_file` need to be greather than 0"
10
+ raise LogStash::ConfigurationError, "`size_file` need to be greater than 0"
11
11
  end
12
12
 
13
13
  @size_file = size_file
@@ -2,15 +2,23 @@
2
2
  require "thread"
3
3
  require "forwardable"
4
4
  require "fileutils"
5
+ require "logstash-output-s3_jars"
5
6
 
6
7
  module LogStash
7
8
  module Outputs
8
9
  class S3
9
- # Wrap the actual file descriptor into an utility classe
10
- # It make it more OOP and easier to reason with the paths.
10
+
11
+ java_import 'org.logstash.plugins.outputs.s3.GzipUtil'
12
+
13
+ # Wrap the actual file descriptor into a utility class
14
+ # This makes the code more object-oriented and the paths easier to reason about.
11
15
  class TemporaryFile
12
16
  extend Forwardable
13
17
 
18
+ GZIP_EXTENSION = "txt.gz"
19
+ TXT_EXTENSION = "txt"
20
+ RECOVERED_FILE_NAME_TAG = "-recovered"
21
+
14
22
  def_delegators :@fd, :path, :write, :close, :fsync
15
23
 
16
24
  attr_reader :fd
@@ -33,8 +41,10 @@ module LogStash
33
41
  def size
34
42
  # Use the fd size to get the accurate result,
35
43
  # so we don't have to deal with fsync
36
- # if the file is close we will use the File::size
44
+ # if the file is closed, fd.size raises an IOError, so we fall back to File::size
37
45
  begin
46
+ # fd is nil when Logstash tried to recover a gzip file but failed
47
+ return 0 unless @fd != nil
38
48
  @fd.size
39
49
  rescue IOError
40
50
  ::File.size(path)
@@ -45,7 +55,7 @@ module LogStash
45
55
  @key.gsub(/^\//, "")
46
56
  end
47
57
 
48
- # Each temporary file is made inside a directory named with an UUID,
58
+ # Each temporary file is created inside a directory named with a UUID,
49
59
  # instead of deleting the file directly and having the risk of deleting other files
50
60
  # we delete the root of the UUID; using a UUID also removes the risk of deleting unwanted files, it acts as
51
61
  # a sandbox.
@@ -58,13 +68,46 @@ module LogStash
58
68
  size == 0
59
69
  end
60
70
 
71
+ # false only when Logstash could not restore a corrupted file, i.e. the recovered file does not exist
72
+ def recoverable?
73
+ !@fd.nil?
74
+ end
75
+
61
76
  def self.create_from_existing_file(file_path, temporary_folder)
62
77
  key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
63
78
 
79
+ # recover gzip file and compress back before uploading to S3
80
+ if file_path.end_with?("." + GZIP_EXTENSION)
81
+ file_path = self.recover(file_path)
82
+ end
64
83
  TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
65
- ::File.open(file_path, "r"),
84
+ ::File.exist?(file_path) ? ::File.open(file_path, "r") : nil, # for the nil case, file size will be 0 and upload will be ignored.
66
85
  ::File.join(temporary_folder, key_parts.slice(0, 1)))
67
86
  end
87
+
88
+ def self.gzip_extension
89
+ GZIP_EXTENSION
90
+ end
91
+
92
+ def self.text_extension
93
+ TXT_EXTENSION
94
+ end
95
+
96
+ def self.recovery_file_name_tag
97
+ RECOVERED_FILE_NAME_TAG
98
+ end
99
+
100
+ private
101
+ def self.recover(file_path)
102
+ full_gzip_extension = "." + GZIP_EXTENSION
103
+ recovered_txt_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + "." + TXT_EXTENSION)
104
+ recovered_gzip_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + full_gzip_extension)
105
+ GzipUtil.recover(file_path, recovered_txt_file_path)
106
+ if ::File.exist?(recovered_txt_file_path) && !::File.zero?(recovered_txt_file_path)
107
+ GzipUtil.compress(recovered_txt_file_path, recovered_gzip_file_path)
108
+ end
109
+ recovered_gzip_file_path
110
+ end
68
111
  end
69
112
  end
70
113
  end
@@ -19,9 +19,6 @@ module LogStash
19
19
  # I do not have to mess around to check if the other directory have file in it before destroying them.
20
20
  class TemporaryFileFactory
21
21
  FILE_MODE = "a"
22
- GZIP_ENCODING = "gzip"
23
- GZIP_EXTENSION = "txt.gz"
24
- TXT_EXTENSION = "txt"
25
22
  STRFTIME = "%Y-%m-%dT%H.%M"
26
23
 
27
24
  attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
@@ -48,7 +45,7 @@ module LogStash
48
45
 
49
46
  private
50
47
  def extension
51
- gzip? ? GZIP_EXTENSION : TXT_EXTENSION
48
+ gzip? ? TemporaryFile.gzip_extension : TemporaryFile.text_extension
52
49
  end
53
50
 
54
51
  def gzip?
@@ -31,6 +31,7 @@ module LogStash
31
31
  end
32
32
  end
33
33
 
34
+ # uploads a TemporaryFile to S3
34
35
  def upload(file, options = {})
35
36
  upload_options = options.fetch(:upload_options, {})
36
37
 
@@ -68,6 +69,7 @@ module LogStash
68
69
  @workers_pool.shutdown
69
70
  @workers_pool.wait_for_termination(nil) # block until its done
70
71
  end
72
+
71
73
  end
72
74
  end
73
75
  end
@@ -97,6 +97,7 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
97
97
  :fallback_policy => :caller_runs
98
98
  })
99
99
 
100
+ GZIP_ENCODING = "gzip"
100
101
 
101
102
  config_name "s3"
102
103
  default :codec, "line"
@@ -181,7 +182,7 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
181
182
  config :tags, :validate => :array, :default => []
182
183
 
183
184
  # Specify the content encoding. Supports ("gzip"). Defaults to "none"
184
- config :encoding, :validate => ["none", "gzip"], :default => "none"
185
+ config :encoding, :validate => ["none", GZIP_ENCODING], :default => "none"
185
186
 
186
187
  # Define the strategy to use to decide when we need to rotate the file and push it to S3,
187
188
  # The default strategy is to check for both size and time, the first one to match will rotate the file.
@@ -315,7 +316,7 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
315
316
  :server_side_encryption => @server_side_encryption ? @server_side_encryption_algorithm : nil,
316
317
  :ssekms_key_id => @server_side_encryption_algorithm == "aws:kms" ? @ssekms_key_id : nil,
317
318
  :storage_class => @storage_class,
318
- :content_encoding => @encoding == "gzip" ? "gzip" : nil,
319
+ :content_encoding => @encoding == GZIP_ENCODING ? GZIP_ENCODING : nil,
319
320
  :multipart_threshold => @upload_multipart_threshold
320
321
  }
321
322
  end
@@ -397,16 +398,48 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
397
398
  @crash_uploader = Uploader.new(bucket_resource, @logger, CRASH_RECOVERY_THREADPOOL)
398
399
 
399
400
  temp_folder_path = Pathname.new(@temporary_directory)
400
- Dir.glob(::File.join(@temporary_directory, "**/*"))
401
- .select { |file| ::File.file?(file) }
402
- .each do |file|
403
- temp_file = TemporaryFile.create_from_existing_file(file, temp_folder_path)
404
- if temp_file.size > 0
405
- @logger.debug? && @logger.debug("Recovering from crash and uploading", :path => temp_file.path)
406
- @crash_uploader.upload_async(temp_file, :on_complete => method(:clean_temporary_file), :upload_options => upload_options)
401
+ files = Dir.glob(::File.join(@temporary_directory, "**/*"))
402
+ .select { |file_path| ::File.file?(file_path) }
403
+ under_recovery_files = get_under_recovery_files(files)
404
+
405
+ files.each do |file_path|
406
+ # when encoding is GZIP, if the file is already being recovered, or was recovered and is uploading to S3, log and skip
407
+ if under_recovery_files.include?(file_path)
408
+ unless file_path.include?(TemporaryFile.gzip_extension)
409
+ @logger.warn("The #{file_path} file either under recover process or failed to recover before.")
410
+ end
407
411
  else
408
- clean_temporary_file(temp_file)
412
+ temp_file = TemporaryFile.create_from_existing_file(file_path, temp_folder_path)
413
+ # do not remove or upload if Logstash tries to recover file but fails
414
+ if temp_file.recoverable?
415
+ if temp_file.size > 0
416
+ @logger.debug? && @logger.debug("Recovering from crash and uploading", :path => temp_file.path)
417
+ @crash_uploader.upload_async(temp_file,
418
+ :on_complete => method(:clean_temporary_file),
419
+ :upload_options => upload_options)
420
+ else
421
+ clean_temporary_file(temp_file)
422
+ end
423
+ end
424
+ end
425
+ end
426
+ end
427
+
428
+ # finds the files that are being recovered and
429
+ # builds a skip list that the rest of the restore process ignores
430
+ def get_under_recovery_files(files)
431
+ skip_files = Set.new
432
+ return skip_files unless @encoding == GZIP_ENCODING
433
+
434
+ files.each do |file_path|
435
+ if file_path.include?(TemporaryFile.recovery_file_name_tag)
436
+ skip_files << file_path
437
+ if file_path.include?(TemporaryFile.gzip_extension)
438
+ # also include the original corrupted gzip file
439
+ skip_files << file_path.gsub(TemporaryFile.recovery_file_name_tag, "")
440
+ end
409
441
  end
410
442
  end
443
+ skip_files
411
444
  end
412
445
  end
@@ -0,0 +1,4 @@
1
+ # AUTOGENERATED BY THE GRADLE SCRIPT. DO NOT EDIT.
2
+
3
+ require 'jar_dependencies'
4
+ require_jar('org.logstash.plugins.outputs.s3', 'logstash-output-s3', '4.4.0')
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+ require "jars/installer"
3
+ require "fileutils"
4
+
5
+ task :vendor do
6
+ exit(1) unless system './gradlew vendor'
7
+ version = File.read("VERSION").strip
8
+ end
9
+
10
+ desc "clean"
11
+ task :clean do
12
+ ["build", "vendor/jar-dependencies", "Gemfile.lock"].each do |p|
13
+ FileUtils.rm_rf(p)
14
+ end
15
+ end
@@ -1,13 +1,13 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-output-s3'
3
- s.version = '4.3.7'
3
+ s.version = '4.4.0'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = "Sends Logstash events to the Amazon Simple Storage Service"
6
6
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
7
7
  s.authors = ["Elastic"]
8
8
  s.email = 'info@elastic.co'
9
9
  s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html"
10
- s.require_paths = ["lib"]
10
+ s.require_paths = ["lib", "vendor/jar-dependencies"]
11
11
 
12
12
  # Files
13
13
  s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]
@@ -7,18 +7,17 @@ require "stud/temporary"
7
7
  describe "Restore from crash", :integration => true do
8
8
  include_context "setup plugin"
9
9
 
10
- let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write" }) }
11
-
12
10
  let(:number_of_files) { 5 }
13
11
  let(:dummy_content) { "foobar\n" * 100 }
14
- let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)}
15
12
 
16
13
  before do
17
14
  clean_remote_files(prefix)
18
15
  end
19
16
 
20
-
21
17
  context 'with a non-empty tempfile' do
18
+ let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write" }) }
19
+ let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)}
20
+
22
21
  before do
23
22
  # Creating a factory always creates a file
24
23
  factory.current.write(dummy_content)
@@ -41,6 +40,9 @@ describe "Restore from crash", :integration => true do
41
40
  end
42
41
 
43
42
  context 'with an empty tempfile' do
43
+ let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write" }) }
44
+ let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)}
45
+
44
46
  before do
45
47
  factory.current
46
48
  factory.rotate!
@@ -63,5 +65,68 @@ describe "Restore from crash", :integration => true do
63
65
  expect(bucket_resource.objects(:prefix => prefix).count).to eq(0)
64
66
  end
65
67
  end
68
+
69
+ context "#gzip encoding" do
70
+ let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write", "encoding" => "gzip" }) }
71
+ let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "gzip", temporary_directory)}
72
+ describe "with empty recovered file" do
73
+ before do
74
+ # Creating a factory always creates a file
75
+ factory.current.write('')
76
+ factory.current.fsync
77
+ factory.current.close
78
+ end
79
+
80
+ it 'should not upload and not remove temp file' do
81
+ subject.register
82
+ try(20) do
83
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(0)
84
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(1)
85
+ end
86
+ end
87
+ end
88
+
89
+ describe "with healthy recovered, size is greater than zero file" do
90
+ before do
91
+ # Creating a factory always creates a file
92
+ factory.current.write(dummy_content)
93
+ factory.current.fsync
94
+ factory.current.close
95
+
96
+ (number_of_files - 1).times do
97
+ factory.rotate!
98
+ factory.current.write(dummy_content)
99
+ factory.current.fsync
100
+ factory.current.close
101
+ end
102
+ end
103
+
104
+ it 'should recover, upload to S3 and remove temp file' do
105
+ subject.register
106
+ try(20) do
107
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(number_of_files)
108
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
109
+ expect(bucket_resource.objects(:prefix => prefix).first.acl.grants.collect(&:permission)).to include("READ", "WRITE")
110
+ end
111
+ end
112
+ end
113
+
114
+ describe "with failure when recovering" do
115
+ before do
116
+ # Creating a factory always creates a file
117
+ factory.current.write(dummy_content)
118
+ factory.current.fsync
119
+ end
120
+
121
+ it 'should not upload to S3 and not remove temp file' do
122
+ subject.register
123
+ try(20) do
124
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(0)
125
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(1)
126
+ end
127
+ end
128
+ end
129
+ end
130
+
66
131
  end
67
132
 
@@ -25,11 +25,11 @@ describe LogStash::Outputs::S3::SizeRotationPolicy do
25
25
  end
26
26
 
27
27
  it "raises an exception if the `size_file` is 0" do
28
- expect { described_class.new(0) }.to raise_error(LogStash::ConfigurationError, /need to be greather than 0/)
28
+ expect { described_class.new(0) }.to raise_error(LogStash::ConfigurationError, /need to be greater than 0/)
29
29
  end
30
30
 
31
31
  it "raises an exception if the `size_file` is < 0" do
32
- expect { described_class.new(-100) }.to raise_error(LogStash::ConfigurationError, /need to be greather than 0/)
32
+ expect { described_class.new(-100) }.to raise_error(LogStash::ConfigurationError, /need to be greater than 0/)
33
33
  end
34
34
 
35
35
  context "#needs_periodic?" do
@@ -5,6 +5,7 @@ shared_context "setup plugin" do
5
5
  let(:bucket) { ENV["AWS_LOGSTASH_TEST_BUCKET"] }
6
6
  let(:access_key_id) { ENV["AWS_ACCESS_KEY_ID"] }
7
7
  let(:secret_access_key) { ENV["AWS_SECRET_ACCESS_KEY"] }
8
+ let(:session_token) { ENV["AWS_SESSION_TOKEN"] }
8
9
  let(:size_file) { 100 }
9
10
  let(:time_file) { 100 }
10
11
  let(:tags) { [] }
@@ -18,6 +19,7 @@ shared_context "setup plugin" do
18
19
  "temporary_directory" => temporary_directory,
19
20
  "access_key_id" => access_key_id,
20
21
  "secret_access_key" => secret_access_key,
22
+ "session_token" => session_token,
21
23
  "size_file" => size_file,
22
24
  "time_file" => time_file,
23
25
  "region" => region,
@@ -25,7 +27,7 @@ shared_context "setup plugin" do
25
27
  }
26
28
  end
27
29
 
28
- let(:client_credentials) { Aws::Credentials.new(access_key_id, secret_access_key) }
30
+ let(:client_credentials) { Aws::Credentials.new(access_key_id, secret_access_key, session_token) }
29
31
  let(:bucket_resource) { Aws::S3::Bucket.new(bucket, { :credentials => client_credentials, :region => region }) }
30
32
 
31
33
  subject { LogStash::Outputs::S3.new(options) }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-output-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.3.7
4
+ version: 4.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-16 00:00:00.000000000 Z
11
+ date: 2022-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -129,7 +129,9 @@ files:
129
129
  - LICENSE
130
130
  - NOTICE.TXT
131
131
  - README.md
132
+ - VERSION
132
133
  - docs/index.asciidoc
134
+ - lib/logstash-output-s3_jars.rb
133
135
  - lib/logstash/outputs/s3.rb
134
136
  - lib/logstash/outputs/s3/file_repository.rb
135
137
  - lib/logstash/outputs/s3/patch.rb
@@ -142,6 +144,7 @@ files:
142
144
  - lib/logstash/outputs/s3/uploader.rb
143
145
  - lib/logstash/outputs/s3/writable_directory_validator.rb
144
146
  - lib/logstash/outputs/s3/write_bucket_permission_validator.rb
147
+ - lib/tasks/build.rake
145
148
  - logstash-output-s3.gemspec
146
149
  - spec/integration/dynamic_prefix_spec.rb
147
150
  - spec/integration/gzip_file_spec.rb
@@ -164,6 +167,7 @@ files:
164
167
  - spec/outputs/s3_spec.rb
165
168
  - spec/spec_helper.rb
166
169
  - spec/supports/helpers.rb
170
+ - vendor/jar-dependencies/org/logstash/plugins/outputs/s3/logstash-output-s3/4.4.0/logstash-output-s3-4.4.0.jar
167
171
  homepage: http://www.elastic.co/guide/en/logstash/current/index.html
168
172
  licenses:
169
173
  - Apache-2.0
@@ -174,6 +178,7 @@ post_install_message:
174
178
  rdoc_options: []
175
179
  require_paths:
176
180
  - lib
181
+ - vendor/jar-dependencies
177
182
  required_ruby_version: !ruby/object:Gem::Requirement
178
183
  requirements:
179
184
  - - ">="