logstash-integration-aws 7.0.0 → 7.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 4cea87525319253265e10b11632067a0470dd461bcd6febad35e13bb94d5e2b2
- data.tar.gz: ae9b187795bc416f1c1e7e16b4207b8fc476b54c4cbc55325104f91930a65ae1
+ metadata.gz: cee9f1de988917319a825f2297f67e3c436cf887eaea605b0ae0a863e7b9a2b8
+ data.tar.gz: 357432106dd5710a0c8a4addda66922c5d7f66c4c6fbf82bba638ad71f595ab2
  SHA512:
- metadata.gz: e62194ffbe42ac97834a189eb9821feed9eeebd28bb72ffed26de4b087d76b75ca30006c630d2d30f1892f7dc41825e176bf25fd97487e3554aec5fc7e847b36
- data.tar.gz: 30e9270e7933fd448ff1e9ae41f768edbe6fd956719c79e9f828743e583d06ddef1fa2ab55462df6c7334990b01e14fb766290c1a8f9e405cf1e8673a99f4d83
+ metadata.gz: e3b500c0b652a50498c85f204bee216965b04b8468cb2a1b798d82e08bd1d1b6d73876521770c6bacc956786bb749342316d06bd7254af140b11f3660922a2ec
+ data.tar.gz: ff157154dc10816d86a035cac58b85a8cf13841e9948026113386a66c941bf6b9256f882d1811cd18824fda7348ae9c9dd66e9ab390ef6438f4021845521e849
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
+ ## 7.1.0
+ - Plugin restores corrupted GZIP files (caused by abnormal termination) and uploads the recovered content to AWS S3 [#20](https://github.com/logstash-plugins/logstash-integration-aws/pull/20)
+
+ ## 7.0.1
+ - resolves two closely-related race conditions in the S3 Output plugin's handling of stale temporary files that could cause plugin crashes or data loss [#19](https://github.com/logstash-plugins/logstash-integration-aws/pull/19)
+   - prevents a `No such file or directory` crash that could occur when a temporary file is accessed after it has been detected as stale (empty+old) and deleted.
+   - prevents a possible deletion of a non-empty temporary file that could occur if bytes were written to it _after_ it was detected as stale (empty+old) and _before_ the deletion completed.
+
  ## 7.0.0
  - bump integration to upper bound of all underlying plugins versions (biggest is sqs output 6.x)
  - this is necessary to facilitate versioning continuity between older standalone plugins and plugins within the integration
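The 7.0.1 entries describe a classic check-then-act race around stale temporary files. A hypothetical, self-contained Ruby sketch of the failure mode (not the plugin's code; the actual fix is in the FileRepository changes further down this diff):

```ruby
# Hypothetical, simplified reproduction of the check-then-act race fixed in
# 7.0.1: a sweeper decides a file is stale (empty + old), a writer appends
# to it inside that window, and the sweeper then deletes non-empty data.
require "tmpdir"

path = File.join(Dir.mktmpdir, "ls.s3.part0.txt")
File.write(path, "")

sweeper = Thread.new do
  if File.zero?(path)   # check: file looks stale
    sleep 0.05          # window in which a writer can slip in
    File.delete(path)   # act: deletes whatever was written meanwhile
  end
end

writer = Thread.new do
  sleep 0.01
  File.open(path, "a") { |f| f.write("event that is silently lost\n") }
end

[writer, sweeper].each(&:join)
```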
data/README.md CHANGED
@@ -19,7 +19,7 @@ Need help? Try #logstash on freenode IRC or the https://discuss.elastic.co/c/log
 
  ## Developing
 
- ### 1. Plugin Developement and Testing
+ ### 1. Plugin Development and Testing
 
  #### Code
  - To get started, you'll need JRuby with the Bundler gem installed.
data/VERSION ADDED
@@ -0,0 +1 @@
+ 7.1.0
data/docs/codec-cloudfront.asciidoc CHANGED
@@ -1,4 +1,4 @@
- // :integration: aws
+ :integration: aws
  :plugin: cloudfront
  :type: codec
 
@@ -17,7 +17,7 @@ END - GENERATED VARIABLES, DO NOT EDIT!
 
  === Cloudfront codec plugin
 
- // include::{include_path}/plugin_header-integration.asciidoc[]
+ include::{include_path}/plugin_header-integration.asciidoc[]
 
  ==== Description
 
data/docs/codec-cloudtrail.asciidoc CHANGED
@@ -1,4 +1,4 @@
- // :integration: aws
+ :integration: aws
  :plugin: cloudtrail
  :type: codec
 
@@ -17,7 +17,7 @@ END - GENERATED VARIABLES, DO NOT EDIT!
 
  === Cloudtrail codec plugin
 
- // include::{include_path}/plugin_header-integration.asciidoc[]
+ include::{include_path}/plugin_header-integration.asciidoc[]
 
  ==== Description
 
data/docs/index.asciidoc CHANGED
@@ -17,13 +17,12 @@ END - GENERATED VARIABLES, DO NOT EDIT!
 
  === AWS Integration Plugin
 
- // include::{include_path}/plugin_header.asciidoc[]
+ include::{include_path}/plugin_header.asciidoc[]
 
  ==== Description
 
  The AWS Integration Plugin provides integrated plugins for working with Amazon Web Services:
 
- ////
  - {logstash-ref}/plugins-codecs-cloudfront.html[Cloudfront Codec Plugin]
  - {logstash-ref}/plugins-codecs-cloudtrail.html[Cloudtrail Codec Plugin]
  - {logstash-ref}/plugins-inputs-cloudwatch.html[Cloudwatch Input Plugin]
@@ -33,6 +32,5 @@ The AWS Integration Plugin provides integrated plugins for working with Amazon W
  - {logstash-ref}/plugins-outputs-s3.html[S3 Output Plugin]
  - {logstash-ref}/plugins-outputs-sns.html[Sns Output Plugin]
  - {logstash-ref}/plugins-outputs-sqs.html[Sqs Output Plugin]
- ////
 
  :no_codec!:
data/docs/input-cloudwatch.asciidoc CHANGED
@@ -1,4 +1,4 @@
- // :integration: aws
+ :integration: aws
  :plugin: cloudwatch
  :type: input
  :default_codec: plain
@@ -18,7 +18,7 @@ END - GENERATED VARIABLES, DO NOT EDIT!
 
  === Cloudwatch input plugin
 
- // include::{include_path}/plugin_header-integration.asciidoc[]
+ include::{include_path}/plugin_header-integration.asciidoc[]
 
  ==== Description
 
data/docs/input-s3.asciidoc CHANGED
@@ -1,4 +1,4 @@
- // :integration: aws
+ :integration: aws
  :plugin: s3
  :type: input
  :default_codec: plain
@@ -18,7 +18,7 @@ END - GENERATED VARIABLES, DO NOT EDIT!
 
  === S3 input plugin
 
- // include::{include_path}/plugin_header-integration.asciidoc[]
+ include::{include_path}/plugin_header-integration.asciidoc[]
 
  ==== Description
 
data/docs/input-sqs.asciidoc CHANGED
@@ -1,4 +1,4 @@
- // :integration: aws
+ :integration: aws
  :plugin: sqs
  :type: input
  :default_codec: json
@@ -18,7 +18,7 @@ END - GENERATED VARIABLES, DO NOT EDIT!
 
  === Sqs input plugin
 
- // include::{include_path}/plugin_header-integration.asciidoc[]
+ include::{include_path}/plugin_header-integration.asciidoc[]
 
  ==== Description
 
data/docs/output-cloudwatch.asciidoc CHANGED
@@ -1,4 +1,4 @@
- // :integration: aws
+ :integration: aws
  :plugin: cloudwatch
  :type: output
  :default_codec: plain
@@ -18,7 +18,7 @@ END - GENERATED VARIABLES, DO NOT EDIT!
 
  === Cloudwatch output plugin
 
- // include::{include_path}/plugin_header-integration.asciidoc[]
+ include::{include_path}/plugin_header-integration.asciidoc[]
 
  ==== Description
 
data/docs/output-s3.asciidoc CHANGED
@@ -1,4 +1,4 @@
- // :integration: aws
+ :integration: aws
  :plugin: s3
  :type: output
  :default_codec: line
@@ -18,7 +18,7 @@ END - GENERATED VARIABLES, DO NOT EDIT!
 
  === S3 output plugin
 
- // include::{include_path}/plugin_header-integration.asciidoc[]
+ include::{include_path}/plugin_header-integration.asciidoc[]
 
  ==== Description
 
data/docs/output-sns.asciidoc CHANGED
@@ -1,4 +1,4 @@
- // :integration: aws
+ :integration: aws
  :plugin: sns
  :type: output
  :default_codec: plain
@@ -18,7 +18,7 @@ END - GENERATED VARIABLES, DO NOT EDIT!
 
  === Sns output plugin
 
- // include::{include_path}/plugin_header-integration.asciidoc[]
+ include::{include_path}/plugin_header-integration.asciidoc[]
 
  ==== Description
 
data/docs/output-sqs.asciidoc CHANGED
@@ -1,4 +1,4 @@
- // :integration: aws
+ :integration: aws
  :plugin: sqs
  :type: output
  :default_codec: json
@@ -18,7 +18,7 @@ END - GENERATED VARIABLES, DO NOT EDIT!
 
  === Sqs output plugin
 
- // include::{include_path}/plugin_header-integration.asciidoc[]
+ include::{include_path}/plugin_header-integration.asciidoc[]
 
  ==== Description
 
@@ -17,8 +17,9 @@ module LogStash
  class PrefixedValue
  def initialize(file_factory, stale_time)
  @file_factory = file_factory
- @lock = Mutex.new
+ @lock = Monitor.new # reentrant Mutex
  @stale_time = stale_time
+ @is_deleted = false
  end
 
  def with_lock
@@ -36,7 +37,14 @@ module LogStash
  end
 
  def delete!
- with_lock{ |factory| factory.current.delete! }
+ with_lock do |factory|
+ factory.current.delete!
+ @is_deleted = true
+ end
+ end
+
+ def deleted?
+ with_lock { |_| @is_deleted }
  end
  end
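A note on the `Monitor` swap above: unlike `Mutex`, a `Monitor` can be re-acquired by the thread that already holds it, which matters because `delete!` and the new `deleted?` take the same lock that callers such as `get_factory` (below) may already hold. A standalone illustration (assumed example, not plugin code):

```ruby
require "monitor"

mutex   = Mutex.new
monitor = Monitor.new

# Mutex is not reentrant: nesting synchronize on the same thread raises.
begin
  mutex.synchronize { mutex.synchronize { } }
rescue ThreadError => e
  puts "Mutex: #{e.message}"   # => deadlock; recursive locking
end

# Monitor is reentrant: the owning thread may nest synchronize freely.
monitor.synchronize do
  monitor.synchronize { puts "Monitor: nested lock acquired" }
end
```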
 
@@ -72,19 +80,70 @@ module LogStash
  @prefixed_factories.keySet
  end
 
+ ##
+ # Yields the current file of each non-deleted file factory while the current thread has exclusive access to it.
+ # @yieldparam file [TemporaryFile]
+ # @return [void]
  def each_files
- @prefixed_factories.elements.each do |prefixed_file|
- prefixed_file.with_lock { |factory| yield factory.current }
+ each_factory(keys) do |factory|
+ yield factory.current
  end
+ nil # void return avoid leaking unsynchronized access
  end
 
- # Return the file factory
+ ##
+ # Yields the file factory while the current thread has exclusive access to it, creating a new
+ # one if one does not exist or if the current one is being reaped by the stale watcher.
+ # @param prefix_key [String]: the prefix key
+ # @yieldparam factory [TemporaryFileFactory]: a temporary file factory that this thread has exclusive access to
+ # @return [void]
  def get_factory(prefix_key)
- @prefixed_factories.computeIfAbsent(prefix_key, @factory_initializer).with_lock { |factory| yield factory }
+ # fast-path: if factory exists and is not deleted, yield it with exclusive access and return
+ prefix_val = @prefixed_factories.get(prefix_key)
+ prefix_val&.with_lock do |factory|
+ # intentional local-jump to ensure deletion detection
+ # is done inside the exclusive access.
+ unless prefix_val.deleted?
+ yield(factory)
+ return nil # void return avoid leaking unsynchronized access
+ end
+ end
+
+ # slow-path:
+ # the ConcurrentHashMap#get operation is lock-free, but may have returned an entry that was being deleted by
+ # another thread (such as via stale detection). If we failed to retrieve a value, or retrieved one that had
+ # been marked deleted, use the atomic ConcurrentHashMap#compute to retrieve a non-deleted entry.
+ prefix_val = @prefixed_factories.compute(prefix_key) do |_, existing|
+ existing && !existing.deleted? ? existing : @factory_initializer.apply(prefix_key)
+ end
+ prefix_val.with_lock { |factory| yield factory }
+ nil # void return avoid leaking unsynchronized access
+ end
+
+ ##
+ # Yields each non-deleted file factory while the current thread has exclusive access to it.
+ # @param prefixes [Array<String>]: the prefix keys
+ # @yieldparam factory [TemporaryFileFactory]
+ # @return [void]
+ def each_factory(prefixes)
+ prefixes.each do |prefix_key|
+ prefix_val = @prefixed_factories.get(prefix_key)
+ prefix_val&.with_lock do |factory|
+ yield factory unless prefix_val.deleted?
+ end
+ end
+ nil # void return avoid leaking unsynchronized access
  end
 
+ ##
+ # Ensures that a non-deleted factory exists for the provided prefix and yields its current file
+ # while the current thread has exclusive access to it.
+ # @param prefix_key [String]
+ # @yieldparam file [TemporaryFile]
+ # @return [void]
  def get_file(prefix_key)
  get_factory(prefix_key) { |factory| yield factory.current }
+ nil # void return avoid leaking unsynchronized access
  end
 
  def shutdown
@@ -95,10 +154,21 @@ module LogStash
  @prefixed_factories.size
  end
 
- def remove_stale(k, v)
- if v.stale?
- @prefixed_factories.remove(k, v)
- v.delete!
+ def remove_if_stale(prefix_key)
+ # we use the ATOMIC `ConcurrentHashMap#computeIfPresent` to atomically
+ # detect the staleness, mark a stale prefixed factory as deleted, and delete from the map.
+ @prefixed_factories.computeIfPresent(prefix_key) do |_, prefixed_factory|
+ # once we have retrieved an instance, we acquire exclusive access to it
+ # for stale detection, marking it as deleted before releasing the lock
+ # and causing it to become deleted from the map.
+ prefixed_factory.with_lock do |_|
+ if prefixed_factory.stale?
+ prefixed_factory.delete! # mark deleted to prevent reuse
+ nil # cause deletion
+ else
+ prefixed_factory # keep existing
+ end
+ end
  end
  end
 
@@ -106,7 +176,9 @@ module LogStash
  @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
  LogStash::Util.set_thread_name("S3, Stale factory sweeper")
 
- @prefixed_factories.forEach{|k,v| remove_stale(k,v)}
+ @prefixed_factories.keys.each do |prefix|
+ remove_if_stale(prefix)
+ end
  end
 
  @stale_sweeper.execute
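For context on the Java collection used above: `ConcurrentHashMap#get` is lock-free, while `compute`/`computeIfPresent` run their mapping block atomically per key, which is what makes the slow path of `get_factory` and the removal in `remove_if_stale` safe against concurrent writers. A small JRuby-only sketch (illustrative; the key and values are made up):

```ruby
# Run under JRuby; illustrative only.
require "java"

map = java.util.concurrent.ConcurrentHashMap.new

# compute evaluates the block atomically for this key; concurrent compute/
# computeIfPresent calls for the same key wait until it returns.
map.compute("logs/") { |_key, existing| existing || "factory-for-logs/" }

# computeIfPresent returning nil removes the mapping atomically -- the same
# mechanism remove_if_stale uses to drop a stale prefix without racing writers.
map.computeIfPresent("logs/") { |_key, _value| nil }

puts map.size   # => 0
```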
@@ -7,7 +7,7 @@ module LogStash
 
  def initialize(size_file)
  if size_file <= 0
- raise LogStash::ConfigurationError, "`size_file` need to be greather than 0"
+ raise LogStash::ConfigurationError, "`size_file` need to be greater than 0"
  end
 
  @size_file = size_file
@@ -2,15 +2,23 @@
  require "thread"
  require "forwardable"
  require "fileutils"
+ require "logstash-integration-aws_jars"
 
  module LogStash
  module Outputs
  class S3
- # Wrap the actual file descriptor into an utility classe
- # It make it more OOP and easier to reason with the paths.
+
+ java_import 'org.logstash.plugins.integration.outputs.s3.GzipUtil'
+
+ # Wrap the actual file descriptor into a utility class
+ # Make it more OOP and easier to reason with the paths.
  class TemporaryFile
  extend Forwardable
 
+ GZIP_EXTENSION = "txt.gz"
+ TXT_EXTENSION = "txt"
+ RECOVERED_FILE_NAME_TAG = "-recovered"
+
  def_delegators :@fd, :path, :write, :close, :fsync
 
  attr_reader :fd
@@ -33,8 +41,10 @@ module LogStash
  def size
  # Use the fd size to get the accurate result,
  # so we dont have to deal with fsync
- # if the file is close we will use the File::size
+ # if the file is closed, fd.size raises an IO exception so we use the File::size
  begin
+ # fd is nil when LS tries to recover gzip file but fails
+ return 0 unless @fd != nil
  @fd.size
  rescue IOError
  ::File.size(path)
@@ -45,7 +55,7 @@ module LogStash
  @key.gsub(/^\//, "")
  end
 
- # Each temporary file is made inside a directory named with an UUID,
+ # Each temporary file is created inside a directory named with an UUID,
  # instead of deleting the file directly and having the risk of deleting other files
  # we delete the root of the UUID, using a UUID also remove the risk of deleting unwanted file, it acts as
  # a sandbox.
@@ -58,12 +68,45 @@ module LogStash
  size == 0
  end
 
+ # only to cover the case where LS cannot restore a corrupted file (the file does not exist)
+ def recoverable?
+ !@fd.nil?
+ end
+
  def self.create_from_existing_file(file_path, temporary_folder)
  key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
 
+ # recover gzip file and compress back before uploading to S3
+ if file_path.end_with?("." + GZIP_EXTENSION)
+ file_path = self.recover(file_path)
+ end
  TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
- ::File.open(file_path, "r"),
- ::File.join(temporary_folder, key_parts.slice(0, 1)))
+ ::File.exist?(file_path) ? ::File.open(file_path, "r") : nil, # for the nil case, file size will be 0 and upload will be ignored.
+ ::File.join(temporary_folder, key_parts.slice(0, 1)))
+ end
+
+ def self.gzip_extension
+ GZIP_EXTENSION
+ end
+
+ def self.text_extension
+ TXT_EXTENSION
+ end
+
+ def self.recovery_file_name_tag
+ RECOVERED_FILE_NAME_TAG
+ end
+
+ private
+ def self.recover(file_path)
+ full_gzip_extension = "." + GZIP_EXTENSION
+ recovered_txt_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + "." + TXT_EXTENSION)
+ recovered_gzip_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + full_gzip_extension)
+ GzipUtil.recover(file_path, recovered_txt_file_path)
+ if ::File.exist?(recovered_txt_file_path) && !::File.zero?(recovered_txt_file_path)
+ GzipUtil.compress(recovered_txt_file_path, recovered_gzip_file_path)
+ end
+ recovered_gzip_file_path
  end
  end
  end
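The `GzipUtil.recover`/`GzipUtil.compress` calls above delegate to the Java helper bundled in this release's jar. As a rough, Ruby-only approximation of the idea — salvage whatever still decompresses from a truncated gzip file, then re-compress it — with no claim about how the bundled helper is actually implemented:

```ruby
require "zlib"

# Illustrative sketch only -- the plugin itself delegates this to GzipUtil.
# Reads as much as possible from a corrupt/truncated gzip file, writes the
# salvaged plaintext, and re-compresses it for upload.
def salvage_gzip(corrupt_path, recovered_txt_path, recovered_gzip_path)
  salvaged = String.new
  begin
    Zlib::GzipReader.open(corrupt_path) do |gz|
      # read in small chunks so data before the corruption point is kept
      while (chunk = gz.read(16 * 1024))
        salvaged << chunk
      end
    end
  rescue Zlib::Error, EOFError
    # hit the corrupted tail; keep whatever was already decompressed
  end

  return if salvaged.empty?
  File.write(recovered_txt_path, salvaged)
  Zlib::GzipWriter.open(recovered_gzip_path) { |gz| gz.write(salvaged) }
end
```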
@@ -19,9 +19,6 @@ module LogStash
  # I do not have to mess around to check if the other directory have file in it before destroying them.
  class TemporaryFileFactory
  FILE_MODE = "a"
- GZIP_ENCODING = "gzip"
- GZIP_EXTENSION = "txt.gz"
- TXT_EXTENSION = "txt"
  STRFTIME = "%Y-%m-%dT%H.%M"
 
  attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
@@ -48,7 +45,7 @@ module LogStash
 
  private
  def extension
- gzip? ? GZIP_EXTENSION : TXT_EXTENSION
+ gzip? ? TemporaryFile.gzip_extension : TemporaryFile.text_extension
  end
 
  def gzip?
@@ -7,7 +7,7 @@ module LogStash
 
  def initialize(time_file)
  if time_file <= 0
- raise LogStash::ConfigurationError, "`time_file` need to be greather than 0"
+ raise LogStash::ConfigurationError, "`time_file` need to be greater than 0"
  end
 
  @time_file = time_file * 60
@@ -31,6 +31,7 @@ module LogStash
  end
  end
 
+ # uploads a TemporaryFile to S3
  def upload(file, options = {})
  upload_options = options.fetch(:upload_options, {})
 
@@ -68,6 +69,7 @@ module LogStash
  @workers_pool.shutdown
  @workers_pool.wait_for_termination(nil) # block until its done
  end
+
  end
  end
  end
@@ -94,6 +94,7 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
  :fallback_policy => :caller_runs
  })
 
+ GZIP_ENCODING = "gzip"
 
  config_name "s3"
  default :codec, "line"
@@ -107,7 +108,8 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
 
  # Set the size of file in bytes, this means that files on bucket when have dimension > file_size, they are stored in two or more file.
  # If you have tags then it will generate a specific size file for every tags
- ##NOTE: define size of file is the better thing, because generate a local temporary file on disk and then put it in bucket.
+ #
+ # NOTE: define size of file is the better thing, because generate a local temporary file on disk and then put it in bucket.
  config :size_file, :validate => :number, :default => 1024 * 1024 * 5
 
  # Set the time, in MINUTES, to close the current sub_time_section of bucket.
@@ -115,10 +117,10 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
  # If it's valued 0 and rotation_strategy is 'time' or 'size_and_time' then the plugin reaise a configuration error.
  config :time_file, :validate => :number, :default => 15
 
- ## IMPORTANT: if you use multiple instance of s3, you should specify on one of them the "restore=> true" and on the others "restore => false".
- ## This is hack for not destroy the new files after restoring the initial files.
- ## If you do not specify "restore => true" when logstash crashes or is restarted, the files are not sent into the bucket,
- ## for example if you have single Instance.
+ # If `restore => false` is specified and Logstash crashes, the unprocessed files are not sent into the bucket.
+ #
+ # NOTE: the `restore => true` default assumes multiple S3 outputs would set a unique `temporary_directory => ...`;
+ # if they do not, then only a single S3 output is safe to recover (since left-over files are processed and deleted).
  config :restore, :validate => :boolean, :default => true
  # The S3 canned ACL to use when putting the file. Defaults to "private".
 
@@ -144,6 +146,9 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
 
  # Set the directory where logstash will store the tmp files before sending it to S3
  # default to the current OS temporary directory in linux /tmp/logstash
+ #
+ # NOTE: the reason we do not have a unique (isolated) temporary directory as a default, to support multiple plugin instances,
+ # is that we would have to rely on something static that does not change between restarts (e.g. a user set id => ...).
  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
 
  # Specify a prefix to the uploaded filename, this can simulate directories on S3. Prefix does not require leading slash.
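To illustrate the two notes above with a hypothetical pipeline (bucket names and paths are invented): when several S3 outputs run in the same Logstash process, give each its own `temporary_directory` so the default `restore => true` can recover crash leftovers safely:

```
output {
  s3 {
    bucket              => "example-app-logs"        # hypothetical bucket
    region              => "us-east-1"
    temporary_directory => "/tmp/logstash/s3-app"     # unique per output
    restore             => true                       # default, shown for clarity
  }
  s3 {
    bucket              => "example-audit-logs"      # hypothetical bucket
    region              => "us-east-1"
    temporary_directory => "/tmp/logstash/s3-audit"   # unique per output
    restore             => true
  }
}
```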
@@ -174,7 +179,7 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
  config :tags, :validate => :array, :default => []
 
  # Specify the content encoding. Supports ("gzip"). Defaults to "none"
- config :encoding, :validate => ["none", "gzip"], :default => "none"
+ config :encoding, :validate => ["none", GZIP_ENCODING], :default => "none"
 
  # Define the strategy to use to decide when we need to rotate the file and push it to S3,
  # The default strategy is to check for both size and time, the first one to match will rotate the file.
@@ -258,6 +263,8 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
 
  @logger.debug("Uploading current workspace")
 
+ @file_repository.shutdown # stop stale sweeps
+
  # The plugin has stopped receiving new events, but we still have
  # data on disk, lets make sure it get to S3.
  # If Logstash get interrupted, the `restore_from_crash` (when set to true) method will pickup
@@ -267,8 +274,6 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
  upload_file(file)
  end
 
- @file_repository.shutdown
-
  @uploader.stop # wait until all the current upload are complete
  @crash_uploader.stop if @restore # we might have still work to do for recovery so wait until we are done
  end
@@ -308,7 +313,7 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
  :server_side_encryption => @server_side_encryption ? @server_side_encryption_algorithm : nil,
  :ssekms_key_id => @server_side_encryption_algorithm == "aws:kms" ? @ssekms_key_id : nil,
  :storage_class => @storage_class,
- :content_encoding => @encoding == "gzip" ? "gzip" : nil,
+ :content_encoding => @encoding == GZIP_ENCODING ? GZIP_ENCODING : nil,
  :multipart_threshold => @upload_multipart_threshold
  }
  end
@@ -336,28 +341,28 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
  end
 
  def rotate_if_needed(prefixes)
- prefixes.each do |prefix|
- # Each file access is thread safe,
- # until the rotation is done then only
- # one thread has access to the resource.
- @file_repository.get_factory(prefix) do |factory|
- temp_file = factory.current
-
- if @rotation.rotate?(temp_file)
- @logger.debug("Rotate file",
- :strategy => @rotation.class.name,
- :key => temp_file.key,
- :path => temp_file.path)
-
- upload_file(temp_file)
- factory.rotate!
- end
+ # Each file access is thread safe,
+ # until the rotation is done then only
+ # one thread has access to the resource.
+ @file_repository.each_factory(prefixes) do |factory|
+ # we have exclusive access to the one-and-only
+ # prefix WRAPPER for this factory.
+ temp_file = factory.current
+
+ if @rotation.rotate?(temp_file)
+ @logger.debug? && @logger.debug("Rotate file",
+ :key => temp_file.key,
+ :path => temp_file.path,
+ :strategy => @rotation.class.name)
+
+ upload_file(temp_file) # may be async or blocking
+ factory.rotate!
  end
  end
  end
 
  def upload_file(temp_file)
- @logger.debug("Queue for upload", :path => temp_file.path)
+ @logger.debug? && @logger.debug("Queue for upload", :path => temp_file.path)
 
  # if the queue is full the calling thread will be used to upload
  temp_file.close # make sure the content is on disk
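The `@logger.debug? &&` guards introduced above are the usual cheap-logging idiom: when debug logging is disabled, the argument list (including any interpolation) is never built. A tiny standalone illustration:

```ruby
require "logger"

logger = Logger.new($stdout)
logger.level = Logger::INFO

# The right-hand side never runs while debug logging is off, so the large
# interpolated string below is never constructed.
logger.debug? && logger.debug("payload: #{'x' * 10_000_000}")
```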
@@ -380,7 +385,7 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
  end
 
  def clean_temporary_file(file)
- @logger.debug("Removing temporary file", :file => file.path)
+ @logger.debug? && @logger.debug("Removing temporary file", :path => file.path)
  file.delete!
  end
 
@@ -390,16 +395,48 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
  @crash_uploader = Uploader.new(bucket_resource, @logger, CRASH_RECOVERY_THREADPOOL)
 
  temp_folder_path = Pathname.new(@temporary_directory)
- Dir.glob(::File.join(@temporary_directory, "**/*"))
- .select { |file| ::File.file?(file) }
- .each do |file|
- temp_file = TemporaryFile.create_from_existing_file(file, temp_folder_path)
- if temp_file.size > 0
- @logger.debug("Recovering from crash and uploading", :file => temp_file.path)
- @crash_uploader.upload_async(temp_file, :on_complete => method(:clean_temporary_file), :upload_options => upload_options)
+ files = Dir.glob(::File.join(@temporary_directory, "**/*"))
+ .select { |file_path| ::File.file?(file_path) }
+ under_recovery_files = get_under_recovery_files(files)
+
+ files.each do |file_path|
+ # when encoding is GZIP, if file is already recovering or recovered and uploading to S3, log and skip
+ if under_recovery_files.include?(file_path)
+ unless file_path.include?(TemporaryFile.gzip_extension)
+ @logger.warn("The #{file_path} file is either under the recovery process or failed to recover before.")
+ end
  else
- clean_temporary_file(temp_file)
+ temp_file = TemporaryFile.create_from_existing_file(file_path, temp_folder_path)
+ # do not remove or upload if Logstash tries to recover file but fails
+ if temp_file.recoverable?
+ if temp_file.size > 0
+ @logger.debug? && @logger.debug("Recovering from crash and uploading", :path => temp_file.path)
+ @crash_uploader.upload_async(temp_file,
+ :on_complete => method(:clean_temporary_file),
+ :upload_options => upload_options)
+ else
+ clean_temporary_file(temp_file)
+ end
+ end
+ end
+ end
+ end
+
+ # figures out the files that are being recovered and
+ # creates a skip list to ignore them for the rest of the process
+ def get_under_recovery_files(files)
+ skip_files = Set.new
+ return skip_files unless @encoding == GZIP_ENCODING
+
+ files.each do |file_path|
+ if file_path.include?(TemporaryFile.recovery_file_name_tag)
+ skip_files << file_path
+ if file_path.include?(TemporaryFile.gzip_extension)
+ # also include the original corrupted gzip file
+ skip_files << file_path.gsub(TemporaryFile.recovery_file_name_tag, "")
+ end
  end
  end
+ skip_files
  end
  end
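To make `get_under_recovery_files` concrete with invented file names: with `encoding => "gzip"`, both the `-recovered` artifacts and the corrupt originals they derive from end up on the skip list, so the crash uploader neither re-uploads nor deletes them while recovery is pending:

```ruby
require "set"

# Hypothetical paths, following the "-recovered" naming used above.
files = [
  "/tmp/logstash/uuid1/prefix/ls.s3.part0.txt.gz",            # corrupt original
  "/tmp/logstash/uuid1/prefix/ls.s3.part0-recovered.txt.gz",  # re-compressed recovery output
  "/tmp/logstash/uuid2/prefix/ls.s3.part1-recovered.txt",     # plain-text recovery output
]

skip = Set.new
files.each do |path|
  next unless path.include?("-recovered")
  skip << path
  # for a recovered gzip, also skip the corrupt original it was derived from
  skip << path.gsub("-recovered", "") if path.end_with?(".txt.gz")
end

skip.each { |p| puts p }
# => .../ls.s3.part0-recovered.txt.gz
# => .../ls.s3.part0.txt.gz
# => .../ls.s3.part1-recovered.txt
```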
data/lib/logstash-integration-aws_jars.rb ADDED
@@ -0,0 +1,4 @@
+ # AUTOGENERATED BY THE GRADLE SCRIPT. DO NOT EDIT.
+
+ require 'jar_dependencies'
+ require_jar('org.logstash.plugins.integration.aws', 'logstash-integration-aws', '7.1.0')
data/lib/tasks/build.rake ADDED
@@ -0,0 +1,15 @@
+ # encoding: utf-8
+ require "jars/installer"
+ require "fileutils"
+
+ task :vendor do
+ exit(1) unless system './gradlew vendor'
+ version = File.read("VERSION").strip
+ end
+
+ desc "clean"
+ task :clean do
+ ["build", "vendor/jar-dependencies", "Gemfile.lock"].each do |p|
+ FileUtils.rm_rf(p)
+ end
+ end
data/logstash-integration-aws.gemspec CHANGED
@@ -1,6 +1,8 @@
+ INTEGRATION_AWS_VERSION = File.read(File.expand_path(File.join(File.dirname(__FILE__), "VERSION"))).strip unless defined?(INTEGRATION_AWS_VERSION)
+
  Gem::Specification.new do |s|
  s.name = "logstash-integration-aws"
- s.version = "7.0.0"
+ s.version = INTEGRATION_AWS_VERSION
  s.licenses = ["Apache-2.0"]
  s.summary = "Collection of Logstash plugins that integrate with AWS"
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -23,7 +25,7 @@ Gem::Specification.new do |s|
  }
 
 
- s.require_paths = ["lib"]
+ s.require_paths = ["lib", "vendor/jar-dependencies"]
  s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "VERSION", "docs/**/*"]
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
 
@@ -7,18 +7,17 @@ require "stud/temporary"
  describe "Restore from crash", :integration => true do
  include_context "setup plugin"
 
- let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write" }) }
-
  let(:number_of_files) { 5 }
  let(:dummy_content) { "foobar\n" * 100 }
- let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)}
 
  before do
  clean_remote_files(prefix)
  end
 
-
  context 'with a non-empty tempfile' do
+ let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write" }) }
+ let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)}
+
  before do
  # Creating a factory always create a file
  factory.current.write(dummy_content)
@@ -30,6 +29,7 @@ describe "Restore from crash", :integration => true do
  factory.current.fsync
  end
  end
+
  it "uploads the file to the bucket" do
  subject.register
  try(20) do
@@ -41,6 +41,9 @@ describe "Restore from crash", :integration => true do
  end
 
  context 'with an empty tempfile' do
+ let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write" }) }
+ let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "none", temporary_directory)}
+
  before do
  factory.current
  factory.rotate!
@@ -63,5 +66,68 @@ describe "Restore from crash", :integration => true do
  expect(bucket_resource.objects(:prefix => prefix).count).to eq(0)
  end
  end
+
+ context "#gzip encoding" do
+ let(:options) { main_options.merge({ "restore" => true, "canned_acl" => "public-read-write", "encoding" => "gzip" }) }
+ let(:factory) { LogStash::Outputs::S3::TemporaryFileFactory.new(prefix, tags, "gzip", temporary_directory)}
+ describe "with empty recovered file" do
+ before do
+ # Creating a factory always create a file
+ factory.current.write('')
+ factory.current.fsync
+ factory.current.close
+ end
+
+ it 'should not upload and not remove temp file' do
+ subject.register
+ try(20) do
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(0)
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(1)
+ end
+ end
+ end
+
+ describe "with healthy recovered, size is greater than zero file" do
+ before do
+ # Creating a factory always create a file
+ factory.current.write(dummy_content)
+ factory.current.fsync
+ factory.current.close
+
+ (number_of_files - 1).times do
+ factory.rotate!
+ factory.current.write(dummy_content)
+ factory.current.fsync
+ factory.current.close
+ end
+ end
+
+ it 'should recover, upload to S3 and remove temp file' do
+ subject.register
+ try(20) do
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(number_of_files)
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
+ expect(bucket_resource.objects(:prefix => prefix).first.acl.grants.collect(&:permission)).to include("READ", "WRITE")
+ end
+ end
+ end
+
+ describe "with failure when recovering" do
+ before do
+ # Creating a factory always create a file
+ factory.current.write(dummy_content)
+ factory.current.fsync
+ end
+
+ it 'should not upload to S3 and not remove temp file' do
+ subject.register
+ try(20) do
+ expect(bucket_resource.objects(:prefix => prefix).count).to eq(0)
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(1)
+ end
+ end
+ end
+ end
+
  end
 
@@ -25,11 +25,11 @@ describe LogStash::Outputs::S3::SizeRotationPolicy do
  end
 
  it "raises an exception if the `size_file` is 0" do
- expect { described_class.new(0) }.to raise_error(LogStash::ConfigurationError, /need to be greather than 0/)
+ expect { described_class.new(0) }.to raise_error(LogStash::ConfigurationError, /need to be greater than 0/)
  end
 
  it "raises an exception if the `size_file` is < 0" do
- expect { described_class.new(-100) }.to raise_error(LogStash::ConfigurationError, /need to be greather than 0/)
+ expect { described_class.new(-100) }.to raise_error(LogStash::ConfigurationError, /need to be greater than 0/)
  end
 
  context "#needs_periodic?" do
@@ -14,11 +14,11 @@ describe LogStash::Outputs::S3::TimeRotationPolicy do
  let(:file) { LogStash::Outputs::S3::TemporaryFile.new(name, temporary_file, temporary_directory) }
 
  it "raises an exception if the `file_time` is set to 0" do
- expect { described_class.new(0) }.to raise_error(LogStash::ConfigurationError, /`time_file` need to be greather than 0/)
+ expect { described_class.new(0) }.to raise_error(LogStash::ConfigurationError, /`time_file` need to be greater than 0/)
  end
 
  it "raises an exception if the `file_time` is < 0" do
- expect { described_class.new(-100) }.to raise_error(LogStash::ConfigurationError, /`time_file` need to be greather than 0/)
+ expect { described_class.new(-100) }.to raise_error(LogStash::ConfigurationError, /`time_file` need to be greater than 0/)
  end
 
  context "when the size of the file is superior to 0" do
@@ -67,6 +67,7 @@ shared_context "setup plugin" do
  let(:bucket) { ENV["AWS_LOGSTASH_TEST_BUCKET"] }
  let(:access_key_id) { ENV["AWS_ACCESS_KEY_ID"] }
  let(:secret_access_key) { ENV["AWS_SECRET_ACCESS_KEY"] }
+ let(:session_token) { ENV["AWS_SESSION_TOKEN"] }
  let(:size_file) { 100 }
  let(:time_file) { 100 }
  let(:tags) { [] }
@@ -80,6 +81,7 @@ shared_context "setup plugin" do
  "temporary_directory" => temporary_directory,
  "access_key_id" => access_key_id,
  "secret_access_key" => secret_access_key,
+ "session_token" => session_token,
  "size_file" => size_file,
  "time_file" => time_file,
  "region" => region,
@@ -87,7 +89,7 @@ shared_context "setup plugin" do
  }
  end
 
- let(:client_credentials) { Aws::Credentials.new(access_key_id, secret_access_key) }
+ let(:client_credentials) { Aws::Credentials.new(access_key_id, secret_access_key, session_token) }
  let(:bucket_resource) { Aws::S3::Bucket.new(bucket, { :credentials => client_credentials, :region => region }) }
 
  subject { LogStash::Outputs::S3.new(options) }
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-integration-aws
  version: !ruby/object:Gem::Version
- version: 7.0.0
+ version: 7.1.0
  platform: ruby
  authors:
  - Elastic
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-07-25 00:00:00.000000000 Z
+ date: 2023-01-11 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
@@ -311,6 +311,7 @@ files:
  - LICENSE
  - NOTICE.TXT
  - README.md
+ - VERSION
  - docs/codec-cloudfront.asciidoc
  - docs/codec-cloudtrail.asciidoc
  - docs/index.asciidoc
@@ -321,6 +322,7 @@ files:
  - docs/output-s3.asciidoc
  - docs/output-sns.asciidoc
  - docs/output-sqs.asciidoc
+ - lib/logstash-integration-aws_jars.rb
  - lib/logstash/codecs/cloudfront.rb
  - lib/logstash/codecs/cloudtrail.rb
  - lib/logstash/inputs/cloudwatch.rb
@@ -343,6 +345,7 @@ files:
  - lib/logstash/plugin_mixins/aws_config.rb
  - lib/logstash/plugin_mixins/aws_config/generic.rb
  - lib/logstash/plugin_mixins/aws_config/v2.rb
+ - lib/tasks/build.rake
  - logstash-integration-aws.gemspec
  - spec/codecs/cloudfront_spec.rb
  - spec/codecs/cloudtrail_spec.rb
@@ -402,6 +405,7 @@ post_install_message:
  rdoc_options: []
  require_paths:
  - lib
+ - vendor/jar-dependencies
  required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="