logstash-output-s3 3.2.0 → 4.0.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/lib/logstash/outputs/s3.rb +188 -308
- data/lib/logstash/outputs/s3/file_repository.rb +120 -0
- data/lib/logstash/outputs/s3/patch.rb +22 -0
- data/lib/logstash/outputs/s3/path_validator.rb +18 -0
- data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
- data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
- data/lib/logstash/outputs/s3/temporary_file_factory.rb +123 -0
- data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/uploader.rb +59 -0
- data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
- data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +49 -0
- data/logstash-output-s3.gemspec +2 -2
- data/spec/integration/dynamic_prefix_spec.rb +92 -0
- data/spec/integration/gzip_file_spec.rb +62 -0
- data/spec/integration/gzip_size_rotation_spec.rb +63 -0
- data/spec/integration/restore_from_crash_spec.rb +39 -0
- data/spec/integration/size_rotation_spec.rb +59 -0
- data/spec/integration/stress_test_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_stale_write_spec.rb +60 -0
- data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
- data/spec/outputs/s3/file_repository_spec.rb +146 -0
- data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
- data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
- data/spec/outputs/s3/temporary_file_factory_spec.rb +85 -0
- data/spec/outputs/s3/temporary_file_spec.rb +40 -0
- data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
- data/spec/outputs/s3/uploader_spec.rb +57 -0
- data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
- data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +38 -0
- data/spec/outputs/s3_spec.rb +52 -335
- data/spec/spec_helper.rb +6 -0
- data/spec/supports/helpers.rb +33 -9
- metadata +65 -4
- data/spec/integration/s3_spec.rb +0 -97
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1640533d698e2ce25c2bdc426ab18d7a7447bdb5
+  data.tar.gz: 0af7083fdb8848e0057e8ef537a20e884add3208
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8d8f14939c20adef00b8def8a4ef1cfed06a6f577ece6d741def4b7fc383febce068a2e49a4a91e41fc3ef18aa23ffcbd5fe7d9eb43d3028672bd3bf0656911d
+  data.tar.gz: c9b39d6715391c33fa5a4fcf0f0485bd4a197e1a5abb000460552b1011ceb9c31c1be3e9597081db607c84782bcf51d68cc82b970e58159fd83e1807716d1611
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,24 @@
+## 4.0.0
+- This version is a complete rewrite over version 3.0.0, see #103.
+- This plugin now uses the V2 version of the SDK, which ensures we receive the latest updates and changes.
+- We now use S3's `upload_file` instead of reading chunks; this method is more efficient and will use multipart uploads (with threads) if the file is big.
+- You can now use the `fieldref` syntax in the prefix to dynamically change the target based on the events it receives.
+- The upload queue is now a bounded list. This is necessary to allow back pressure to be communicated back to the pipeline, and the bound is configurable by the user.
+- If the queue is full, the plugin will start the upload in the current thread.
+- The plugin is now threadsafe and supports the concurrency model `shared`.
+- The rotation strategy can be selected; the recommended one is `size_and_time`, which checks both of the configured limits (`size` and `time` are also available).
+- The `restore` option will now use a separate threadpool with an unbounded queue.
+- The `restore` option will not block the launch of Logstash and will use fewer resources than the real-time path.
+- The plugin now uses `multi_receive_encoded`, which optimizes the writes to the files.
+- Rotate operations are now batched to reduce the number of IO calls.
+- Empty files will not be uploaded by any rotation strategy.
+- We now use Concurrent-Ruby for the implementation of the Java executor.
+- If you have finer-grained permissions on prefixes or want a faster boot, you can disable the credentials check with `validate_credentials_on_root_bucket`.
+- The credentials check will no longer fail if we can't delete the file.
+- We now have a full suite of integration tests for all the defined rotations.
+
+Fixes: #4 #81 #44 #59 #50
+
 ## 3.2.0
 - Move to the new concurrency model `:single`
 - use correct license identifier #99
data/lib/logstash/outputs/s3.rb
CHANGED
@@ -4,11 +4,17 @@ require "logstash/namespace"
 require "logstash/plugin_mixins/aws_config"
 require "stud/temporary"
 require "stud/task"
-require "socket" # for Socket.gethostname
+require "concurrent"
+require "socket"
 require "thread"
 require "tmpdir"
 require "fileutils"
+require "set"
+require "pathname"
+require "aws-sdk"
+require "logstash/outputs/s3/patch"
 
+Aws.eager_autoload!
 
 # INFORMATION:
 #
@@ -17,35 +23,34 @@ require "fileutils"
 # Requirements:
 # * Amazon S3 Bucket and S3 Access Permissions (Typically access_key_id and secret_access_key)
 # * S3 PutObject permission
-# * Run logstash as superuser to establish connection
 #
-# S3 outputs create temporary files into
+# S3 outputs create temporary files into the OS' temporary directory, you can specify where to save them using the `temporary_directory` option.
 #
 # S3 output files have the following format
 #
 # ls.s3.ip-10-228-27-95.2013-04-18T10.00.tag_hello.part0.txt
 #
-# ls.s3 : indicate logstash plugin s3
 #
-#
-#
-#
-#
-#
-#
+# |=======
+# | ls.s3 | indicate logstash plugin s3 |
+# | ip-10-228-27-95 | indicates the ip of your machine. |
+# | 2013-04-18T10.00 | represents the time whenever you specify time_file. |
+# | tag_hello | this indicates the event's tag. |
+# | part0 | this means if you indicate size_file then it will generate more parts if you file.size > size_file. When a file is full it will be pushed to the bucket and then deleted from the temporary directory. If a file is empty, it is simply deleted. Empty files will not be pushed |
+# |=======
 #
 # Crash Recovery:
-# * This plugin will recover and upload temporary log files after crash/abnormal termination
+# * This plugin will recover and upload temporary log files after crash/abnormal termination when using `restore` set to true
 #
 ##[Note regarding time_file and size_file] :
 #
-
-
+## Both time_file and size_file settings can trigger a log "file rotation"
+## A log rotation pushes the current log "part" to s3 and deleted from local temporary storage.
 #
 ## If you specify BOTH size_file and time_file then it will create file for each tag (if specified).
 ## When EITHER time_file minutes have elapsed OR log file size > size_file, a log rotation is triggered.
 ##
-## If you ONLY specify time_file but NOT file_size, one file for each tag (if specified) will be created
+## If you ONLY specify time_file but NOT file_size, one file for each tag (if specified) will be created.
 ## When time_file minutes elapses, a log rotation will be triggered.
 #
 ## If you ONLY specify size_file, but NOT time_file, one files for each tag (if specified) will be created.
@@ -63,46 +68,63 @@ require "fileutils"
 #    access_key_id => "crazy_key"             (required)
 #    secret_access_key => "monkey_access_key" (required)
 #    region => "eu-west-1"                    (optional, default = "us-east-1")
-#    bucket => "boss_please_open_your_bucket" (required)
+#    bucket => "your_bucket"                  (required)
 #    size_file => 2048                        (optional) - Bytes
 #    time_file => 5                           (optional) - Minutes
 #    format => "plain"                        (optional)
-#    canned_acl => "private"                  (optional. Options are "private", "public_read", "public_read_write", "authenticated_read"
+#    canned_acl => "private"                  (optional. Options are "private", "public_read", "public_read_write", "authenticated_read". Defaults to "private" )
 # }
 #
 class LogStash::Outputs::S3 < LogStash::Outputs::Base
-
+  require "logstash/outputs/s3/writable_directory_validator"
+  require "logstash/outputs/s3/path_validator"
+  require "logstash/outputs/s3/write_bucket_permission_validator"
+  require "logstash/outputs/s3/size_rotation_policy"
+  require "logstash/outputs/s3/time_rotation_policy"
+  require "logstash/outputs/s3/size_and_time_rotation_policy"
+  require "logstash/outputs/s3/temporary_file"
+  require "logstash/outputs/s3/temporary_file_factory"
+  require "logstash/outputs/s3/uploader"
+  require "logstash/outputs/s3/file_repository"
+
+  include LogStash::PluginMixins::AwsConfig::V2
+
+  PREFIX_KEY_NORMALIZE_CHARACTER = "_"
+  PERIODIC_CHECK_INTERVAL_IN_SECONDS = 15
+  CRASH_RECOVERY_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+                                :min_threads => 1,
+                                :max_threads => 2,
+                                :fallback_policy => :caller_runs
+                              })
 
-  TEMPFILE_EXTENSION = "txt"
-  S3_INVALID_CHARACTERS = /[\^`><]/
 
   config_name "s3"
-  default :codec,
+  default :codec, "line"
 
-  concurrency :single
+  concurrency :shared
 
   # S3 bucket
-  config :bucket, :validate => :string
+  config :bucket, :validate => :string, :required => true
 
   # Set the size of file in bytes, this means that files on bucket when have dimension > file_size, they are stored in two or more file.
   # If you have tags then it will generate a specific size file for every tags
   ##NOTE: define size of file is the better thing, because generate a local temporary file on disk and then put it in bucket.
-  config :size_file, :validate => :number, :default => 0
+  config :size_file, :validate => :number, :default => 1024 * 1024 * 5
 
   # Set the time, in MINUTES, to close the current sub_time_section of bucket.
   # If you define file_size you have a number of files in consideration of the section and the current tag.
   # 0 stay all time on listerner, beware if you specific 0 and size_file 0, because you will not put the file on bucket,
   # for now the only thing this plugin can do is to put the file when logstash restart.
-  config :time_file, :validate => :number, :default => 0
+  config :time_file, :validate => :number, :default => 15
 
   ## IMPORTANT: if you use multiple instance of s3, you should specify on one of them the "restore=> true" and on the others "restore => false".
   ## This is hack for not destroy the new files after restoring the initial files.
   ## If you do not specify "restore => true" when logstash crashes or is restarted, the files are not sent into the bucket,
   ## for example if you have single Instance.
-  config :restore, :validate => :boolean, :default => false
+  config :restore, :validate => :boolean, :default => true
 
   # The S3 canned ACL to use when putting the file. Defaults to "private".
-  config :canned_acl, :validate => ["private", "public_read", "public_read_write", "authenticated_read"
+  config :canned_acl, :validate => ["private", "public_read", "public_read_write", "authenticated_read"],
     :default => "private"
 
   # Specifies wether or not to use S3's AES256 server side encryption. Defaults to false.
@@ -113,10 +135,14 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
   config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
 
   # Specify a prefix to the uploaded filename, this can simulate directories on S3. Prefix does not require leading slash.
+  # This option support string interpolation, be warned this can created a lot of temporary local files.
   config :prefix, :validate => :string, :default => ''
 
   # Specify how many workers to use to upload the files to S3
-  config :upload_workers_count, :validate => :number, :default => 1
+  config :upload_workers_count, :validate => :number, :default => (Concurrent.processor_count * 0.5).ceil
+
+  # Number of items we can keep in the local queue before uploading them
+  config :upload_queue_size, :validate => :number, :default => 2 * (Concurrent.processor_count * 0.25).ceil
 
   # The version of the S3 signature hash to use. Normally uses the internal client default, can be explicitly
   # specified here
@@ -135,348 +161,202 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
   # Specify the content encoding. Supports ("gzip"). Defaults to "none"
   config :encoding, :validate => ["none", "gzip"], :default => "none"
 
-  #
-
-
-  attr_reader :s3
-
-  def aws_s3_config
-    @logger.info("Registering s3 output", :bucket => @bucket, :endpoint_region => @region)
-    @s3 = AWS::S3.new(full_options)
-  end
-
-  def full_options
-    aws_options_hash.merge(signature_options)
-  end
-
-  def signature_options
-    if @signature_version
-      {:s3_signature_version => @signature_version}
-    else
-      {}
-    end
-  end
-
-  def aws_service_endpoint(region)
-    return {
-      :s3_endpoint => region == 'us-east-1' ? 's3.amazonaws.com' : "s3-#{region}.amazonaws.com"
-    }
-  end
-
-  public
-  def write_on_bucket(file)
-    # find and use the bucket
-    bucket = @s3.buckets[@bucket]
-
-    remote_filename = "#{@prefix}#{File.basename(file)}"
+  # Define the strategy to use to decide when we need to rotate the file and push it to S3,
+  # The default strategy is to check for both size and time, the first one to match will rotate the file.
+  config :rotation_strategy, :validate => ["size_and_time", "size", "time"], :default => "size_and_time"
 
-
+  # The common use case is to define permission on the root bucket and give Logstash full access to write its logs.
+  # In some circonstances you need finer grained permission on subfolder, this allow you to disable the check at startup.
+  config :validate_credentials_on_root_bucket, :validate => :boolean, :default => true
 
-
-
-
-
-
-
-
-                 :content_encoding => @encoding == "gzip" ? "gzip" : nil)
-  rescue AWS::Errors::Base => error
-    @logger.error("S3: AWS error", :error => error)
-    raise LogStash::Error, "AWS Configuration Error, #{error}"
+  def register
+    # I've move the validation of the items into custom classes
+    # to prepare for the new config validation that will be part of the core so the core can
+    # be moved easily.
+    unless @prefix.empty?
+      if !PathValidator.valid?(prefix)
+        raise LogStash::ConfigurationError, "Prefix must not contains: #{PathValidator::INVALID_CHARACTERS}"
       end
     end
 
-
-
-
-  # This method is used for create new empty temporary files for use. Flag is needed for indicate new subsection time_file.
-  public
-  def create_temporary_file
-    filename = File.join(@temporary_directory, get_temporary_filename(@page_counter))
-
-    @logger.debug("S3: Creating a new temporary file", :filename => filename)
-
-    @file_rotation_lock.synchronize do
-      unless @tempfile.nil?
-        @tempfile.close
-      end
-
-      if @encoding == "gzip"
-        @tempfile = Zlib::GzipWriter.open(filename)
-      else
-        @tempfile = File.open(filename, "a")
-      end
+    if !WritableDirectoryValidator.valid?(@temporary_directory)
+      raise LogStash::ConfigurationError, "Logstash must have the permissions to write to the temporary directory: #{@temporary_directory}"
     end
-  end
 
-
-
-  require "aws-sdk"
-  # required if using ruby version < 2.0
-  # http://ruby.awsblog.com/post/Tx16QY1CI5GVBFT/Threading-with-the-AWS-SDK-for-Ruby
-  AWS.eager_autoload!(AWS::S3)
-
-  @s3 = aws_s3_config
-  @upload_queue = Queue.new
-  @file_rotation_lock = Mutex.new
-
-  if @prefix && @prefix =~ S3_INVALID_CHARACTERS
-    @logger.error("S3: prefix contains invalid characters", :prefix => @prefix, :contains => S3_INVALID_CHARACTERS)
-    raise LogStash::ConfigurationError, "S3: prefix contains invalid characters"
+    if @validate_credentials_on_root_bucket && !WriteBucketPermissionValidator.valid?(bucket_resource)
+      raise LogStash::ConfigurationError, "Logstash must have the privileges to write to root bucket `#{@bucket}`, check you credentials or your permissions."
    end
 
-    if
-
+    if @time_file.nil? && @size_file.nil? || @size_file == 0 && @time_file == 0
+      raise LogStash::ConfigurationError, "The S3 plugin must have at least one of time_file or size_file set to a value greater than 0"
     end
 
-
+    @file_repository = FileRepository.new(@tags, @encoding, @temporary_directory)
 
-
-    reset_page_counter
-    create_temporary_file
-    configure_periodic_rotation if time_file != 0
-    configure_upload_workers
+    @rotation = rotation_strategy
 
-
-
-
-
+    executor = Concurrent::ThreadPoolExecutor.new({ :min_threads => 1,
+                                                    :max_threads => @upload_workers_count,
+                                                    :max_queue => @upload_queue_size,
+                                                    :fallback_policy => :caller_runs })
 
+    @uploader = Uploader.new(bucket_resource, @logger, executor)
 
-
-
-
-  def test_s3_write
-    @logger.debug("S3: Creating a test file on S3")
+    # Restoring from crash will use a new threadpool to slowly recover
+    # New events should have more priority.
+    restore_from_crash if @restore
 
-
-
+    # If we need time based rotation we need to do periodic check on the file
+    # to take care of file that were not updated recently
+    start_periodic_check if @rotation.needs_periodic?
+  end
 
-
-
-  end
+  def multi_receive_encoded(events_and_encoded)
+    prefix_written_to = Set.new
 
-
-
+    events_and_encoded.each do |event, encoded|
+      prefix_key = normalize_key(event.sprintf(@prefix))
+      prefix_written_to << prefix_key
 
       begin
-
-
-
-      rescue
-
-
+        @file_repository.get_file(prefix_key) { |file| file.write(encoded) }
+      # The output should stop accepting new events coming in, since it cannot do anything with them anymore.
+      # Log the error and rethrow it.
+      rescue Errno::ENOSPC => e
+        @logger.error("S3: No space left in temporary directory", :temporary_directory => @temporary_directory)
+        raise e
       end
-    ensure
-      File.delete(test_filename)
     end
+
+    # Groups IO calls to optimize fstat checks
+    rotate_if_needed(prefix_written_to)
   end
 
-
-
-    @logger.debug("S3: Checking for temp files from a previoius crash...")
+  def close
+    stop_periodic_check if @rotation.needs_periodic?
 
-
-      name_file = File.basename(file)
-      @logger.warn("S3: Found temporary file from crash. Uploading file to S3.", :filename => name_file)
-      move_file_to_bucket_async(file)
-    end
-  end
+    @logger.debug("Uploading current workspace")
 
-
-
-
-
-
+    # The plugin has stopped receiving new events, but we still have
+    # data on disk, lets make sure it get to S3.
+    # If Logstash get interrupted, the `restore_from_crash` (when set to true) method will pickup
+    # the content in the temporary directly and upload it.
+    # This will block the shutdown until all upload are done or the use force quit.
+    @file_repository.each_files do |file|
+      upload_file(file)
     end
 
-
-      File.delete(file)
-    rescue Errno::ENOENT
-      # Something else deleted the file, logging but not raising the issue
-      @logger.warn("S3: Cannot delete the temporary file since it doesn't exist on disk", :filename => File.basename(file))
-    rescue Errno::EACCES
-      @logger.error("S3: Logstash doesnt have the permission to delete the file in the temporary directory.", :filename => File.basename(file), :temporary_directory => @temporary_directory)
-    end
-  end
+    @file_repository.shutdown
 
-
-
-    @time_file * 60
+    @uploader.stop # wait until all the current upload are complete
+    @crash_uploader.stop if @restore # we might have still work to do for recovery so wait until we are done
   end
 
-
-
-
+  def full_options
+    options = { :credentials => credentials }
+    options[:s3_signature_version] = @signature_version if @signature_version
+    options.merge(aws_options_hash)
   end
 
-
-
-    current_time = Time.now
-    filename = "ls.s3.#{Socket.gethostname}.#{current_time.strftime("%Y-%m-%dT%H.%M")}"
-
-    if @tags.size > 0
-      return "#{filename}.tag_#{@tags.join('.')}.part#{page_counter}.#{get_tempfile_extension}"
-    else
-      return "#{filename}.part#{page_counter}.#{get_tempfile_extension}"
-    end
+  def normalize_key(prefix_key)
+    prefix_key.gsub(PathValidator.matches_re, PREFIX_KEY_NORMALIZE_CHARACTER)
   end
 
-
-
-
-    @
-  end
+  private
+  # We start a task in the background for check for stale files and make sure we rotate them to S3 if needed.
+  def start_periodic_check
+    @logger.debug("Start periodic rotation check")
 
-
-
-    @file_rotation_lock.synchronize do
-      tempfile_size > @size_file
-    end
-  end
+    @periodic_check = Concurrent::TimerTask.new(:execution_interval => PERIODIC_CHECK_INTERVAL_IN_SECONDS) do
+      @logger.debug("Periodic check for stale files")
 
-
-  def tempfile_size
-    if @tempfile.instance_of? File
-      @tempfile.size
-    elsif @tempfile.instance_of? Zlib::GzipWriter
-      @tempfile.tell
-    else
-      raise LogStash::Error, "Unable to get size of temp file of type #{@tempfile.class}"
+      rotate_if_needed(@file_repository.keys)
     end
-  end
 
-
-  def write_events_to_multiple_files?
-    @size_file > 0
+    @periodic_check.execute
   end
 
-
-
-    begin
-      @logger.debug("S3: put event into tempfile ", :tempfile => File.basename(@tempfile.path))
-
-      @file_rotation_lock.synchronize do
-        @tempfile.write(event)
-      end
-    rescue Errno::ENOSPC
-      @logger.error("S3: No space left in temporary directory", :temporary_directory => @temporary_directory)
-      close
-    end
+  def stop_periodic_check
+    @periodic_check.shutdown
   end
 
-
-
-    shutdown_upload_workers
-    @periodic_rotation_thread.stop! if @periodic_rotation_thread
-
-    @file_rotation_lock.synchronize do
-      @tempfile.close unless @tempfile.nil? && @tempfile.closed?
-    end
+  def bucket_resource
+    Aws::S3::Bucket.new(@bucket, { :credentials => credentials }.merge(aws_options_hash))
   end
 
-
-
-    @logger.debug("S3: Gracefully shutdown the upload workers")
-    @upload_queue << LogStash::SHUTDOWN
+  def aws_service_endpoint(region)
+    { :s3_endpoint => region == 'us-east-1' ? 's3.amazonaws.com' : "s3-#{region}.amazonaws.com"}
   end
 
-
-
-
-
-
-
-    tempfile_path = @tempfile.path
-    # close and start next file before sending the previous one
-    next_page
-    create_temporary_file
-
-    # send to s3
-    move_file_to_bucket_async(tempfile_path)
-    else
-      @logger.debug("S3: tempfile file size report.", :tempfile_size => tempfile_size, :size_file => @size_file)
-    end
-  end
-
-    write_to_tempfile(encoded_event)
+  def upload_options
+    {
+      :acl => @cannel_acl,
+      :server_side_encryption => @server_side_encryption ? :aes256 : nil,
+      :content_encoding => @encoding == "gzip" ? "gzip" : nil
+    }
   end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  def rotate_if_needed(prefixes)
+    prefixes.each do |prefix|
+      # Each file access is thread safe,
+      # until the rotation is done then only
+      # one thread has access to the resource.
+      @file_repository.get_factory(prefix) do |factory|
+        temp_file = factory.current
+
+        if @rotation.rotate?(temp_file)
+          @logger.debug("Rotate file",
+                        :strategy => @rotation.class.name,
+                        :key => temp_file.key,
+                        :path => temp_file.path)
+
+          upload_file(temp_file)
+          factory.rotate!
+        end
       end
     end
   end
 
-
-
-    @logger.debug("S3: Configure upload workers")
-
-    @upload_workers = @upload_workers_count.times.map do |worker_id|
-      Stud::Task.new do
-        LogStash::Util::set_thread_name("<S3 upload worker #{worker_id}")
-
-        continue = true
-        while continue do
-          @logger.debug("S3: upload worker is waiting for a new file to upload.", :worker_id => worker_id)
+  def upload_file(temp_file)
+    @logger.debug("Queue for upload", :path => temp_file.path)
 
-
-
-
+    # if the queue is full the calling thread will be used to upload
+    temp_file.close # make sure the content is on disk
+    if temp_file.size > 0
+      @uploader.upload_async(temp_file,
+                             :on_complete => method(:clean_temporary_file),
+                             :upload_options => upload_options )
     end
   end
 
-
-
-
-
-
-
-
-
-          @upload_queue.enq(LogStash::SHUTDOWN)
-          false
-        else
-          @logger.debug("S3: upload working is uploading a new file", :filename => File.basename(file))
-          move_file_to_bucket(file)
-          true
-        end
-      rescue Exception => ex
-        @logger.error("failed to upload, will re-enqueue #{file} for upload",
-                      :ex => ex, :backtrace => ex.backtrace)
-        unless file.nil? # Rare case if the first line of the begin doesn't execute
-          @upload_queue.enq(file)
-        end
-        true
+  def rotation_strategy
+    case @rotation_strategy
+    when "size"
+      SizeRotationPolicy.new(size_file)
+    when "time"
+      TimeRotationPolicy.new(time_file)
+    when "size_and_time"
+      SizeAndTimeRotationPolicy.new(size_file, time_file)
     end
   end
 
-
-
-
+  def clean_temporary_file(file)
+    @logger.debug("Removing temporary file", :file => file.path)
+    file.delete!
   end
 
-
-
-
-
-
-
-
-
-
+  # The upload process will use a separate uploader/threadpool with less resource allocated to it.
+  # but it will use an unbounded queue for the work, it may take some time before all the older files get processed.
+  def restore_from_crash
+    @crash_uploader = Uploader.new(bucket_resource, @logger, CRASH_RECOVERY_THREADPOOL)
+
+    temp_folder_path = Pathname.new(@temporary_directory)
+    Dir.glob(::File.join(@temporary_directory, "**/*"))
+       .select { |file| ::File.file?(file) }
+       .each do |file|
+      temp_file = TemporaryFile.create_from_existing_file(file, temp_folder_path)
+      @logger.debug("Recovering from crash and uploading", :file => temp_file.path)
+      @crash_uploader.upload_async(temp_file, :on_complete => method(:clean_temporary_file))
+    end
   end
 end