logstash-output-s3 3.2.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/lib/logstash/outputs/s3.rb +188 -308
- data/lib/logstash/outputs/s3/file_repository.rb +120 -0
- data/lib/logstash/outputs/s3/patch.rb +22 -0
- data/lib/logstash/outputs/s3/path_validator.rb +18 -0
- data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
- data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
- data/lib/logstash/outputs/s3/temporary_file_factory.rb +123 -0
- data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/uploader.rb +59 -0
- data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
- data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +49 -0
- data/logstash-output-s3.gemspec +2 -2
- data/spec/integration/dynamic_prefix_spec.rb +92 -0
- data/spec/integration/gzip_file_spec.rb +62 -0
- data/spec/integration/gzip_size_rotation_spec.rb +63 -0
- data/spec/integration/restore_from_crash_spec.rb +39 -0
- data/spec/integration/size_rotation_spec.rb +59 -0
- data/spec/integration/stress_test_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_stale_write_spec.rb +60 -0
- data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
- data/spec/outputs/s3/file_repository_spec.rb +146 -0
- data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
- data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
- data/spec/outputs/s3/temporary_file_factory_spec.rb +85 -0
- data/spec/outputs/s3/temporary_file_spec.rb +40 -0
- data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
- data/spec/outputs/s3/uploader_spec.rb +57 -0
- data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
- data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +38 -0
- data/spec/outputs/s3_spec.rb +52 -335
- data/spec/spec_helper.rb +6 -0
- data/spec/supports/helpers.rb +33 -9
- metadata +65 -4
- data/spec/integration/s3_spec.rb +0 -97
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1640533d698e2ce25c2bdc426ab18d7a7447bdb5
+  data.tar.gz: 0af7083fdb8848e0057e8ef537a20e884add3208
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8d8f14939c20adef00b8def8a4ef1cfed06a6f577ece6d741def4b7fc383febce068a2e49a4a91e41fc3ef18aa23ffcbd5fe7d9eb43d3028672bd3bf0656911d
+  data.tar.gz: c9b39d6715391c33fa5a4fcf0f0485bd4a197e1a5abb000460552b1011ceb9c31c1be3e9597081db607c84782bcf51d68cc82b970e58159fd83e1807716d1611
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,24 @@
+## 4.0.0
+- This version is a complete rewrite over version 3.0.0. See #103
+- This plugin now uses the V2 version of the SDK; this makes sure we receive the latest updates and changes.
+- We now use S3's `upload_file` instead of reading chunks; this method is more efficient and will use multipart uploads with threads if the file is too big.
+- You can now use the `fieldref` syntax in the prefix to dynamically change the target with the events the plugin receives.
+- The upload queue is now a bounded list. This is necessary to allow back pressure to be communicated back to the pipeline, but it is configurable by the user.
+- If the queue is full the plugin will start the upload in the current thread.
+- The plugin is now threadsafe and supports the concurrency model `shared`.
+- The rotation strategy can be selected; the recommended one is `size_and_time`, which checks both of the configured limits (`size` and `time` are also available).
+- The `restore` option will now use a separate threadpool with an unbounded queue.
+- The `restore` option will not block the launch of Logstash and will use fewer resources than the real-time path.
+- The plugin now uses `multi_receive_encode`; this optimizes the writes to the files.
+- Rotate operations are now batched to reduce the number of IO calls.
+- Empty files will not be uploaded by any rotation strategy.
+- We now use Concurrent-Ruby for the implementation of the Java executor.
+- If you have finer-grained permissions on prefixes or want a faster boot, you can disable the credentials check with `validate_credentials_on_root_bucket`.
+- The credentials check will no longer fail if we can't delete the file.
+- We now have a full suite of integration tests for all the defined rotations.
+
+Fixes: #4 #81 #44 #59 #50
+
 ## 3.2.0
 - Move to the new concurrency model `:single`
 - use correct license identifier #99
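The bounded upload queue and the "full queue uploads on the current thread" behaviour called out above come from Concurrent Ruby's thread pool. The sketch below is illustrative only (not the plugin's code) and shows how a `:caller_runs` fallback policy turns a full queue into back pressure: the submitting thread ends up running the job itself.

```ruby
require "concurrent"

# Illustrative sketch: a bounded pool whose full queue pushes work back
# onto the submitting thread (the back-pressure behaviour described above).
pool = Concurrent::ThreadPoolExecutor.new(
  :min_threads     => 1,
  :max_threads     => 2,
  :max_queue       => 2,
  :fallback_policy => :caller_runs
)

10.times do |i|
  # When the queue is full, this block runs on the producer's own thread,
  # which naturally slows the producer down.
  pool.post { sleep(0.1); puts "upload #{i} ran on #{Thread.current}" }
end

pool.shutdown
pool.wait_for_termination
```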
data/lib/logstash/outputs/s3.rb
CHANGED
@@ -4,11 +4,17 @@ require "logstash/namespace"
 require "logstash/plugin_mixins/aws_config"
 require "stud/temporary"
 require "stud/task"
-require "
+require "concurrent"
+require "socket"
 require "thread"
 require "tmpdir"
 require "fileutils"
+require "set"
+require "pathname"
+require "aws-sdk"
+require "logstash/outputs/s3/patch"
 
+Aws.eager_autoload!
 
 # INFORMATION:
 #
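`Aws.eager_autoload!` matters here because the rewritten plugin writes from multiple threads: Ruby's lazy `autoload` of SDK classes is not guaranteed to be thread safe, so loading everything up front avoids races on first use. A minimal sketch follows; the `services` option is an aws-sdk-core v2 feature and is shown here as an assumption (the plugin itself calls it with no arguments, as in the diff above).

```ruby
require "aws-sdk" # V2 SDK, matching the new require above

# Load SDK classes eagerly instead of relying on Ruby's lazy `autoload`,
# which can race when several worker threads touch a class for the first time.
Aws.eager_autoload!(services: %w(S3)) # limiting to S3 is an assumption
```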
@@ -17,35 +23,34 @@ require "fileutils"
 # Requirements:
 # * Amazon S3 Bucket and S3 Access Permissions (Typically access_key_id and secret_access_key)
 # * S3 PutObject permission
-# * Run logstash as superuser to establish connection
 #
-# S3 outputs create temporary files into
+# S3 outputs create temporary files into the OS' temporary directory; you can specify where to save them using the `temporary_directory` option.
 #
 # S3 output files have the following format
 #
 # ls.s3.ip-10-228-27-95.2013-04-18T10.00.tag_hello.part0.txt
 #
-# ls.s3 : indicate logstash plugin s3
 #
-#
-#
-#
-#
-#
-#
+# |=======
+# | ls.s3 | indicates the logstash plugin s3 |
+# | ip-10-228-27-95 | indicates the ip of your machine. |
+# | 2013-04-18T10.00 | represents the time whenever you specify time_file. |
+# | tag_hello | this indicates the event's tag. |
+# | part0 | this means that if you indicate size_file then it will generate more parts if your file.size > size_file. When a file is full it will be pushed to the bucket and then deleted from the temporary directory. If a file is empty, it is simply deleted. Empty files will not be pushed. |
+# |=======
 #
 # Crash Recovery:
-# * This plugin will recover and upload temporary log files after crash/abnormal termination
+# * This plugin will recover and upload temporary log files after crash/abnormal termination when `restore` is set to true
 #
 ##[Note regarding time_file and size_file] :
 #
-
-
+## Both time_file and size_file settings can trigger a log "file rotation"
+## A log rotation pushes the current log "part" to s3 and deletes it from local temporary storage.
 #
 ## If you specify BOTH size_file and time_file then it will create a file for each tag (if specified).
 ## When EITHER time_file minutes have elapsed OR log file size > size_file, a log rotation is triggered.
 ##
-## If you ONLY specify time_file but NOT file_size, one file for each tag (if specified) will be created
+## If you ONLY specify time_file but NOT size_file, one file for each tag (if specified) will be created.
 ## When time_file minutes elapse, a log rotation will be triggered.
 #
 ## If you ONLY specify size_file, but NOT time_file, one file for each tag (if specified) will be created.
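To make the documented name layout concrete, here is a small, hypothetical helper (not part of the plugin) that rebuilds a key of the form shown above:

```ruby
require "socket"

# Hypothetical helper mirroring the documented layout:
# ls.s3.<hostname>.<time>.tag_<tags>.part<N>.txt
def example_s3_key(tags: ["hello"], part: 0, time: Time.now)
  base = "ls.s3.#{Socket.gethostname}.#{time.strftime("%Y-%m-%dT%H.%M")}"
  tag_section = tags.empty? ? "" : ".tag_#{tags.join('.')}"
  "#{base}#{tag_section}.part#{part}.txt"
end

puts example_s3_key
# => something like "ls.s3.ip-10-228-27-95.2013-04-18T10.00.tag_hello.part0.txt"
```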
@@ -63,46 +68,63 @@ require "fileutils"
 # access_key_id => "crazy_key" (required)
 # secret_access_key => "monkey_access_key" (required)
 # region => "eu-west-1" (optional, default = "us-east-1")
-# bucket => "
+# bucket => "your_bucket" (required)
 # size_file => 2048 (optional) - Bytes
 # time_file => 5 (optional) - Minutes
 # format => "plain" (optional)
-# canned_acl => "private" (optional. Options are "private", "public_read", "public_read_write", "authenticated_read"
+# canned_acl => "private" (optional. Options are "private", "public_read", "public_read_write", "authenticated_read". Defaults to "private" )
 # }
 #
 class LogStash::Outputs::S3 < LogStash::Outputs::Base
-
+  require "logstash/outputs/s3/writable_directory_validator"
+  require "logstash/outputs/s3/path_validator"
+  require "logstash/outputs/s3/write_bucket_permission_validator"
+  require "logstash/outputs/s3/size_rotation_policy"
+  require "logstash/outputs/s3/time_rotation_policy"
+  require "logstash/outputs/s3/size_and_time_rotation_policy"
+  require "logstash/outputs/s3/temporary_file"
+  require "logstash/outputs/s3/temporary_file_factory"
+  require "logstash/outputs/s3/uploader"
+  require "logstash/outputs/s3/file_repository"
+
+  include LogStash::PluginMixins::AwsConfig::V2
+
+  PREFIX_KEY_NORMALIZE_CHARACTER = "_"
+  PERIODIC_CHECK_INTERVAL_IN_SECONDS = 15
+  CRASH_RECOVERY_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+                                :min_threads => 1,
+                                :max_threads => 2,
+                                :fallback_policy => :caller_runs
+                              })
 
-  TEMPFILE_EXTENSION = "txt"
-  S3_INVALID_CHARACTERS = /[\^`><]/
 
   config_name "s3"
-  default :codec,
+  default :codec, "line"
 
-  concurrency :
+  concurrency :shared
 
   # S3 bucket
-  config :bucket, :validate => :string
+  config :bucket, :validate => :string, :required => true
 
   # Set the size of file in bytes, this means that files on bucket when have dimension > file_size, they are stored in two or more files.
   # If you have tags then it will generate a specific size file for every tag
   ##NOTE: defining the size of the file is the better thing, because it generates a local temporary file on disk and then puts it in the bucket.
-  config :size_file, :validate => :number, :default =>
+  config :size_file, :validate => :number, :default => 1024 * 1024 * 5
 
   # Set the time, in MINUTES, to close the current sub_time_section of bucket.
   # If you define file_size you have a number of files in consideration of the section and the current tag.
   # 0 stays all time on listener, beware if you specify 0 and size_file 0, because you will not put the file on bucket,
   # for now the only thing this plugin can do is to put the file when logstash restarts.
-  config :time_file, :validate => :number, :default =>
+  config :time_file, :validate => :number, :default => 15
 
   ## IMPORTANT: if you use multiple instances of s3, you should specify on one of them "restore => true" and on the others "restore => false".
   ## This is a hack to not destroy the new files after restoring the initial files.
   ## If you do not specify "restore => true" when logstash crashes or is restarted, the files are not sent into the bucket,
   ## for example if you have a single Instance.
-  config :restore, :validate => :boolean, :default =>
+  config :restore, :validate => :boolean, :default => true
 
   # The S3 canned ACL to use when putting the file. Defaults to "private".
-  config :canned_acl, :validate => ["private", "public_read", "public_read_write", "authenticated_read"
+  config :canned_acl, :validate => ["private", "public_read", "public_read_write", "authenticated_read"],
          :default => "private"
 
   # Specifies whether or not to use S3's AES256 server side encryption. Defaults to false.
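The new defaults above (`size_file` of 5 MB, `time_file` of 15 minutes) combine under the `size_and_time` strategy shown later in this diff: rotate as soon as either limit is hit, but never upload an empty file. A simplified stand-in for that predicate (the real logic lives in `logstash/outputs/s3/size_and_time_rotation_policy.rb` and may differ in detail):

```ruby
# Simplified stand-in for the size-and-time rotation decision.
SIZE_FILE = 1024 * 1024 * 5 # bytes, the 4.0.0 default
TIME_FILE = 15              # minutes, the 4.0.0 default

def rotate?(bytes_written, created_at, now = Time.now)
  bytes_written >= SIZE_FILE ||
    (bytes_written > 0 && now - created_at >= TIME_FILE * 60)
end

rotate?(6 * 1024 * 1024, Time.now) # => true: over the size limit
rotate?(128, Time.now - 20 * 60)   # => true: stale for 20 minutes
rotate?(0, Time.now - 20 * 60)     # => false: empty files are never uploaded
```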
@@ -113,10 +135,14 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
 config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
 
 # Specify a prefix to the uploaded filename, this can simulate directories on S3. Prefix does not require leading slash.
+# This option supports string interpolation; be warned this can create a lot of temporary local files.
 config :prefix, :validate => :string, :default => ''
 
 # Specify how many workers to use to upload the files to S3
-config :upload_workers_count, :validate => :number, :default =>
+config :upload_workers_count, :validate => :number, :default => (Concurrent.processor_count * 0.5).ceil
+
+# Number of items we can keep in the local queue before uploading them
+config :upload_queue_size, :validate => :number, :default => 2 * (Concurrent.processor_count * 0.25).ceil
 
 # The version of the S3 signature hash to use. Normally uses the internal client default, can be explicitly
 # specified here
@@ -135,348 +161,202 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
 # Specify the content encoding. Supports ("gzip"). Defaults to "none"
 config :encoding, :validate => ["none", "gzip"], :default => "none"
 
-#
-
-
-attr_reader :s3
-
-def aws_s3_config
-  @logger.info("Registering s3 output", :bucket => @bucket, :endpoint_region => @region)
-  @s3 = AWS::S3.new(full_options)
-end
-
-def full_options
-  aws_options_hash.merge(signature_options)
-end
-
-def signature_options
-  if @signature_version
-    {:s3_signature_version => @signature_version}
-  else
-    {}
-  end
-end
-
-def aws_service_endpoint(region)
-  return {
-    :s3_endpoint => region == 'us-east-1' ? 's3.amazonaws.com' : "s3-#{region}.amazonaws.com"
-  }
-end
-
-public
-def write_on_bucket(file)
-  # find and use the bucket
-  bucket = @s3.buckets[@bucket]
-
-  remote_filename = "#{@prefix}#{File.basename(file)}"
+# Define the strategy to use to decide when we need to rotate the file and push it to S3.
+# The default strategy is to check both size and time; the first one to match will rotate the file.
+config :rotation_strategy, :validate => ["size_and_time", "size", "time"], :default => "size_and_time"
 
-
+# The common use case is to define permission on the root bucket and give Logstash full access to write its logs.
+# In some circumstances you need finer-grained permission on a subfolder; this allows you to disable the check at startup.
+config :validate_credentials_on_root_bucket, :validate => :boolean, :default => true
 
-
-
-
-
-
-
-
-             :content_encoding => @encoding == "gzip" ? "gzip" : nil)
-rescue AWS::Errors::Base => error
-  @logger.error("S3: AWS error", :error => error)
-  raise LogStash::Error, "AWS Configuration Error, #{error}"
+def register
+  # I've moved the validation of the items into custom classes
+  # to prepare for the new config validation that will be part of the core so the core can
+  # be moved easily.
+  unless @prefix.empty?
+    if !PathValidator.valid?(prefix)
+      raise LogStash::ConfigurationError, "Prefix must not contain: #{PathValidator::INVALID_CHARACTERS}"
    end
  end
 
-
-
-
-# This method is used for create new empty temporary files for use. Flag is needed for indicate new subsection time_file.
-public
-def create_temporary_file
-  filename = File.join(@temporary_directory, get_temporary_filename(@page_counter))
-
-  @logger.debug("S3: Creating a new temporary file", :filename => filename)
-
-  @file_rotation_lock.synchronize do
-    unless @tempfile.nil?
-      @tempfile.close
-    end
-
-    if @encoding == "gzip"
-      @tempfile = Zlib::GzipWriter.open(filename)
-    else
-      @tempfile = File.open(filename, "a")
-    end
+  if !WritableDirectoryValidator.valid?(@temporary_directory)
+    raise LogStash::ConfigurationError, "Logstash must have the permissions to write to the temporary directory: #{@temporary_directory}"
   end
-end
 
-
-
-require "aws-sdk"
-# required if using ruby version < 2.0
-# http://ruby.awsblog.com/post/Tx16QY1CI5GVBFT/Threading-with-the-AWS-SDK-for-Ruby
-AWS.eager_autoload!(AWS::S3)
-
-@s3 = aws_s3_config
-@upload_queue = Queue.new
-@file_rotation_lock = Mutex.new
-
-if @prefix && @prefix =~ S3_INVALID_CHARACTERS
-  @logger.error("S3: prefix contains invalid characters", :prefix => @prefix, :contains => S3_INVALID_CHARACTERS)
-  raise LogStash::ConfigurationError, "S3: prefix contains invalid characters"
+  if @validate_credentials_on_root_bucket && !WriteBucketPermissionValidator.valid?(bucket_resource)
+    raise LogStash::ConfigurationError, "Logstash must have the privileges to write to root bucket `#{@bucket}`, check your credentials or your permissions."
   end
 
-if
-
+  if @time_file.nil? && @size_file.nil? || @size_file == 0 && @time_file == 0
+    raise LogStash::ConfigurationError, "The S3 plugin must have at least one of time_file or size_file set to a value greater than 0"
   end
 
-
+  @file_repository = FileRepository.new(@tags, @encoding, @temporary_directory)
 
-
-reset_page_counter
-create_temporary_file
-configure_periodic_rotation if time_file != 0
-configure_upload_workers
+  @rotation = rotation_strategy
 
-
-
-
-
+  executor = Concurrent::ThreadPoolExecutor.new({ :min_threads => 1,
+                                                  :max_threads => @upload_workers_count,
+                                                  :max_queue => @upload_queue_size,
+                                                  :fallback_policy => :caller_runs })
 
+  @uploader = Uploader.new(bucket_resource, @logger, executor)
 
-
-
-
-def test_s3_write
-  @logger.debug("S3: Creating a test file on S3")
+  # Restoring from crash will use a new threadpool to slowly recover;
+  # new events should have more priority.
+  restore_from_crash if @restore
 
-
-
+  # If we need time-based rotation we need to do periodic checks on the files
+  # to take care of files that were not updated recently
+  start_periodic_check if @rotation.needs_periodic?
+end
 
-
-
-end
+def multi_receive_encoded(events_and_encoded)
+  prefix_written_to = Set.new
 
-
-
+  events_and_encoded.each do |event, encoded|
+    prefix_key = normalize_key(event.sprintf(@prefix))
+    prefix_written_to << prefix_key
 
    begin
-
-
-
-    rescue
-
-
+      @file_repository.get_file(prefix_key) { |file| file.write(encoded) }
+    # The output should stop accepting new events coming in, since it cannot do anything with them anymore.
+    # Log the error and rethrow it.
+    rescue Errno::ENOSPC => e
+      @logger.error("S3: No space left in temporary directory", :temporary_directory => @temporary_directory)
+      raise e
    end
-ensure
-  File.delete(test_filename)
  end
+
+  # Groups IO calls to optimize fstat checks
+  rotate_if_needed(prefix_written_to)
 end
 
-
-
-@logger.debug("S3: Checking for temp files from a previoius crash...")
+def close
+  stop_periodic_check if @rotation.needs_periodic?
 
-
-name_file = File.basename(file)
-@logger.warn("S3: Found temporary file from crash. Uploading file to S3.", :filename => name_file)
-move_file_to_bucket_async(file)
-end
-end
+  @logger.debug("Uploading current workspace")
 
-
-
-
-
-
+  # The plugin has stopped receiving new events, but we still have
+  # data on disk; let's make sure it gets to S3.
+  # If Logstash gets interrupted, the `restore_from_crash` (when set to true) method will pick up
+  # the content in the temporary directory and upload it.
+  # This will block the shutdown until all uploads are done or the user force quits.
+  @file_repository.each_files do |file|
+    upload_file(file)
  end
 
-
-File.delete(file)
-rescue Errno::ENOENT
-  # Something else deleted the file, logging but not raising the issue
-  @logger.warn("S3: Cannot delete the temporary file since it doesn't exist on disk", :filename => File.basename(file))
-rescue Errno::EACCES
-  @logger.error("S3: Logstash doesnt have the permission to delete the file in the temporary directory.", :filename => File.basename(file), :temporary_directory => @temporary_directory)
-end
-end
+  @file_repository.shutdown
 
-
-
-@time_file * 60
+  @uploader.stop # wait until all the current uploads are complete
+  @crash_uploader.stop if @restore # we might still have work to do for recovery, so wait until we are done
 end
 
-
-
-
+def full_options
+  options = { :credentials => credentials }
+  options[:s3_signature_version] = @signature_version if @signature_version
+  options.merge(aws_options_hash)
 end
 
-
-
-current_time = Time.now
-filename = "ls.s3.#{Socket.gethostname}.#{current_time.strftime("%Y-%m-%dT%H.%M")}"
-
-if @tags.size > 0
-  return "#{filename}.tag_#{@tags.join('.')}.part#{page_counter}.#{get_tempfile_extension}"
-else
-  return "#{filename}.part#{page_counter}.#{get_tempfile_extension}"
-end
+def normalize_key(prefix_key)
+  prefix_key.gsub(PathValidator.matches_re, PREFIX_KEY_NORMALIZE_CHARACTER)
 end
 
-
-
-
-@
-end
+private
+# We start a task in the background to check for stale files and make sure we rotate them to S3 if needed.
+def start_periodic_check
+  @logger.debug("Start periodic rotation check")
 
-
-
-@file_rotation_lock.synchronize do
-  tempfile_size > @size_file
-end
-end
+  @periodic_check = Concurrent::TimerTask.new(:execution_interval => PERIODIC_CHECK_INTERVAL_IN_SECONDS) do
+    @logger.debug("Periodic check for stale files")
 
-
-def tempfile_size
-  if @tempfile.instance_of? File
-    @tempfile.size
-  elsif @tempfile.instance_of? Zlib::GzipWriter
-    @tempfile.tell
-  else
-    raise LogStash::Error, "Unable to get size of temp file of type #{@tempfile.class}"
+    rotate_if_needed(@file_repository.keys)
  end
-end
 
-
-def write_events_to_multiple_files?
-  @size_file > 0
+  @periodic_check.execute
 end
 
-
-
-begin
-  @logger.debug("S3: put event into tempfile ", :tempfile => File.basename(@tempfile.path))
-
-  @file_rotation_lock.synchronize do
-    @tempfile.write(event)
-  end
-rescue Errno::ENOSPC
-  @logger.error("S3: No space left in temporary directory", :temporary_directory => @temporary_directory)
-  close
-end
+def stop_periodic_check
+  @periodic_check.shutdown
 end
 
-
-
-shutdown_upload_workers
-@periodic_rotation_thread.stop! if @periodic_rotation_thread
-
-@file_rotation_lock.synchronize do
-  @tempfile.close unless @tempfile.nil? && @tempfile.closed?
-end
+def bucket_resource
+  Aws::S3::Bucket.new(@bucket, { :credentials => credentials }.merge(aws_options_hash))
 end
 
-
-
-@logger.debug("S3: Gracefully shutdown the upload workers")
-@upload_queue << LogStash::SHUTDOWN
+def aws_service_endpoint(region)
+  { :s3_endpoint => region == 'us-east-1' ? 's3.amazonaws.com' : "s3-#{region}.amazonaws.com"}
 end
 
-
-
-
-
-
-
-tempfile_path = @tempfile.path
-# close and start next file before sending the previous one
-next_page
-create_temporary_file
-
-# send to s3
-move_file_to_bucket_async(tempfile_path)
-else
-  @logger.debug("S3: tempfile file size report.", :tempfile_size => tempfile_size, :size_file => @size_file)
-end
-end
-
-write_to_tempfile(encoded_event)
+def upload_options
+  {
+    :acl => @cannel_acl,
+    :server_side_encryption => @server_side_encryption ? :aes256 : nil,
+    :content_encoding => @encoding == "gzip" ? "gzip" : nil
+  }
 end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def rotate_if_needed(prefixes)
+  prefixes.each do |prefix|
+    # Each file access is thread safe,
+    # and until the rotation is done only
+    # one thread has access to the resource.
+    @file_repository.get_factory(prefix) do |factory|
+      temp_file = factory.current
+
+      if @rotation.rotate?(temp_file)
+        @logger.debug("Rotate file",
+                      :strategy => @rotation.class.name,
+                      :key => temp_file.key,
+                      :path => temp_file.path)
+
+        upload_file(temp_file)
+        factory.rotate!
+      end
    end
  end
 end
 
-
-
-@logger.debug("S3: Configure upload workers")
-
-@upload_workers = @upload_workers_count.times.map do |worker_id|
-  Stud::Task.new do
-    LogStash::Util::set_thread_name("<S3 upload worker #{worker_id}")
-
-    continue = true
-    while continue do
-      @logger.debug("S3: upload worker is waiting for a new file to upload.", :worker_id => worker_id)
+def upload_file(temp_file)
+  @logger.debug("Queue for upload", :path => temp_file.path)
 
-
-
-
+  # if the queue is full the calling thread will be used to upload
+  temp_file.close # make sure the content is on disk
+  if temp_file.size > 0
+    @uploader.upload_async(temp_file,
+                           :on_complete => method(:clean_temporary_file),
+                           :upload_options => upload_options )
  end
 end
 
-
-
-
-
-
-
-
-
-@upload_queue.enq(LogStash::SHUTDOWN)
-false
-else
-  @logger.debug("S3: upload working is uploading a new file", :filename => File.basename(file))
-  move_file_to_bucket(file)
-  true
-end
-rescue Exception => ex
-  @logger.error("failed to upload, will re-enqueue #{file} for upload",
-    :ex => ex, :backtrace => ex.backtrace)
-  unless file.nil? # Rare case if the first line of the begin doesn't execute
-    @upload_queue.enq(file)
-  end
-  true
+def rotation_strategy
+  case @rotation_strategy
+  when "size"
+    SizeRotationPolicy.new(size_file)
+  when "time"
+    TimeRotationPolicy.new(time_file)
+  when "size_and_time"
+    SizeAndTimeRotationPolicy.new(size_file, time_file)
  end
 end
 
-
-
-
+def clean_temporary_file(file)
+  @logger.debug("Removing temporary file", :file => file.path)
+  file.delete!
 end
 
-
-
-
-
-
-
-
-
-
+# The upload process will use a separate uploader/threadpool with fewer resources allocated to it,
+# but it will use an unbounded queue for the work; it may take some time before all the older files get processed.
+def restore_from_crash
+  @crash_uploader = Uploader.new(bucket_resource, @logger, CRASH_RECOVERY_THREADPOOL)
+
+  temp_folder_path = Pathname.new(@temporary_directory)
+  Dir.glob(::File.join(@temporary_directory, "**/*"))
+     .select { |file| ::File.file?(file) }
+     .each do |file|
+    temp_file = TemporaryFile.create_from_existing_file(file, temp_folder_path)
+    @logger.debug("Recovering from crash and uploading", :file => temp_file.path)
+    @crash_uploader.upload_async(temp_file, :on_complete => method(:clean_temporary_file))
+  end
 end
 end