logstash-output-s3 3.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -0
  3. data/lib/logstash/outputs/s3.rb +188 -308
  4. data/lib/logstash/outputs/s3/file_repository.rb +120 -0
  5. data/lib/logstash/outputs/s3/patch.rb +22 -0
  6. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  7. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  8. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  9. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  10. data/lib/logstash/outputs/s3/temporary_file_factory.rb +123 -0
  11. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  12. data/lib/logstash/outputs/s3/uploader.rb +59 -0
  13. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  14. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +49 -0
  15. data/logstash-output-s3.gemspec +2 -2
  16. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  17. data/spec/integration/gzip_file_spec.rb +62 -0
  18. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  19. data/spec/integration/restore_from_crash_spec.rb +39 -0
  20. data/spec/integration/size_rotation_spec.rb +59 -0
  21. data/spec/integration/stress_test_spec.rb +60 -0
  22. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  23. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +60 -0
  24. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  25. data/spec/outputs/s3/file_repository_spec.rb +146 -0
  26. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  27. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  28. data/spec/outputs/s3/temporary_file_factory_spec.rb +85 -0
  29. data/spec/outputs/s3/temporary_file_spec.rb +40 -0
  30. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  31. data/spec/outputs/s3/uploader_spec.rb +57 -0
  32. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  33. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +38 -0
  34. data/spec/outputs/s3_spec.rb +52 -335
  35. data/spec/spec_helper.rb +6 -0
  36. data/spec/supports/helpers.rb +33 -9
  37. metadata +65 -4
  38. data/spec/integration/s3_spec.rb +0 -97
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: e681a1dd7ceea2cedb08ddd6dfc6eae4310a48cb
- data.tar.gz: c86a97c06c234211329e24184370b1a0fdf19fc7
+ metadata.gz: 1640533d698e2ce25c2bdc426ab18d7a7447bdb5
+ data.tar.gz: 0af7083fdb8848e0057e8ef537a20e884add3208
  SHA512:
- metadata.gz: 139c3e3749e83a6490ecd982294ab65e6c8628aeb246d9b04f30cdd7f05640c6afd0865133e6c0f017e307d81f2438a675b4024d16a0ece34266ce74d7b46c68
- data.tar.gz: 1d6b05b4aa255c1db7887d93e8eed9f6781fd923312a38095e7bea6eaff56db49641c2b78263c757b46f522df6131cebe6f28e3af331d0ebb7bdb0c3ac0cbd82
+ metadata.gz: 8d8f14939c20adef00b8def8a4ef1cfed06a6f577ece6d741def4b7fc383febce068a2e49a4a91e41fc3ef18aa23ffcbd5fe7d9eb43d3028672bd3bf0656911d
+ data.tar.gz: c9b39d6715391c33fa5a4fcf0f0485bd4a197e1a5abb000460552b1011ceb9c31c1be3e9597081db607c84782bcf51d68cc82b970e58159fd83e1807716d1611
data/CHANGELOG.md CHANGED
@@ -1,3 +1,24 @@
+ ## 4.0.0
+ - This version is a complete rewrite over version 3.0.0, see #103
+ - This plugin now uses the V2 version of the AWS SDK, which ensures we receive the latest updates and changes.
+ - We now use S3's `upload_file` instead of reading chunks; this method is more efficient and will use multipart uploads with threads if the file is big enough.
+ - You can now use the `fieldref` syntax in the prefix to dynamically change the target based on the events it receives.
+ - The upload queue is now a bounded list. This is necessary to allow back pressure to be communicated back to the pipeline; the size is configurable by the user.
+ - If the queue is full, the plugin will start the upload in the current thread.
+ - The plugin is now threadsafe and supports the concurrency model `shared`.
+ - The rotation strategy can be selected; the recommended one is `size_and_time`, which checks both of the configured limits (`size` and `time` are also available).
+ - The `restore` option will now use a separate threadpool with an unbounded queue.
+ - The `restore` option will not block the launch of Logstash and will use fewer resources than the real-time path.
+ - The plugin now uses `multi_receive_encode`, which optimizes the writes to the files.
+ - Rotate operations are now batched to reduce the number of IO calls.
+ - Empty files will not be uploaded by any rotation strategy.
+ - We now use Concurrent-Ruby for the implementation of the Java executor.
+ - If you have finer-grained permissions on prefixes or want a faster boot, you can disable the credentials check with `validate_credentials_on_root_bucket`.
+ - The credentials check will no longer fail if we can't delete the test file.
+ - We now have a full suite of integration tests for all the defined rotations.
+
+ Fixes: #4 #81 #44 #59 #50
+
  ## 3.2.0
  - Move to the new concurrency model `:single`
  - use correct license identifier #99
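To ground the `upload_file` bullet above: in the V2 SDK, `Aws::S3::Object#upload_file` manages the upload end to end and transparently switches to a threaded multipart upload once the file exceeds the SDK's multipart threshold. A minimal sketch (bucket name and paths are placeholders):

```ruby
require "aws-sdk" # V2 of the SDK, as required by this plugin

# `upload_file` streams from disk and uses multipart uploads with
# multiple threads for files above the SDK's multipart threshold.
s3 = Aws::S3::Resource.new(:region => "us-east-1")
s3.bucket("your_bucket")
  .object("prefix/ls.s3.example.part0.txt")
  .upload_file("/tmp/logstash/ls.s3.example.part0.txt")
```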
data/lib/logstash/outputs/s3.rb CHANGED
@@ -4,11 +4,17 @@ require "logstash/namespace"
  require "logstash/plugin_mixins/aws_config"
  require "stud/temporary"
  require "stud/task"
- require "socket" # for Socket.gethostname
+ require "concurrent"
+ require "socket"
  require "thread"
  require "tmpdir"
  require "fileutils"
+ require "set"
+ require "pathname"
+ require "aws-sdk"
+ require "logstash/outputs/s3/patch"
 
+ Aws.eager_autoload!
 
  # INFORMATION:
  #
@@ -17,35 +23,34 @@
  # Requirements:
  # * Amazon S3 Bucket and S3 Access Permissions (Typically access_key_id and secret_access_key)
  # * S3 PutObject permission
- # * Run logstash as superuser to establish connection
  #
- # S3 outputs create temporary files into "/opt/logstash/S3_temp/". If you want, you can change the path at the start of register method.
+ # S3 outputs create temporary files in the OS temporary directory; you can specify where to save them using the `temporary_directory` option.
  #
  # S3 output files have the following format
  #
  # ls.s3.ip-10-228-27-95.2013-04-18T10.00.tag_hello.part0.txt
  #
- # ls.s3 : indicate logstash plugin s3
  #
- # "ip-10-228-27-95" : indicates the ip of your machine.
- # "2013-04-18T10.00" : represents the time whenever you specify time_file.
- # "tag_hello" : this indicates the event's tag.
- # "part0" : this means if you indicate size_file then it will generate more parts if you file.size > size_file.
- # When a file is full it will be pushed to the bucket and then deleted from the temporary directory.
- # If a file is empty, it is simply deleted. Empty files will not be pushed
+ # |=======
+ # | ls.s3 | indicates the Logstash S3 plugin |
+ # | ip-10-228-27-95 | indicates the IP of your machine |
+ # | 2013-04-18T10.00 | represents the time at which the file was created; a new timestamp is used each time time_file triggers a rotation |
+ # | tag_hello | indicates the event's tag |
+ # | part0 | if you specify size_file, more parts are generated when file.size > size_file. When a file is full it is pushed to the bucket and then deleted from the temporary directory. Empty files are simply deleted and never pushed |
+ # |=======
  #
  # Crash Recovery:
- # * This plugin will recover and upload temporary log files after crash/abnormal termination
+ # * This plugin will recover and upload temporary log files after crash/abnormal termination when `restore` is set to true
  #
  ##[Note regarding time_file and size_file] :
  #
- # Both time_file and size_file settings can trigger a log "file rotation"
- # A log rotation pushes the current log "part" to s3 and deleted from local temporary storage.
+ ## Both time_file and size_file settings can trigger a log "file rotation"
+ ## A log rotation pushes the current log "part" to S3 and deletes it from local temporary storage.
  #
  ## If you specify BOTH size_file and time_file then it will create file for each tag (if specified).
  ## When EITHER time_file minutes have elapsed OR log file size > size_file, a log rotation is triggered.
  ##
- ## If you ONLY specify time_file but NOT file_size, one file for each tag (if specified) will be created..
+ ## If you ONLY specify time_file but NOT file_size, one file for each tag (if specified) will be created.
  ## When time_file minutes elapses, a log rotation will be triggered.
  #
  ## If you ONLY specify size_file, but NOT time_file, one files for each tag (if specified) will be created.
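For example, with `size_file => 2048` and `time_file => 5` (the values used in the sample configuration below), a part is rotated as soon as it reaches 2048 bytes or as soon as 5 minutes have elapsed since it was opened, whichever comes first.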
@@ -63,46 +68,63 @@
  # access_key_id => "crazy_key" (required)
  # secret_access_key => "monkey_access_key" (required)
  # region => "eu-west-1" (optional, default = "us-east-1")
- # bucket => "boss_please_open_your_bucket" (required)
+ # bucket => "your_bucket" (required)
  # size_file => 2048 (optional) - Bytes
  # time_file => 5 (optional) - Minutes
  # format => "plain" (optional)
- # canned_acl => "private" (optional. Options are "private", "public_read", "public_read_write", "authenticated_read", "bucket_owner_full_control". Defaults to "private" )
+ # canned_acl => "private" (optional. Options are "private", "public_read", "public_read_write", "authenticated_read". Defaults to "private" )
  # }
  #
  class LogStash::Outputs::S3 < LogStash::Outputs::Base
- include LogStash::PluginMixins::AwsConfig
+ require "logstash/outputs/s3/writable_directory_validator"
+ require "logstash/outputs/s3/path_validator"
+ require "logstash/outputs/s3/write_bucket_permission_validator"
+ require "logstash/outputs/s3/size_rotation_policy"
+ require "logstash/outputs/s3/time_rotation_policy"
+ require "logstash/outputs/s3/size_and_time_rotation_policy"
+ require "logstash/outputs/s3/temporary_file"
+ require "logstash/outputs/s3/temporary_file_factory"
+ require "logstash/outputs/s3/uploader"
+ require "logstash/outputs/s3/file_repository"
+
+ include LogStash::PluginMixins::AwsConfig::V2
+
+ PREFIX_KEY_NORMALIZE_CHARACTER = "_"
+ PERIODIC_CHECK_INTERVAL_IN_SECONDS = 15
+ CRASH_RECOVERY_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+   :min_threads => 1,
+   :max_threads => 2,
+   :fallback_policy => :caller_runs
+ })
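The `:caller_runs` fallback policy above is what turns a full queue into back pressure: when the pool's queue is saturated, the submitting thread runs the job itself. A minimal, self-contained sketch of the behaviour (the sizes are illustrative, not the plugin's defaults):

```ruby
require "concurrent"

pool = Concurrent::ThreadPoolExecutor.new(
  :min_threads => 1,
  :max_threads => 2,
  :max_queue => 4,                 # bounded queue, like upload_queue_size
  :fallback_policy => :caller_runs # full queue => submitter runs inline
)

20.times do
  pool.post { sleep(0.1) } # stand-in for an S3 upload
end
# Once both workers are busy and the queue holds 4 pending jobs, `post`
# executes the block on the calling thread, which naturally slows the
# producer (here, the Logstash pipeline).

pool.shutdown
pool.wait_for_termination
```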
 
- TEMPFILE_EXTENSION = "txt"
- S3_INVALID_CHARACTERS = /[\^`><]/
 
  config_name "s3"
- default :codec, 'line'
+ default :codec, "line"
 
- concurrency :single
+ concurrency :shared
 
  # S3 bucket
- config :bucket, :validate => :string
+ config :bucket, :validate => :string, :required => true
 
  # Set the size of file in bytes, this means that files on bucket when have dimension > file_size, they are stored in two or more file.
  # If you have tags then it will generate a specific size file for every tags
  ##NOTE: define size of file is the better thing, because generate a local temporary file on disk and then put it in bucket.
- config :size_file, :validate => :number, :default => 0
+ config :size_file, :validate => :number, :default => 1024 * 1024 * 5
 
  # Set the time, in MINUTES, to close the current sub_time_section of bucket.
  # If you define file_size you have a number of files in consideration of the section and the current tag.
  # 0 stay all time on listerner, beware if you specific 0 and size_file 0, because you will not put the file on bucket,
  # for now the only thing this plugin can do is to put the file when logstash restart.
- config :time_file, :validate => :number, :default => 0
+ config :time_file, :validate => :number, :default => 15
 
  ## IMPORTANT: if you use multiple instance of s3, you should specify on one of them the "restore=> true" and on the others "restore => false".
  ## This is hack for not destroy the new files after restoring the initial files.
  ## If you do not specify "restore => true" when logstash crashes or is restarted, the files are not sent into the bucket,
  ## for example if you have single Instance.
- config :restore, :validate => :boolean, :default => false
+ config :restore, :validate => :boolean, :default => true
 
  # The S3 canned ACL to use when putting the file. Defaults to "private".
- config :canned_acl, :validate => ["private", "public_read", "public_read_write", "authenticated_read", "bucket_owner_full_control"],
+ config :canned_acl, :validate => ["private", "public_read", "public_read_write", "authenticated_read"],
    :default => "private"
 
  # Specifies wether or not to use S3's AES256 server side encryption. Defaults to false.
@@ -113,10 +135,14 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
 
  # Specify a prefix to the uploaded filename, this can simulate directories on S3. Prefix does not require leading slash.
+ # This option supports string interpolation; be warned that this can create a lot of temporary local files.
  config :prefix, :validate => :string, :default => ''
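Because the prefix is interpolated per event, every distinct expansion gets its own temporary file and S3 key prefix. A small illustration of the `event.sprintf` mechanics (field values and date are hypothetical), assuming a prefix of `%{type}/%{+YYYY-MM-dd}/`:

```ruby
require "logstash/event" # available inside a Logstash runtime

event = LogStash::Event.new("type" => "nginx")
event.sprintf("%{type}/%{+YYYY-MM-dd}/")
# => "nginx/2016-07-14/"  (the date comes from the event's @timestamp)
# A second event with "type" => "syslog" opens a second temporary file
# under "syslog/...", which is why interpolation can create many files.
```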
 
  # Specify how many workers to use to upload the files to S3
- config :upload_workers_count, :validate => :number, :default => 1
+ config :upload_workers_count, :validate => :number, :default => (Concurrent.processor_count * 0.5).ceil
+
+ # Number of items we can keep in the local queue before uploading them
+ config :upload_queue_size, :validate => :number, :default => 2 * (Concurrent.processor_count * 0.25).ceil
 
  # The version of the S3 signature hash to use. Normally uses the internal client default, can be explicitly
  # specified here
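To make the new defaults concrete: on an 8-core machine, `upload_workers_count` defaults to `(8 * 0.5).ceil`, i.e. 4 workers, and `upload_queue_size` to `2 * (8 * 0.25).ceil`, i.e. 4 queued items, so at most four uploads run concurrently with four more waiting before back pressure reaches the pipeline.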
@@ -135,348 +161,202 @@ class LogStash::Outputs::S3 < LogStash::Outputs::Base
  # Specify the content encoding. Supports ("gzip"). Defaults to "none"
  config :encoding, :validate => ["none", "gzip"], :default => "none"
 
- # Exposed attributes for testing purpose.
- attr_accessor :tempfile
- attr_reader :page_counter, :upload_workers
- attr_reader :s3
-
- def aws_s3_config
-   @logger.info("Registering s3 output", :bucket => @bucket, :endpoint_region => @region)
-   @s3 = AWS::S3.new(full_options)
- end
-
- def full_options
-   aws_options_hash.merge(signature_options)
- end
-
- def signature_options
-   if @signature_version
-     {:s3_signature_version => @signature_version}
-   else
-     {}
-   end
- end
-
- def aws_service_endpoint(region)
-   return {
-     :s3_endpoint => region == 'us-east-1' ? 's3.amazonaws.com' : "s3-#{region}.amazonaws.com"
-   }
- end
-
- public
- def write_on_bucket(file)
-   # find and use the bucket
-   bucket = @s3.buckets[@bucket]
-
-   remote_filename = "#{@prefix}#{File.basename(file)}"
+ # Define the strategy to use to decide when we need to rotate the file and push it to S3;
+ # the default strategy checks both size and time, and the first criterion to match rotates the file.
+ config :rotation_strategy, :validate => ["size_and_time", "size", "time"], :default => "size_and_time"
 
-   @logger.debug("S3: ready to write file in bucket", :remote_filename => remote_filename, :bucket => @bucket)
+ # The common use case is to define permissions on the root bucket and give Logstash full access to write its logs.
+ # In some circumstances you need finer-grained permissions on a subfolder; this allows you to disable the check at startup.
+ config :validate_credentials_on_root_bucket, :validate => :boolean, :default => true
 
-   File.open(file, 'r') do |fileIO|
-     begin
-       # prepare for write the file
-       object = bucket.objects[remote_filename]
-       object.write(fileIO,
-         :acl => @canned_acl,
-         :server_side_encryption => @server_side_encryption ? :aes256 : nil,
-         :content_encoding => @encoding == "gzip" ? "gzip" : nil)
-     rescue AWS::Errors::Base => error
-       @logger.error("S3: AWS error", :error => error)
-       raise LogStash::Error, "AWS Configuration Error, #{error}"
+ def register
+   # I've moved the validation of these items into custom classes
+   # to prepare for the new config validation that will be part of the core,
+   # so it can be moved easily.
+   unless @prefix.empty?
+     if !PathValidator.valid?(prefix)
+       raise LogStash::ConfigurationError, "Prefix must not contain: #{PathValidator::INVALID_CHARACTERS}"
      end
    end
 
-   @logger.debug("S3: has written remote file in bucket with canned ACL", :remote_filename => remote_filename, :bucket => @bucket, :canned_acl => @canned_acl)
- end
-
- # This method is used for create new empty temporary files for use. Flag is needed for indicate new subsection time_file.
- public
- def create_temporary_file
-   filename = File.join(@temporary_directory, get_temporary_filename(@page_counter))
-
-   @logger.debug("S3: Creating a new temporary file", :filename => filename)
-
-   @file_rotation_lock.synchronize do
-     unless @tempfile.nil?
-       @tempfile.close
-     end
-
-     if @encoding == "gzip"
-       @tempfile = Zlib::GzipWriter.open(filename)
-     else
-       @tempfile = File.open(filename, "a")
-     end
+   if !WritableDirectoryValidator.valid?(@temporary_directory)
+     raise LogStash::ConfigurationError, "Logstash must have the permissions to write to the temporary directory: #{@temporary_directory}"
    end
- end
 
- public
- def register
-   require "aws-sdk"
-   # required if using ruby version < 2.0
-   # http://ruby.awsblog.com/post/Tx16QY1CI5GVBFT/Threading-with-the-AWS-SDK-for-Ruby
-   AWS.eager_autoload!(AWS::S3)
-
-   @s3 = aws_s3_config
-   @upload_queue = Queue.new
-   @file_rotation_lock = Mutex.new
-
-   if @prefix && @prefix =~ S3_INVALID_CHARACTERS
-     @logger.error("S3: prefix contains invalid characters", :prefix => @prefix, :contains => S3_INVALID_CHARACTERS)
-     raise LogStash::ConfigurationError, "S3: prefix contains invalid characters"
+   if @validate_credentials_on_root_bucket && !WriteBucketPermissionValidator.valid?(bucket_resource)
+     raise LogStash::ConfigurationError, "Logstash must have the privileges to write to root bucket `#{@bucket}`, check your credentials or your permissions."
    end
 
-   if !Dir.exist?(@temporary_directory)
-     FileUtils.mkdir_p(@temporary_directory)
+   if @time_file.nil? && @size_file.nil? || @size_file == 0 && @time_file == 0
+     raise LogStash::ConfigurationError, "The S3 plugin must have at least one of time_file or size_file set to a value greater than 0"
    end
 
-   test_s3_write
+   @file_repository = FileRepository.new(@tags, @encoding, @temporary_directory)
 
-   restore_from_crashes if @restore == true
-   reset_page_counter
-   create_temporary_file
-   configure_periodic_rotation if time_file != 0
-   configure_upload_workers
+   @rotation = rotation_strategy
 
-   @codec.on_event do |event, encoded_event|
-     handle_event(encoded_event)
-   end
- end
+   executor = Concurrent::ThreadPoolExecutor.new({ :min_threads => 1,
+                                                   :max_threads => @upload_workers_count,
+                                                   :max_queue => @upload_queue_size,
+                                                   :fallback_policy => :caller_runs })
 
+   @uploader = Uploader.new(bucket_resource, @logger, executor)
 
- # Use the same method that Amazon use to check
- # permission on the user bucket by creating a small file
- public
- def test_s3_write
-   @logger.debug("S3: Creating a test file on S3")
+   # Restoring from crash will use a new threadpool to slowly recover;
+   # new events should have more priority.
+   restore_from_crash if @restore
 
-   test_filename = File.join(@temporary_directory,
-     "logstash-programmatic-access-test-object-#{Time.now.to_i}")
+   # If we need time based rotation we need to do a periodic check on the files
+   # to take care of files that were not updated recently.
+   start_periodic_check if @rotation.needs_periodic?
+ end
 
-   File.open(test_filename, 'a') do |file|
-     file.write('test')
-   end
+ def multi_receive_encoded(events_and_encoded)
+   prefix_written_to = Set.new
 
-   begin
-     write_on_bucket(test_filename)
+   events_and_encoded.each do |event, encoded|
+     prefix_key = normalize_key(event.sprintf(@prefix))
+     prefix_written_to << prefix_key
 
      begin
-       remote_filename = "#{@prefix}#{File.basename(test_filename)}"
-       bucket = @s3.buckets[@bucket]
-       bucket.objects[remote_filename].delete
-     rescue StandardError => e
-       # we actually only need `put_object`, but if we dont delete them
-       # we can have a lot of tests files
+       @file_repository.get_file(prefix_key) { |file| file.write(encoded) }
+     # The output should stop accepting new events coming in, since it cannot do anything with them anymore.
+     # Log the error and rethrow it.
+     rescue Errno::ENOSPC => e
+       @logger.error("S3: No space left in temporary directory", :temporary_directory => @temporary_directory)
+       raise e
      end
-   ensure
-     File.delete(test_filename)
    end
+
+   # Groups IO calls to optimize fstat checks
+   rotate_if_needed(prefix_written_to)
  end
 
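To make the per-prefix bookkeeping easier to picture, here is a deliberately minimal sketch of the idea behind `@file_repository.get_file` (hypothetical names; the real `FileRepository` in this release also manages temporary-file factories, rotation state, and encodings):

```ruby
require "concurrent"
require "fileutils"

class TinyFileRepository
  def initialize(directory)
    @directory = directory
    @files = Concurrent::Map.new # one open file per prefix key
  end

  # Yields the open file for this prefix, creating it on first use.
  # The real repository also wraps access in a per-prefix lock so a
  # rotation cannot race with a write.
  def get_file(prefix_key)
    file = @files.compute_if_absent(prefix_key) do
      path = File.join(@directory, prefix_key, "ls.s3.part0.txt")
      FileUtils.mkdir_p(File.dirname(path))
      File.open(path, "a")
    end
    yield file
  end
end
```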
- public
- def restore_from_crashes
-   @logger.debug("S3: Checking for temp files from a previoius crash...")
+ def close
+   stop_periodic_check if @rotation.needs_periodic?
 
-   Dir[File.join(@temporary_directory, "*.#{get_tempfile_extension}")].each do |file|
-     name_file = File.basename(file)
-     @logger.warn("S3: Found temporary file from crash. Uploading file to S3.", :filename => name_file)
-     move_file_to_bucket_async(file)
-   end
- end
+   @logger.debug("Uploading current workspace")
 
- public
- def move_file_to_bucket(file)
-   if !File.zero?(file)
-     write_on_bucket(file)
-     @logger.debug("S3: File was put on the upload thread", :filename => File.basename(file), :bucket => @bucket)
+   # The plugin has stopped receiving new events, but we still have
+   # data on disk; let's make sure it gets to S3.
+   # If Logstash gets interrupted, the `restore_from_crash` method (when `restore` is set to true)
+   # will pick up the content in the temporary directory and upload it.
+   # This will block the shutdown until all uploads are done or the user force quits.
+   @file_repository.each_files do |file|
+     upload_file(file)
    end
 
-   begin
-     File.delete(file)
-   rescue Errno::ENOENT
-     # Something else deleted the file, logging but not raising the issue
-     @logger.warn("S3: Cannot delete the temporary file since it doesn't exist on disk", :filename => File.basename(file))
-   rescue Errno::EACCES
-     @logger.error("S3: Logstash doesnt have the permission to delete the file in the temporary directory.", :filename => File.basename(file), :temporary_directory => @temporary_directory)
-   end
- end
+   @file_repository.shutdown
 
- public
- def periodic_interval
-   @time_file * 60
+   @uploader.stop # wait until all the current uploads are complete
+   @crash_uploader.stop if @restore # we might still have work to do for recovery, so wait until we are done
  end
 
- private
- def get_tempfile_extension
-   @encoding == "gzip" ? "#{TEMPFILE_EXTENSION}.gz" : "#{TEMPFILE_EXTENSION}"
+ def full_options
+   options = { :credentials => credentials }
+   options[:s3_signature_version] = @signature_version if @signature_version
+   options.merge(aws_options_hash)
  end
 
- public
- def get_temporary_filename(page_counter = 0)
-   current_time = Time.now
-   filename = "ls.s3.#{Socket.gethostname}.#{current_time.strftime("%Y-%m-%dT%H.%M")}"
-
-   if @tags.size > 0
-     return "#{filename}.tag_#{@tags.join('.')}.part#{page_counter}.#{get_tempfile_extension}"
-   else
-     return "#{filename}.part#{page_counter}.#{get_tempfile_extension}"
-   end
+ def normalize_key(prefix_key)
+   prefix_key.gsub(PathValidator.matches_re, PREFIX_KEY_NORMALIZE_CHARACTER)
  end
 
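For intuition about `normalize_key`: where the 3.x code rejected prefixes matching `S3_INVALID_CHARACTERS` outright, 4.0.0 substitutes `PREFIX_KEY_NORMALIZE_CHARACTER` for every match. An illustration, assuming the invalid set is the same /[\^`><]/ the removed code used (the actual regex lives in `PathValidator.matches_re`, which is not shown in this diff):

```ruby
# Assumption: same invalid set as 3.x's S3_INVALID_CHARACTERS
INVALID = /[\^`><]/
"logs/host>web/".gsub(INVALID, "_") # => "logs/host_web/"
```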
- public
- def receive(event)
-
-   @codec.encode(event)
- end
+ private
+ # We start a background task that checks for stale files and rotates them to S3 if needed.
+ def start_periodic_check
+   @logger.debug("Start periodic rotation check")
 
- public
- def rotate_events_log?
-   @file_rotation_lock.synchronize do
-     tempfile_size > @size_file
-   end
- end
+   @periodic_check = Concurrent::TimerTask.new(:execution_interval => PERIODIC_CHECK_INTERVAL_IN_SECONDS) do
+     @logger.debug("Periodic check for stale files")
 
- private
- def tempfile_size
-   if @tempfile.instance_of? File
-     @tempfile.size
-   elsif @tempfile.instance_of? Zlib::GzipWriter
-     @tempfile.tell
-   else
-     raise LogStash::Error, "Unable to get size of temp file of type #{@tempfile.class}"
+     rotate_if_needed(@file_repository.keys)
    end
- end
 
- public
- def write_events_to_multiple_files?
-   @size_file > 0
+   @periodic_check.execute
  end
 
- public
- def write_to_tempfile(event)
-   begin
-     @logger.debug("S3: put event into tempfile ", :tempfile => File.basename(@tempfile.path))
-
-     @file_rotation_lock.synchronize do
-       @tempfile.write(event)
-     end
-   rescue Errno::ENOSPC
-     @logger.error("S3: No space left in temporary directory", :temporary_directory => @temporary_directory)
-     close
-   end
+ def stop_periodic_check
+   @periodic_check.shutdown
  end
 
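`Concurrent::TimerTask` fires its block every `execution_interval` seconds until it is shut down; a self-contained sketch of the pattern used above (with a shortened interval):

```ruby
require "concurrent"

checker = Concurrent::TimerTask.new(:execution_interval => 2) do
  # stand-in for rotate_if_needed(@file_repository.keys)
  puts "checking for stale files at #{Time.now}"
end

checker.execute  # start the periodic task
sleep 5          # let it fire a couple of times
checker.shutdown # same call the plugin makes in stop_periodic_check
```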
- public
- def close
-   shutdown_upload_workers
-   @periodic_rotation_thread.stop! if @periodic_rotation_thread
-
-   @file_rotation_lock.synchronize do
-     @tempfile.close unless @tempfile.nil? && @tempfile.closed?
-   end
+ def bucket_resource
+   Aws::S3::Bucket.new(@bucket, { :credentials => credentials }.merge(aws_options_hash))
  end
 
- private
- def shutdown_upload_workers
-   @logger.debug("S3: Gracefully shutdown the upload workers")
-   @upload_queue << LogStash::SHUTDOWN
+ def aws_service_endpoint(region)
+   { :s3_endpoint => region == 'us-east-1' ? 's3.amazonaws.com' : "s3-#{region}.amazonaws.com" }
  end
 
- private
- def handle_event(encoded_event)
-   if write_events_to_multiple_files?
-     if rotate_events_log?
-       @logger.debug("S3: tempfile is too large, let's bucket it and create new file", :tempfile => File.basename(@tempfile.path))
-
-       tempfile_path = @tempfile.path
-       # close and start next file before sending the previous one
-       next_page
-       create_temporary_file
-
-       # send to s3
-       move_file_to_bucket_async(tempfile_path)
-     else
-       @logger.debug("S3: tempfile file size report.", :tempfile_size => tempfile_size, :size_file => @size_file)
-     end
-   end
-
-   write_to_tempfile(encoded_event)
+ def upload_options
+   {
+     :acl => @canned_acl,
+     :server_side_encryption => @server_side_encryption ? :aes256 : nil,
+     :content_encoding => @encoding == "gzip" ? "gzip" : nil
+   }
  end
 
- private
- def configure_periodic_rotation
-   @periodic_rotation_thread = Stud::Task.new do
-     LogStash::Util::set_thread_name("<S3 periodic uploader")
-
-     Stud.interval(periodic_interval, :sleep_then_run => true) do
-       @logger.debug("S3: time_file triggered, bucketing the file", :filename => @tempfile.path)
-
-       tempfile_path = @tempfile.path
-       # close and start next file before sending the previous one
-       next_page
-       create_temporary_file
-
-       # send to s3
-       move_file_to_bucket_async(tempfile_path)
+ def rotate_if_needed(prefixes)
+   prefixes.each do |prefix|
+     # Each file access is thread safe, and while a rotation
+     # is in progress only one thread has access to the resource.
+     @file_repository.get_factory(prefix) do |factory|
+       temp_file = factory.current
+
+       if @rotation.rotate?(temp_file)
+         @logger.debug("Rotate file",
+                       :strategy => @rotation.class.name,
+                       :key => temp_file.key,
+                       :path => temp_file.path)
+
+         upload_file(temp_file)
+         factory.rotate!
+       end
      end
    end
  end
 
- private
- def configure_upload_workers
-   @logger.debug("S3: Configure upload workers")
-
-   @upload_workers = @upload_workers_count.times.map do |worker_id|
-     Stud::Task.new do
-       LogStash::Util::set_thread_name("<S3 upload worker #{worker_id}")
-
-       continue = true
-       while continue do
-         @logger.debug("S3: upload worker is waiting for a new file to upload.", :worker_id => worker_id)
+ def upload_file(temp_file)
+   @logger.debug("Queue for upload", :path => temp_file.path)
 
-         continue = upload_worker
-       end
-     end
+   # if the queue is full the calling thread will be used to upload
+   temp_file.close # make sure the content is on disk
+   if temp_file.size > 0
+     @uploader.upload_async(temp_file,
+                            :on_complete => method(:clean_temporary_file),
+                            :upload_options => upload_options)
    end
  end
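A rough, hypothetical sketch of what `upload_async` amounts to on top of the bounded executor (the real implementation lives in `data/lib/logstash/outputs/s3/uploader.rb` and is not shown in this diff; names here are illustrative):

```ruby
require "concurrent"

class SketchUploader
  def initialize(executor)
    @executor = executor
  end

  def upload_async(file, options = {})
    @executor.post do
      # the real code would call bucket.object(key).upload_file(path)
      puts "uploading #{file}"
      options[:on_complete].call(file) if options[:on_complete]
    end
  end
end

pool = Concurrent::ThreadPoolExecutor.new(:max_threads => 2, :max_queue => 2,
                                          :fallback_policy => :caller_runs)
uploader = SketchUploader.new(pool)
uploader.upload_async("part0.txt", :on_complete => ->(f) { puts "done #{f}" })
pool.shutdown
pool.wait_for_termination
```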
 
- private
- def upload_worker
-   file = nil
-   begin
-     file = @upload_queue.deq
-
-     if file == LogStash::SHUTDOWN
-       @logger.debug("S3: upload worker is shutting down gracefuly")
-       @upload_queue.enq(LogStash::SHUTDOWN)
-       false
-     else
-       @logger.debug("S3: upload working is uploading a new file", :filename => File.basename(file))
-       move_file_to_bucket(file)
-       true
-     end
-   rescue Exception => ex
-     @logger.error("failed to upload, will re-enqueue #{file} for upload",
-       :ex => ex, :backtrace => ex.backtrace)
-     unless file.nil? # Rare case if the first line of the begin doesn't execute
-       @upload_queue.enq(file)
-     end
-     true
+ def rotation_strategy
+   case @rotation_strategy
+   when "size"
+     SizeRotationPolicy.new(size_file)
+   when "time"
+     TimeRotationPolicy.new(time_file)
+   when "size_and_time"
+     SizeAndTimeRotationPolicy.new(size_file, time_file)
    end
  end
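From this diff one can infer the contract the rotation policies satisfy: `rotate?(file)` decides whether a temporary file should be shipped, and `needs_periodic?` tells the output whether the background check is required. A hypothetical stand-in for the size-based policy (the real classes live under `data/lib/logstash/outputs/s3/`):

```ruby
# Illustrative only; not the plugin's actual SizeRotationPolicy.
class SketchSizeRotationPolicy
  def initialize(size_file)
    raise ArgumentError, "size_file must be > 0" if size_file <= 0
    @size_file = size_file
  end

  # rotate_if_needed calls this with the current temporary file
  def rotate?(file)
    file.size >= @size_file
  end

  # only time-based policies need the background TimerTask
  def needs_periodic?
    false
  end
end
```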
 
- private
- def next_page
-   @page_counter += 1
+ def clean_temporary_file(file)
+   @logger.debug("Removing temporary file", :file => file.path)
+   file.delete!
  end
 
- private
- def reset_page_counter
-   @page_counter = 0
- end
-
- private
- def move_file_to_bucket_async(file)
-   @logger.debug("S3: Sending the file to the upload queue.", :filename => File.basename(file))
-   @upload_queue.enq(file)
+ # The recovery process uses a separate uploader/threadpool with fewer resources allocated to it,
+ # but an unbounded work queue, so it may take some time before all the older files get processed.
+ def restore_from_crash
+   @crash_uploader = Uploader.new(bucket_resource, @logger, CRASH_RECOVERY_THREADPOOL)
+
+   temp_folder_path = Pathname.new(@temporary_directory)
+   Dir.glob(::File.join(@temporary_directory, "**/*"))
+      .select { |file| ::File.file?(file) }
+      .each do |file|
+        temp_file = TemporaryFile.create_from_existing_file(file, temp_folder_path)
+        @logger.debug("Recovering from crash and uploading", :file => temp_file.path)
+        @crash_uploader.upload_async(temp_file, :on_complete => method(:clean_temporary_file))
+      end
  end
  end
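Finally, a standalone sketch of the recovery walk above (the path is illustrative; `TemporaryFile.create_from_existing_file` presumably rebuilds the S3 key from the file's path relative to the temporary directory, which would explain why `temp_folder_path` is passed in):

```ruby
require "pathname"

temporary_directory = "/tmp/logstash" # assumption: example path
root = Pathname.new(temporary_directory)

Dir.glob(File.join(temporary_directory, "**/*"))
   .select { |f| File.file?(f) }      # skip the per-prefix directories
   .each do |f|
     relative = Pathname.new(f).relative_path_from(root)
     puts "would queue #{relative} for upload"
   end
```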