logstash-output-google_cloud_storage 3.1.0 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9181dddb40215cd78a1b07d3b327df10e7dd678b3eab3d34f511a84f11a92a6
4
- data.tar.gz: 4696f52738d5e20466b8639565a7eeee646b19e27e0421fc39c7cc283359a07b
3
+ metadata.gz: ef57485cd166eb205939da40bb4db73428955388af8e5c13d313852eb8c297c7
4
+ data.tar.gz: 8d3e0f581f611a9c7148ecc9f871b54e8c7ebf356ea3da39a3fa0a187d710184
5
5
  SHA512:
6
- metadata.gz: 93b09c005fc9cd4afdca9678b14949f35ad5d07ec92cb5f1ffb2ab20d8228f6e46e72e197846d1704cc2575f9bbf8c603e03ebf355b0423f6de6e42f6cb698dc
7
- data.tar.gz: 05e5bd961335f99458a4c46d5b271bcfd1279c6349817a483c9c65c1c62b0f1efeac48221b5c3c0e8869e0603caedcb5ece6b1d6da21a21641341fd306769549
6
+ metadata.gz: 022b0a599c17c5a9dc062093662556f2cfd15c7d2345527b92a4b9bda4fb2fa3756e837bbe471a60bf2034ce2a6b059f76b2ec01c1d27ba6b54575f628e46bc8
7
+ data.tar.gz: b603dea8edb673e1a1e0c72a5431d5d915ade37d1f6164fb3b40882bdecb936a6959ec16e6724d239ca26e87b651c6a8602cfbd439d82d824bb14781611a8bd0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 3.2.0
2
+ - Change uploads to use a job pool for better performance
3
+ - Fixes [#22](https://github.com/logstash-plugins/logstash-output-google_cloud_storage/issues/22) - Refactor Job Queue Architecture
4
+ - Fixes [#5](https://github.com/logstash-plugins/logstash-output-google_cloud_storage/issues/5) - Major Performance Issues
5
+ - Wait for files to upload before Logstash quits
6
+ - Fixes [#15](https://github.com/logstash-plugins/logstash-output-google_cloud_storage/issues/15) - Fails to upload files when Logstash exits
7
+
1
8
  ## 3.1.0
2
9
  - Add support for disabling hostname in the log file names
3
10
  - Add support for adding a UUID to the log file names
data/docs/index.asciidoc CHANGED
@@ -88,6 +88,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
88
88
  | <<plugins-{type}s-{plugin}-key_password>> |<<string,string>>|No
89
89
  | <<plugins-{type}s-{plugin}-key_path>> |<<string,string>>|Yes
90
90
  | <<plugins-{type}s-{plugin}-log_file_prefix>> |<<string,string>>|No
91
+ | <<plugins-{type}s-{plugin}-max_concurrent_uploads>> |<<number,number>>|No
91
92
  | <<plugins-{type}s-{plugin}-max_file_size_kbytes>> |<<number,number>>|No
92
93
  | <<plugins-{type}s-{plugin}-output_format>> |<<string,string>>, one of `["json", "plain"]`|No
93
94
  | <<plugins-{type}s-{plugin}-service_account>> |<<string,string>>|Yes
@@ -187,6 +188,17 @@ GCS path to private key file.
187
188
  Log file prefix. Log file will follow the format:
188
189
  <prefix>_hostname_date<.part?>.log
189
190
 
191
+ [id="plugins-{type}s-{plugin}-max_concurrent_uploads"]
192
+ ===== `max_concurrent_uploads`
193
+
194
+ * Value type is <<number,number>>
195
+ * Default value is `5`
196
+
197
+ Sets the maximum number of concurrent uploads to Cloud Storage at a time.
198
+ Uploads are I/O bound so it makes sense to tune this parameter with regard
199
+ to the network bandwidth available and the latency between your server and
200
+ Cloud Storage.
201
+
190
202
  [id="plugins-{type}s-{plugin}-max_file_size_kbytes"]
191
203
  ===== `max_file_size_kbytes`
192
204
 
@@ -0,0 +1,47 @@
1
+ # encoding: utf-8
2
+ require 'thread'
3
+ require 'concurrent'
4
+
5
+ module LogStash
6
+ module Outputs
7
+ module Gcs
8
+ # WorkerPool creates a pool of workers that can handle jobs.
9
+ class WorkerPool
10
+ attr_reader :workers
11
+
12
+ def initialize(max_threads, synchronous=false)
13
+ @synchronous = synchronous
14
+
15
+ # set queue depth to the be the same as the number of threads so
16
+ # there's at most one pending job each when the plugin quits
17
+ @workers = Concurrent::ThreadPoolExecutor.new(
18
+ min_threads: 1,
19
+ max_threads: max_threads,
20
+ max_queue: max_threads,
21
+ fallback_policy: :caller_runs
22
+ )
23
+ end
24
+
25
+ # Submits a job to the worker pool, raises an error if the pool has
26
+ # already been stopped.
27
+ def post(&block)
28
+ raise 'Pool already stopped' unless @workers.running?
29
+
30
+ if @synchronous
31
+ block.call
32
+ else
33
+ @workers.post do
34
+ block.call
35
+ end
36
+ end
37
+ end
38
+
39
+ # Stops the worker pool
40
+ def stop!
41
+ @workers.shutdown
42
+ @workers.wait_for_termination
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -20,8 +20,11 @@
20
20
  # -----
21
21
  require "logstash/outputs/base"
22
22
  require "logstash/outputs/gcs/path_factory"
23
+ require "logstash/outputs/gcs/worker_pool"
23
24
  require "logstash/namespace"
24
25
  require "logstash/json"
26
+ require "stud/interval"
27
+ require "thread"
25
28
  require "zlib"
26
29
 
27
30
  # Summary: plugin to upload log events to Google Cloud Storage (GCS), rolling
@@ -71,8 +74,6 @@ require "zlib"
71
74
  # * There's no recover method, so if logstash/plugin crashes, files may not
72
75
  # be uploaded to GCS.
73
76
  # * Allow user to configure file name.
74
- # * Allow parallel uploads for heavier loads (+ connection configuration if
75
- # exposed by Ruby API client)
76
77
  class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
77
78
  config_name "google_cloud_storage"
78
79
 
@@ -137,28 +138,22 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
137
138
  # When this feature is enabled, the uploader_interval_secs option has no effect.
138
139
  config :upload_synchronous, :validate => :boolean, :default => false
139
140
 
141
+ config :max_concurrent_uploads, :validate => :number, :default => 5
142
+
140
143
  public
141
144
  def register
142
145
  require "fileutils"
143
- require "thread"
144
-
145
146
  @logger.debug("GCS: register plugin")
146
-
147
147
  @last_flush_cycle = Time.now
148
148
 
149
- unless upload_synchronous
150
- initialize_upload_queue()
151
- end
152
-
149
+ @workers = LogStash::Outputs::Gcs::WorkerPool.new(@max_concurrent_uploads, @upload_synchronous)
153
150
  initialize_temp_directory()
154
151
  initialize_path_factory
155
152
  open_current_file
156
153
 
157
154
  initialize_google_client()
158
155
 
159
- unless upload_synchronous
160
- @uploader = start_uploader
161
- end
156
+ start_uploader
162
157
 
163
158
  if @gzip
164
159
  @content_type = 'application/gzip'
@@ -180,22 +175,7 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
180
175
  end
181
176
 
182
177
  # Time to roll file based on the date pattern? Or is it over the size limit?
183
- if (@path_factory.should_rotate? || (@max_file_size_kbytes > 0 && @temp_file.size >= @max_file_size_kbytes * 1024))
184
- @logger.debug("GCS: log file will be closed and uploaded",
185
- :filename => File.basename(@temp_file.to_path),
186
- :size => @temp_file.size.to_s,
187
- :max_size => @max_file_size_kbytes.to_s)
188
- # Close does not guarantee that data is physically written to disk.
189
- @temp_file.fsync()
190
- @temp_file.close()
191
-
192
- if upload_synchronous
193
- upload_object(@temp_file.to_path)
194
- File.delete(@temp_file.to_path)
195
- end
196
-
197
- initialize_next_log()
198
- end
178
+ initialize_next_log if ready_to_rotate?
199
179
 
200
180
  @temp_file.write(message)
201
181
  @temp_file.write("\n")
@@ -208,21 +188,24 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
208
188
 
209
189
  public
210
190
  def close
211
- @logger.debug("GCS: close method called")
191
+ @logger.debug('Stopping the plugin, uploading the remaining files.')
212
192
 
213
- @temp_file.fsync()
214
- filename = @temp_file.to_path
215
- size = @temp_file.size
216
- @temp_file.close()
193
+ Stud.stop!(@registration_thread) unless @registration_thread.nil?
217
194
 
218
- if upload_synchronous && size > 0
219
- @logger.debug("GCS: uploading last file of #{size.to_s}b")
220
- upload_object(filename)
221
- File.delete(filename)
222
- end
195
+ close_and_upload_current
196
+ @workers.stop!
223
197
  end
224
198
 
225
199
  private
200
+
201
+
202
+ def ready_to_rotate?
203
+ path_changed = @path_factory.should_rotate?
204
+ too_big = @max_file_size_kbytes > 0 && @temp_file.size >= @max_file_size_kbytes * 1024
205
+
206
+ path_changed || too_big
207
+ end
208
+
226
209
  ##
227
210
  # Flushes temporary log file every flush_interval_secs seconds or so.
228
211
  # This is triggered by events, but if there are no events there's no point
@@ -276,54 +259,17 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
276
259
 
277
260
  def start_uploader
278
261
  Thread.new do
279
- @logger.debug("GCS: starting uploader")
280
- while true
281
- upload_from_queue()
282
- end
283
- end
284
- end
285
- ##
286
- # Uploads log files.
287
- #
288
- # Uploader is done in a separate thread, not holding the receive method above.
289
- def upload_from_queue
290
- filename = @upload_queue.pop
291
-
292
- # Reenqueue if it is still the current file.
293
- if filename == @temp_file.to_path
294
- if !@path_factory.should_rotate?
295
- @logger.debug("GCS: reenqueue as log file is being currently appended to.",
296
- :filename => filename)
297
- @upload_queue << filename
298
- # If we got here, it means that older files were uploaded, so let's
299
- # wait another minute before checking on this file again.
300
- sleep @uploader_interval_secs
301
- return
302
- else
303
- @logger.debug("GCS: flush and close file to be uploaded.",
304
- :filename => filename)
305
- @temp_file.fsync()
306
- @temp_file.close()
307
- initialize_next_log()
262
+ @registration_thread = Thread.current
263
+ Stud.interval(@uploader_interval_secs) do
264
+ initialize_next_log if ready_to_rotate?
308
265
  end
309
266
  end
310
-
311
- if File.stat(filename).size > 0
312
- upload_object(filename)
313
- else
314
- @logger.debug("GCS: file size is zero, skip upload.",
315
- :filename => filename,
316
- :filesize => File.stat(filename).size)
317
- end
318
- @logger.debug("GCS: delete local temporary file ",
319
- :filename => filename)
320
- File.delete(filename)
321
267
  end
322
268
 
323
269
  ##
324
270
  # Opens current log file and updates @temp_file with an instance of IOWriter.
325
271
  # This method also adds file to the upload queue.
326
- def open_current_file()
272
+ def open_current_file
327
273
  path = @path_factory.current_path
328
274
 
329
275
  stat = File.stat(path) rescue nil
@@ -336,9 +282,6 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
336
282
  fd = Zlib::GzipWriter.new(fd)
337
283
  end
338
284
  @temp_file = GCSIOWriter.new(fd)
339
- unless upload_synchronous
340
- @upload_queue << @temp_file.to_path
341
- end
342
285
  end
343
286
 
344
287
  ##
@@ -347,6 +290,7 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
347
290
  # the base log file name is the same (e.g. log file was not rolled given the
348
291
  # date pattern).
349
292
  def initialize_next_log
293
+ close_and_upload_current
350
294
  @path_factory.rotate_path!
351
295
  open_current_file()
352
296
  end
@@ -369,15 +313,6 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
369
313
  @client.authorization = service_account.authorize
370
314
  end
371
315
 
372
- # Initialize the queue that harbors files to be uploaded
373
- def initialize_upload_queue
374
- @upload_queue = new_upload_queue()
375
- end
376
-
377
- def new_upload_queue
378
- Queue.new
379
- end
380
-
381
316
  ##
382
317
  # Uploads a local file to the configured bucket.
383
318
  def upload_object(filename)
@@ -404,6 +339,32 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
404
339
  retry
405
340
  end
406
341
  end
342
+
343
+ def close_and_upload_current
344
+ return if @temp_file.nil?
345
+
346
+ filename = @temp_file.to_path
347
+ @temp_file.fsync
348
+ @temp_file.close
349
+ @logger.info("Uploading file: #{filename}")
350
+
351
+ @workers.post do
352
+ upload_and_delete(filename)
353
+ end
354
+ end
355
+
356
+ def upload_and_delete(filename)
357
+ file_size = File.stat(filename).size
358
+
359
+ if file_size > 0
360
+ upload_object(filename)
361
+ else
362
+ @logger.debug('File size is zero, skip upload.', :filename => filename)
363
+ end
364
+
365
+ @logger.debug('Delete local temporary file', :filename => filename)
366
+ File.delete(filename)
367
+ end
407
368
  end
408
369
 
409
370
  ##
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-output-google_cloud_storage'
3
- s.version = '3.1.0'
3
+ s.version = '3.2.0'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = "plugin to upload log events to Google Cloud Storage (GCS)"
6
6
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -25,7 +25,7 @@ Gem::Specification.new do |s|
25
25
  s.add_runtime_dependency 'google-api-client', '~> 0.8.7' # version 0.9.x works only with ruby 2.x
26
26
  s.add_runtime_dependency 'logstash-codec-plain'
27
27
  s.add_runtime_dependency 'mime-types', '~> 2' # last version compatible with ruby 2.x
28
-
28
+ s.add_runtime_dependency 'concurrent-ruby', '1.0.5'
29
29
  s.add_development_dependency 'logstash-devutils'
30
30
  end
31
31
 
@@ -0,0 +1,27 @@
1
+ # encoding: utf-8
2
+ require 'logstash/outputs/gcs/worker_pool'
3
+
4
+ describe LogStash::Outputs::Gcs::WorkerPool do
5
+ describe '#post' do
6
+ it 'runs the task in the same thread if synchronous' do
7
+ pool = LogStash::Outputs::Gcs::WorkerPool.new(5, true)
8
+ expect(pool.workers).to_not receive(:post)
9
+
10
+ pool.post { 1 + 2 }
11
+ end
12
+
13
+ it 'runs the task in a different thread if asynchronous' do
14
+ pool = LogStash::Outputs::Gcs::WorkerPool.new(5, false)
15
+ expect(pool.workers).to receive(:post)
16
+
17
+ pool.post { 1 + 2 }
18
+ end
19
+
20
+ it 'raises an error if the pool is already stopped' do
21
+ pool = LogStash::Outputs::Gcs::WorkerPool.new(5, true)
22
+ pool.stop!
23
+
24
+ expect{ pool.post{} }.to raise_error(RuntimeError)
25
+ end
26
+ end
27
+ end
@@ -4,13 +4,13 @@ require "google/api_client"
4
4
  require "tempfile"
5
5
 
6
6
  describe LogStash::Outputs::GoogleCloudStorage do
7
-
7
+
8
8
  let(:client) { double("google-client") }
9
9
  let(:service_account) { double("service-account") }
10
10
  let(:key) { "key" }
11
11
 
12
12
  subject { described_class.new(config) }
13
- let(:config) { {"bucket" => "", "key_path" => "", "service_account" => "", "uploader_interval_secs" => 0.1 } }
13
+ let(:config) { {"bucket" => "", "key_path" => "", "service_account" => "", "uploader_interval_secs" => 0.1, "upload_synchronous" => true} }
14
14
 
15
15
  before(:each) do
16
16
  allow(Google::APIClient).to receive(:new).and_return(client)
@@ -24,36 +24,4 @@ describe LogStash::Outputs::GoogleCloudStorage do
24
24
  it "should register without errors" do
25
25
  expect { subject.register }.to_not raise_error
26
26
  end
27
-
28
- describe "file size based decider for uploading" do
29
- let(:upload_queue) { Queue.new }
30
- let(:content) { }
31
- before(:each) do
32
- allow(subject).to receive(:new_upload_queue).and_return(upload_queue)
33
- subject.send(:initialize_upload_queue)
34
- subject.send(:initialize_temp_directory)
35
- subject.send(:initialize_path_factory)
36
- subject.send(:open_current_file)
37
- current_file = upload_queue.pop
38
- File.write(current_file, content) if content
39
- upload_queue.push(current_file)
40
- subject.send(:initialize_next_log)
41
- end
42
-
43
- context "when spooled file is empty" do
44
- let(:content) { nil }
45
- it "doesn't get uploaded" do
46
- expect(subject).to_not receive(:upload_object)
47
- subject.send(:upload_from_queue)
48
- end
49
- end
50
-
51
- context "when spooled file has content" do
52
- let(:content) { "hello" }
53
- it "gets uploaded" do
54
- expect(subject).to receive(:upload_object)
55
- subject.send(:upload_from_queue)
56
- end
57
- end
58
- end
59
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-output-google_cloud_storage
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-05-16 00:00:00.000000000 Z
11
+ date: 2018-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -86,6 +86,20 @@ dependencies:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
88
  version: '2'
89
+ - !ruby/object:Gem::Dependency
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - '='
93
+ - !ruby/object:Gem::Version
94
+ version: 1.0.5
95
+ name: concurrent-ruby
96
+ prerelease: false
97
+ type: :runtime
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - '='
101
+ - !ruby/object:Gem::Version
102
+ version: 1.0.5
89
103
  - !ruby/object:Gem::Dependency
90
104
  requirement: !ruby/object:Gem::Requirement
91
105
  requirements:
@@ -116,9 +130,11 @@ files:
116
130
  - README.md
117
131
  - docs/index.asciidoc
118
132
  - lib/logstash/outputs/gcs/path_factory.rb
133
+ - lib/logstash/outputs/gcs/worker_pool.rb
119
134
  - lib/logstash/outputs/google_cloud_storage.rb
120
135
  - logstash-output-google_cloud_storage.gemspec
121
136
  - spec/outputs/gcs/path_factory_spec.rb
137
+ - spec/outputs/gcs/worker_pool_spec.rb
122
138
  - spec/outputs/google_cloud_storage_spec.rb
123
139
  - spec/spec_helper.rb
124
140
  homepage: http://www.elastic.co/guide/en/logstash/current/index.html
@@ -149,5 +165,6 @@ specification_version: 4
149
165
  summary: plugin to upload log events to Google Cloud Storage (GCS)
150
166
  test_files:
151
167
  - spec/outputs/gcs/path_factory_spec.rb
168
+ - spec/outputs/gcs/worker_pool_spec.rb
152
169
  - spec/outputs/google_cloud_storage_spec.rb
153
170
  - spec/spec_helper.rb