logstash-output-google_cloud_storage 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9181dddb40215cd78a1b07d3b327df10e7dd678b3eab3d34f511a84f11a92a6
4
- data.tar.gz: 4696f52738d5e20466b8639565a7eeee646b19e27e0421fc39c7cc283359a07b
3
+ metadata.gz: ef57485cd166eb205939da40bb4db73428955388af8e5c13d313852eb8c297c7
4
+ data.tar.gz: 8d3e0f581f611a9c7148ecc9f871b54e8c7ebf356ea3da39a3fa0a187d710184
5
5
  SHA512:
6
- metadata.gz: 93b09c005fc9cd4afdca9678b14949f35ad5d07ec92cb5f1ffb2ab20d8228f6e46e72e197846d1704cc2575f9bbf8c603e03ebf355b0423f6de6e42f6cb698dc
7
- data.tar.gz: 05e5bd961335f99458a4c46d5b271bcfd1279c6349817a483c9c65c1c62b0f1efeac48221b5c3c0e8869e0603caedcb5ece6b1d6da21a21641341fd306769549
6
+ metadata.gz: 022b0a599c17c5a9dc062093662556f2cfd15c7d2345527b92a4b9bda4fb2fa3756e837bbe471a60bf2034ce2a6b059f76b2ec01c1d27ba6b54575f628e46bc8
7
+ data.tar.gz: b603dea8edb673e1a1e0c72a5431d5d915ade37d1f6164fb3b40882bdecb936a6959ec16e6724d239ca26e87b651c6a8602cfbd439d82d824bb14781611a8bd0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 3.2.0
2
+ - Change uploads to use a job pool for better performance
3
+ - Fixes [#22](https://github.com/logstash-plugins/logstash-output-google_cloud_storage/issues/22) - Refactor Job Queue Architecture
4
+ - Fixes [#5](https://github.com/logstash-plugins/logstash-output-google_cloud_storage/issues/5) - Major Performance Issues
5
+ - Wait for files to upload before Logstash quits
6
+ - Fixes [#15](https://github.com/logstash-plugins/logstash-output-google_cloud_storage/issues/15) - Fails to upload files when Logstash exits
7
+
1
8
  ## 3.1.0
2
9
  - Add support for disabling hostname in the log file names
3
10
  - Add support for adding a UUID to the log file names
data/docs/index.asciidoc CHANGED
@@ -88,6 +88,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
88
88
  | <<plugins-{type}s-{plugin}-key_password>> |<<string,string>>|No
89
89
  | <<plugins-{type}s-{plugin}-key_path>> |<<string,string>>|Yes
90
90
  | <<plugins-{type}s-{plugin}-log_file_prefix>> |<<string,string>>|No
91
+ | <<plugins-{type}s-{plugin}-max_concurrent_uploads>> |<<number,number>>|No
91
92
  | <<plugins-{type}s-{plugin}-max_file_size_kbytes>> |<<number,number>>|No
92
93
  | <<plugins-{type}s-{plugin}-output_format>> |<<string,string>>, one of `["json", "plain"]`|No
93
94
  | <<plugins-{type}s-{plugin}-service_account>> |<<string,string>>|Yes
@@ -187,6 +188,17 @@ GCS path to private key file.
187
188
  Log file prefix. Log file will follow the format:
188
189
  <prefix>_hostname_date<.part?>.log
189
190
 
191
+ [id="plugins-{type}s-{plugin}-max_concurrent_uploads"]
192
+ ===== `max_concurrent_uploads`
193
+
194
+ * Value type is <<number,number>>
195
+ * Default value is `5`
196
+
197
+ Sets the maximum number of concurrent uploads to Cloud Storage at a time.
198
+ Uploads are I/O bound so it makes sense to tune this parameter with regards
199
+ to the network bandwidth available and the latency between your server and
200
+ Cloud Storage.
201
+
190
202
  [id="plugins-{type}s-{plugin}-max_file_size_kbytes"]
191
203
  ===== `max_file_size_kbytes`
192
204
 
@@ -0,0 +1,47 @@
1
+ # encoding: utf-8
2
+ require 'thread'
3
+ require 'concurrent'
4
+
5
+ module LogStash
6
+ module Outputs
7
+ module Gcs
8
+ # WorkerPool creates a pool of workers that can handle jobs.
9
+ class WorkerPool
10
+ attr_reader :workers
11
+
12
+ def initialize(max_threads, synchronous=false)
13
+ @synchronous = synchronous
14
+
15
+ # set queue depth to be the same as the number of threads so
16
+ # there's at most one pending job each when the plugin quits
17
+ @workers = Concurrent::ThreadPoolExecutor.new(
18
+ min_threads: 1,
19
+ max_threads: max_threads,
20
+ max_queue: max_threads,
21
+ fallback_policy: :caller_runs
22
+ )
23
+ end
24
+
25
+ # Submits a job to the worker pool, raises an error if the pool has
26
+ # already been stopped.
27
+ def post(&block)
28
+ raise 'Pool already stopped' unless @workers.running?
29
+
30
+ if @synchronous
31
+ block.call
32
+ else
33
+ @workers.post do
34
+ block.call
35
+ end
36
+ end
37
+ end
38
+
39
+ # Stops the worker pool
40
+ def stop!
41
+ @workers.shutdown
42
+ @workers.wait_for_termination
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -20,8 +20,11 @@
20
20
  # -----
21
21
  require "logstash/outputs/base"
22
22
  require "logstash/outputs/gcs/path_factory"
23
+ require "logstash/outputs/gcs/worker_pool"
23
24
  require "logstash/namespace"
24
25
  require "logstash/json"
26
+ require "stud/interval"
27
+ require "thread"
25
28
  require "zlib"
26
29
 
27
30
  # Summary: plugin to upload log events to Google Cloud Storage (GCS), rolling
@@ -71,8 +74,6 @@ require "zlib"
71
74
  # * There's no recover method, so if logstash/plugin crashes, files may not
72
75
  # be uploaded to GCS.
73
76
  # * Allow user to configure file name.
74
- # * Allow parallel uploads for heavier loads (+ connection configuration if
75
- # exposed by Ruby API client)
76
77
  class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
77
78
  config_name "google_cloud_storage"
78
79
 
@@ -137,28 +138,22 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
137
138
  # When this feature is enabled, the uploader_interval_secs option has no effect.
138
139
  config :upload_synchronous, :validate => :boolean, :default => false
139
140
 
141
+ config :max_concurrent_uploads, :validate => :number, :default => 5
142
+
140
143
  public
141
144
  def register
142
145
  require "fileutils"
143
- require "thread"
144
-
145
146
  @logger.debug("GCS: register plugin")
146
-
147
147
  @last_flush_cycle = Time.now
148
148
 
149
- unless upload_synchronous
150
- initialize_upload_queue()
151
- end
152
-
149
+ @workers = LogStash::Outputs::Gcs::WorkerPool.new(@max_concurrent_uploads, @upload_synchronous)
153
150
  initialize_temp_directory()
154
151
  initialize_path_factory
155
152
  open_current_file
156
153
 
157
154
  initialize_google_client()
158
155
 
159
- unless upload_synchronous
160
- @uploader = start_uploader
161
- end
156
+ start_uploader
162
157
 
163
158
  if @gzip
164
159
  @content_type = 'application/gzip'
@@ -180,22 +175,7 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
180
175
  end
181
176
 
182
177
  # Time to roll file based on the date pattern? Or is it over the size limit?
183
- if (@path_factory.should_rotate? || (@max_file_size_kbytes > 0 && @temp_file.size >= @max_file_size_kbytes * 1024))
184
- @logger.debug("GCS: log file will be closed and uploaded",
185
- :filename => File.basename(@temp_file.to_path),
186
- :size => @temp_file.size.to_s,
187
- :max_size => @max_file_size_kbytes.to_s)
188
- # Close does not guarantee that data is physically written to disk.
189
- @temp_file.fsync()
190
- @temp_file.close()
191
-
192
- if upload_synchronous
193
- upload_object(@temp_file.to_path)
194
- File.delete(@temp_file.to_path)
195
- end
196
-
197
- initialize_next_log()
198
- end
178
+ initialize_next_log if ready_to_rotate?
199
179
 
200
180
  @temp_file.write(message)
201
181
  @temp_file.write("\n")
@@ -208,21 +188,24 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
208
188
 
209
189
  public
210
190
  def close
211
- @logger.debug("GCS: close method called")
191
+ @logger.debug('Stopping the plugin, uploading the remaining files.')
212
192
 
213
- @temp_file.fsync()
214
- filename = @temp_file.to_path
215
- size = @temp_file.size
216
- @temp_file.close()
193
+ Stud.stop!(@registration_thread) unless @registration_thread.nil?
217
194
 
218
- if upload_synchronous && size > 0
219
- @logger.debug("GCS: uploading last file of #{size.to_s}b")
220
- upload_object(filename)
221
- File.delete(filename)
222
- end
195
+ close_and_upload_current
196
+ @workers.stop!
223
197
  end
224
198
 
225
199
  private
200
+
201
+
202
+ def ready_to_rotate?
203
+ path_changed = @path_factory.should_rotate?
204
+ too_big = @max_file_size_kbytes > 0 && @temp_file.size >= @max_file_size_kbytes * 1024
205
+
206
+ path_changed || too_big
207
+ end
208
+
226
209
  ##
227
210
  # Flushes temporary log file every flush_interval_secs seconds or so.
228
211
  # This is triggered by events, but if there are no events there's no point
@@ -276,54 +259,17 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
276
259
 
277
260
  def start_uploader
278
261
  Thread.new do
279
- @logger.debug("GCS: starting uploader")
280
- while true
281
- upload_from_queue()
282
- end
283
- end
284
- end
285
- ##
286
- # Uploads log files.
287
- #
288
- # Uploader is done in a separate thread, not holding the receive method above.
289
- def upload_from_queue
290
- filename = @upload_queue.pop
291
-
292
- # Reenqueue if it is still the current file.
293
- if filename == @temp_file.to_path
294
- if !@path_factory.should_rotate?
295
- @logger.debug("GCS: reenqueue as log file is being currently appended to.",
296
- :filename => filename)
297
- @upload_queue << filename
298
- # If we got here, it means that older files were uploaded, so let's
299
- # wait another minute before checking on this file again.
300
- sleep @uploader_interval_secs
301
- return
302
- else
303
- @logger.debug("GCS: flush and close file to be uploaded.",
304
- :filename => filename)
305
- @temp_file.fsync()
306
- @temp_file.close()
307
- initialize_next_log()
262
+ @registration_thread = Thread.current
263
+ Stud.interval(@uploader_interval_secs) do
264
+ initialize_next_log if ready_to_rotate?
308
265
  end
309
266
  end
310
-
311
- if File.stat(filename).size > 0
312
- upload_object(filename)
313
- else
314
- @logger.debug("GCS: file size is zero, skip upload.",
315
- :filename => filename,
316
- :filesize => File.stat(filename).size)
317
- end
318
- @logger.debug("GCS: delete local temporary file ",
319
- :filename => filename)
320
- File.delete(filename)
321
267
  end
322
268
 
323
269
  ##
324
270
  # Opens current log file and updates @temp_file with an instance of IOWriter.
325
271
  # This method also adds file to the upload queue.
326
- def open_current_file()
272
+ def open_current_file
327
273
  path = @path_factory.current_path
328
274
 
329
275
  stat = File.stat(path) rescue nil
@@ -336,9 +282,6 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
336
282
  fd = Zlib::GzipWriter.new(fd)
337
283
  end
338
284
  @temp_file = GCSIOWriter.new(fd)
339
- unless upload_synchronous
340
- @upload_queue << @temp_file.to_path
341
- end
342
285
  end
343
286
 
344
287
  ##
@@ -347,6 +290,7 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
347
290
  # the base log file name is the same (e.g. log file was not rolled given the
348
291
  # date pattern).
349
292
  def initialize_next_log
293
+ close_and_upload_current
350
294
  @path_factory.rotate_path!
351
295
  open_current_file()
352
296
  end
@@ -369,15 +313,6 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
369
313
  @client.authorization = service_account.authorize
370
314
  end
371
315
 
372
- # Initialize the queue that harbors files to be uploaded
373
- def initialize_upload_queue
374
- @upload_queue = new_upload_queue()
375
- end
376
-
377
- def new_upload_queue
378
- Queue.new
379
- end
380
-
381
316
  ##
382
317
  # Uploads a local file to the configured bucket.
383
318
  def upload_object(filename)
@@ -404,6 +339,32 @@ class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
404
339
  retry
405
340
  end
406
341
  end
342
+
343
+ def close_and_upload_current
344
+ return if @temp_file.nil?
345
+
346
+ filename = @temp_file.to_path
347
+ @temp_file.fsync
348
+ @temp_file.close
349
+ @logger.info("Uploading file: #{filename}")
350
+
351
+ @workers.post do
352
+ upload_and_delete(filename)
353
+ end
354
+ end
355
+
356
+ def upload_and_delete(filename)
357
+ file_size = File.stat(filename).size
358
+
359
+ if file_size > 0
360
+ upload_object(filename)
361
+ else
362
+ @logger.debug('File size is zero, skip upload.', :filename => filename)
363
+ end
364
+
365
+ @logger.debug('Delete local temporary file', :filename => filename)
366
+ File.delete(filename)
367
+ end
407
368
  end
408
369
 
409
370
  ##
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-output-google_cloud_storage'
3
- s.version = '3.1.0'
3
+ s.version = '3.2.0'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = "plugin to upload log events to Google Cloud Storage (GCS)"
6
6
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -25,7 +25,7 @@ Gem::Specification.new do |s|
25
25
  s.add_runtime_dependency 'google-api-client', '~> 0.8.7' # version 0.9.x works only with ruby 2.x
26
26
  s.add_runtime_dependency 'logstash-codec-plain'
27
27
  s.add_runtime_dependency 'mime-types', '~> 2' # last version compatible with ruby 2.x
28
-
28
+ s.add_runtime_dependency 'concurrent-ruby', '1.0.5'
29
29
  s.add_development_dependency 'logstash-devutils'
30
30
  end
31
31
 
@@ -0,0 +1,27 @@
1
+ # encoding: utf-8
2
+ require 'logstash/outputs/gcs/worker_pool'
3
+
4
+ describe LogStash::Outputs::Gcs::WorkerPool do
5
+ describe '#post' do
6
+ it 'runs the task in the same thread if synchronous' do
7
+ pool = LogStash::Outputs::Gcs::WorkerPool.new(5, true)
8
+ expect(pool.workers).to_not receive(:post)
9
+
10
+ pool.post { 1 + 2 }
11
+ end
12
+
13
+ it 'runs the task in a different thread if asynchronous' do
14
+ pool = LogStash::Outputs::Gcs::WorkerPool.new(5, false)
15
+ expect(pool.workers).to receive(:post)
16
+
17
+ pool.post { 1 + 2 }
18
+ end
19
+
20
+ it 'raises an error if the pool is already stopped' do
21
+ pool = LogStash::Outputs::Gcs::WorkerPool.new(5, true)
22
+ pool.stop!
23
+
24
+ expect{ pool.post{} }.to raise_error(RuntimeError)
25
+ end
26
+ end
27
+ end
@@ -4,13 +4,13 @@ require "google/api_client"
4
4
  require "tempfile"
5
5
 
6
6
  describe LogStash::Outputs::GoogleCloudStorage do
7
-
7
+
8
8
  let(:client) { double("google-client") }
9
9
  let(:service_account) { double("service-account") }
10
10
  let(:key) { "key" }
11
11
 
12
12
  subject { described_class.new(config) }
13
- let(:config) { {"bucket" => "", "key_path" => "", "service_account" => "", "uploader_interval_secs" => 0.1 } }
13
+ let(:config) { {"bucket" => "", "key_path" => "", "service_account" => "", "uploader_interval_secs" => 0.1, "upload_synchronous" => true} }
14
14
 
15
15
  before(:each) do
16
16
  allow(Google::APIClient).to receive(:new).and_return(client)
@@ -24,36 +24,4 @@ describe LogStash::Outputs::GoogleCloudStorage do
24
24
  it "should register without errors" do
25
25
  expect { subject.register }.to_not raise_error
26
26
  end
27
-
28
- describe "file size based decider for uploading" do
29
- let(:upload_queue) { Queue.new }
30
- let(:content) { }
31
- before(:each) do
32
- allow(subject).to receive(:new_upload_queue).and_return(upload_queue)
33
- subject.send(:initialize_upload_queue)
34
- subject.send(:initialize_temp_directory)
35
- subject.send(:initialize_path_factory)
36
- subject.send(:open_current_file)
37
- current_file = upload_queue.pop
38
- File.write(current_file, content) if content
39
- upload_queue.push(current_file)
40
- subject.send(:initialize_next_log)
41
- end
42
-
43
- context "when spooled file is empty" do
44
- let(:content) { nil }
45
- it "doesn't get uploaded" do
46
- expect(subject).to_not receive(:upload_object)
47
- subject.send(:upload_from_queue)
48
- end
49
- end
50
-
51
- context "when spooled file has content" do
52
- let(:content) { "hello" }
53
- it "gets uploaded" do
54
- expect(subject).to receive(:upload_object)
55
- subject.send(:upload_from_queue)
56
- end
57
- end
58
- end
59
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-output-google_cloud_storage
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-05-16 00:00:00.000000000 Z
11
+ date: 2018-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -86,6 +86,20 @@ dependencies:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
88
  version: '2'
89
+ - !ruby/object:Gem::Dependency
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - '='
93
+ - !ruby/object:Gem::Version
94
+ version: 1.0.5
95
+ name: concurrent-ruby
96
+ prerelease: false
97
+ type: :runtime
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - '='
101
+ - !ruby/object:Gem::Version
102
+ version: 1.0.5
89
103
  - !ruby/object:Gem::Dependency
90
104
  requirement: !ruby/object:Gem::Requirement
91
105
  requirements:
@@ -116,9 +130,11 @@ files:
116
130
  - README.md
117
131
  - docs/index.asciidoc
118
132
  - lib/logstash/outputs/gcs/path_factory.rb
133
+ - lib/logstash/outputs/gcs/worker_pool.rb
119
134
  - lib/logstash/outputs/google_cloud_storage.rb
120
135
  - logstash-output-google_cloud_storage.gemspec
121
136
  - spec/outputs/gcs/path_factory_spec.rb
137
+ - spec/outputs/gcs/worker_pool_spec.rb
122
138
  - spec/outputs/google_cloud_storage_spec.rb
123
139
  - spec/spec_helper.rb
124
140
  homepage: http://www.elastic.co/guide/en/logstash/current/index.html
@@ -149,5 +165,6 @@ specification_version: 4
149
165
  summary: plugin to upload log events to Google Cloud Storage (GCS)
150
166
  test_files:
151
167
  - spec/outputs/gcs/path_factory_spec.rb
168
+ - spec/outputs/gcs/worker_pool_spec.rb
152
169
  - spec/outputs/google_cloud_storage_spec.rb
153
170
  - spec/spec_helper.rb