logstash-output-googlecloudstorage 0.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,77 @@
1
+ # encoding: utf-8
2
+ require 'logstash/outputs/gcs/temp_log_file'
3
+ require 'concurrent'
4
+
5
module LogStash
  module Outputs
    module Gcs
      # LogRotate owns the single active temporary log file. It rolls the
      # file over when the path factory reports a new path or when the file
      # exceeds the configured maximum size, and notifies a callback with
      # the retired file's path on every rotation.
      class LogRotate
        def initialize(path_factory, max_file_size_bytes, gzip, flush_interval_secs, gzip_encoded=false)
          @path_factory = path_factory
          @max_file_size_bytes = max_file_size_bytes
          @gzip = gzip
          @flush_interval_secs = flush_interval_secs
          @gzip_encoded = gzip_encoded

          @lock = Concurrent::ReentrantReadWriteLock.new
          @rotate_callback = nil

          # Open the first file immediately so writeln always has a target.
          rotate_log!
        end

        # Writes a message plus a trailing newline to the open log file,
        # rotating and syncing first if necessary.
        #
        # A nil message writes nothing but may still trigger rotation/sync.
        def writeln(message=nil)
          @lock.with_write_lock do
            rotate_log! if should_rotate?
            append_line(message)
            flush_if_stale
          end
        end

        # Closes the current log (if any), notifies the rotation handler,
        # rolls the path over and opens a fresh log.
        #
        # Invariant: the old log is ALWAYS closed and a new one is ALWAYS
        # open when this method returns.
        def rotate_log!
          @lock.with_write_lock do
            retire_current_file
            @path_factory.rotate_path!
            open_current_file
          end
        end

        # Registers a handler invoked after each rotation with the path
        # (String) of the file that was rotated out.
        def on_rotate(&block)
          @lock.with_write_lock { @rotate_callback = block }
        end

        private

        # True when either the path factory wants a new path or the current
        # file has reached the size limit (size checks disabled when the
        # limit is zero or negative).
        def should_rotate?
          @lock.with_read_lock do
            size_exceeded = @max_file_size_bytes > 0 && @temp_file.size >= @max_file_size_bytes
            @path_factory.should_rotate? || size_exceeded
          end
        end

        # Appends a single line; nil messages are ignored.
        def append_line(message)
          return if message.nil?
          @temp_file.write(message, "\n")
        end

        # Syncs to disk when the flush interval has elapsed.
        def flush_if_stale
          @temp_file.fsync if @temp_file.time_since_sync >= @flush_interval_secs
        end

        # Closes the active file (if one exists) and fires the callback.
        def retire_current_file
          return if @temp_file.nil?
          @temp_file.close!
          @rotate_callback.call(@temp_file.path) unless @rotate_callback.nil?
        end

        # Opens a new file at the factory's current path.
        def open_current_file
          path = @path_factory.current_path
          @temp_file = LogStash::Outputs::Gcs::LogFileFactory.create(path, @gzip, true, @gzip_encoded)
        end
      end
    end
  end
end
@@ -0,0 +1,119 @@
1
+ # encoding: utf-8
2
+ require 'thread'
3
+
4
+ module LogStash
5
+ module Outputs
6
+ module Gcs
7
+ # PathFactory creates paths for rotating files.
8
+ class PathFactory
9
+ def initialize(directory, prefix, include_host, date_pattern, include_part, include_uuid, is_gzipped)
10
+ @path_lock = Mutex.new
11
+
12
+ pattern = prefix
13
+ pattern += '_%{host}' if include_host
14
+ pattern += '_%{date}'
15
+ @base_pattern = pattern
16
+
17
+ pattern += '.part%{partf}' if include_part
18
+ pattern += '.%{uuid}' if include_uuid
19
+ pattern += '.log'
20
+ pattern += '.gz' if is_gzipped
21
+ @pattern = pattern
22
+
23
+ @prefix = prefix
24
+ @directory = directory
25
+ @date_pattern = date_pattern
26
+
27
+ @part_number = starting_part
28
+ @current = template_variables
29
+ end
30
+
31
+ # Rotates the path to the next one in sequence. If the path has a part number
32
+ # and the base path (date/hostname) haven't changed the part number is incremented.
33
+ # Returns the path that was rotated out
34
+ def rotate_path!
35
+ last_path = current_path
36
+
37
+ @path_lock.synchronize {
38
+ @part_number = (next_base == current_base) ? @part_number + 1 : 0
39
+ @current = template_variables
40
+ }
41
+
42
+ last_path
43
+ end
44
+
45
+ # Checks if the file is ready to rotate because the timestamp changed.
46
+ def should_rotate?
47
+ @path_lock.synchronize {
48
+ next_base != current_base
49
+ }
50
+ end
51
+
52
+ # Returns the full path to the current file including parent directory.
53
+ def current_path(vars=nil)
54
+ @path_lock.synchronize {
55
+ filename = @pattern % (vars || @current)
56
+ ::File.join(@directory, filename)
57
+ }
58
+ end
59
+
60
+ private
61
+
62
+ # search through the directory for a file with the same base, and if it exists,
63
+ # set our part to be the max + 1 so we don't clobber existing files.
64
+ def starting_part
65
+ return 0 unless ::File.directory? @directory
66
+
67
+ base_path = ::File.join(@directory, next_base)
68
+
69
+ part_numbers = Dir.glob(base_path + '.part*').map do |item|
70
+ match = /^.*\.part(?<part_num>\d+).*$/.match(item)
71
+ next if match.nil?
72
+ match[:part_num].to_i
73
+ end
74
+
75
+ part_numbers.any? ? part_numbers.max + 1 : 0
76
+ end
77
+
78
+ def template_variables
79
+ {
80
+ prefix: @prefix,
81
+ host: Socket.gethostname,
82
+ date: Time.now.strftime(@date_pattern),
83
+ partf: '%03d' % @part_number,
84
+ uuid: SecureRandom.uuid
85
+ }
86
+ end
87
+
88
+ def next_base
89
+ @base_pattern % template_variables
90
+ end
91
+
92
+ def current_base
93
+ @base_pattern % @current
94
+ end
95
+ end
96
+
97
+ # PathFactoryBuilder makes the long PathFactory constructor chain more readable.
98
# PathFactoryBuilder makes the long PathFactory constructor chain more readable.
class PathFactoryBuilder
  # Yields a builder to the caller's block, then constructs the PathFactory
  # from the collected settings.
  def self.build
    new.tap { |builder| yield builder }.build_path_factory
  end

  # Declares a set_<name> writer for each listed attribute.
  def self.builder_setter(*names)
    names.each do |attr|
      define_method(:"set_#{attr}") { |value| instance_variable_set(:"@#{attr}", value) }
    end
  end

  builder_setter :directory, :prefix, :include_host, :date_pattern, :include_part, :include_uuid, :is_gzipped

  # Assembles the PathFactory from whatever the setters recorded.
  def build_path_factory
    PathFactory.new(@directory, @prefix, @include_host, @date_pattern, @include_part, @include_uuid, @is_gzipped)
  end
end
117
+ end
118
+ end
119
+ end
@@ -0,0 +1,111 @@
1
+ # encoding: utf-8
2
+ require 'zlib'
3
+ require 'concurrent'
4
+ require 'time'
5
+
6
+ module LogStash
7
+ module Outputs
8
+ module Gcs
9
+ # LogFileFactory creates a LogFile according to user specification
10
+ # optionally gzipping it and creating mutexes around modification
11
+ # points.
12
# LogFileFactory assembles a LogFile to the user's specification,
# layering gzip compression and/or lock-based synchronization over
# a plain file as requested.
class LogFileFactory
  def self.create(path, gzip, synchronize=true, gzip_encoded=false)
    file = LogStash::Outputs::Gcs::PlainLogFile.new(path)
    # NOTE(review): enabling both gzip and gzip_encoded stacks two gzip
    # layers, producing doubly-compressed output — confirm that is intended.
    file = LogStash::Outputs::Gcs::GzipLogFile.new(file) if gzip
    file = LogStash::Outputs::Gcs::GzipLogFile.new(file) if gzip_encoded
    file = LogStash::Outputs::Gcs::SynchronizedLogFile.new(file) if synchronize
    file
  end
end
22
+
23
+ # PlainLogFile writes events to a plain text file.
24
# PlainLogFile appends raw text to a file on disk and tracks when it
# was last synced so callers can flush on an interval.
class PlainLogFile
  attr_reader :path, :fd

  def initialize(path)
    @path = path
    @fd = ::File.new(path, 'a+')
    @last_sync = Time.now
  end

  # Appends each chunk to the file, in order.
  def write(*contents)
    contents.each do |chunk|
      @fd.write(chunk)
    end
  end

  # Forces buffered data to disk and records the sync time.
  def fsync
    @fd.fsync
    @last_sync = Time.now
  end

  # Flushes remaining data and closes the file descriptor.
  def close!
    @fd.fsync
    @fd.close
  end

  # Current on-disk size of the file, in bytes.
  def size
    ::File.stat(@path).size
  end

  # Seconds elapsed since the last fsync.
  def time_since_sync
    Time.now - @last_sync
  end
end
55
+
56
+ # GzipLogFile wraps another log file and writes events through it.
57
# GzipLogFile layers gzip compression over another log file: writes go
# through a Zlib::GzipWriter wrapped around the child's file descriptor,
# and every other call is delegated to the child.
class GzipLogFile
  attr_reader :fd

  def initialize(child)
    @child = child
    @fd = Zlib::GzipWriter.new(child.fd)
  end

  # Compresses and appends each chunk, in order.
  def write(*contents)
    contents.each { |c| @fd.write(c) }
  end

  # Flushes pending compressed data into the child, then asks the
  # child to sync to disk.
  def fsync
    @fd.flush
    @child.fsync
  end

  def close!
    fsync
    # The Gzip writer closes the underlying IO after
    # appending the Gzip footer.
    @fd.close
  end

  # Delegates anything else (path, size, time_since_sync, ...) to the child.
  def method_missing(method_name, *args, &block)
    @child.send(method_name, *args, &block)
  end

  # BUG FIX: method_missing without respond_to_missing? made respond_to?
  # deny the delegated methods; keep the two consistent.
  def respond_to_missing?(method_name, include_private = false)
    @child.respond_to?(method_name, include_private) || super
  end
end
85
+
86
+ # SynchronizedLogFile wraps another log file and uses reentrant locks
87
+ # around its methods to prevent concurrent modification.
88
# SynchronizedLogFile wraps another log file and guards its methods with
# a reentrant read/write lock to prevent concurrent modification.
class SynchronizedLogFile
  def initialize(child)
    @child = child
    @lock = Concurrent::ReentrantReadWriteLock.new
  end

  # Read-only accessors only need a shared (read) lock.
  def time_since_sync
    @lock.with_read_lock { @child.time_since_sync }
  end

  def path
    @lock.with_read_lock { @child.path }
  end

  # Unless otherwise specified above, delegate under an exclusive
  # (write) lock.
  def method_missing(method_name, *args, &block)
    @lock.with_write_lock do
      @child.send(method_name, *args, &block)
    end
  end

  # BUG FIX: method_missing without respond_to_missing? made respond_to?
  # deny the delegated methods; keep the two consistent.
  def respond_to_missing?(method_name, include_private = false)
    @child.respond_to?(method_name, include_private) || super
  end
end
109
+ end
110
+ end
111
+ end
@@ -0,0 +1,47 @@
1
+ # encoding: utf-8
2
+ require 'thread'
3
+ require 'concurrent'
4
+
5
+ module LogStash
6
+ module Outputs
7
+ module Gcs
8
+ # WorkerPool creates a pool of workers that can handle jobs.
9
# WorkerPool runs jobs on a bounded pool of background threads, or
# inline on the calling thread when synchronous mode is enabled.
class WorkerPool
  attr_reader :workers

  def initialize(max_threads, synchronous=false)
    @synchronous = synchronous

    # Cap the queue at the thread count so at most one job per worker is
    # still pending when the plugin quits; jobs beyond that run on the
    # caller's thread (:caller_runs) rather than being dropped.
    @workers = Concurrent::ThreadPoolExecutor.new(
      min_threads: 1,
      max_threads: max_threads,
      max_queue: max_threads,
      fallback_policy: :caller_runs
    )
  end

  # Runs the given job, raising if the pool has already been stopped.
  def post(&block)
    raise 'Pool already stopped' unless @workers.running?

    @synchronous ? block.call : @workers.post { block.call }
  end

  # Shuts the pool down and waits for in-flight jobs to finish.
  def stop!
    @workers.shutdown
    @workers.wait_for_termination
  end
end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,304 @@
1
+ # [source,txt]
2
+ # -----
3
+ # encoding: utf-8
4
+ # Author: Rodrigo De Castro <rdc@google.com>
5
+ # Date: 2013-09-20
6
+ #
7
+ # Copyright 2013 Google Inc.
8
+ #
9
+ # Licensed under the Apache License, Version 2.0 (the "License");
10
+ # you may not use this file except in compliance with the License.
11
+ # You may obtain a copy of the License at
12
+ #
13
+ # http://www.apache.org/licenses/LICENSE-2.0
14
+ #
15
+ # Unless required by applicable law or agreed to in writing, software
16
+ # distributed under the License is distributed on an "AS IS" BASIS,
17
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ # See the License for the specific language governing permissions and
19
+ # limitations under the License.
20
+ # -----
21
+ require "logstash/outputs/base"
22
+ require "logstash/outputs/gcs/path_factory"
23
+ require "logstash/outputs/gcs/worker_pool"
24
+ require "logstash/outputs/gcs/log_rotate"
25
+ require "logstash/namespace"
26
+ require "logstash/json"
27
+ require "stud/interval"
28
+ require "thread"
29
+ require "zlib"
30
+
31
+ # Summary: plugin to upload log events to Google Cloud Storage (GCS), rolling
32
+ # files based on the date pattern provided as a configuration setting. Events
33
+ # are written to files locally and, once file is closed, this plugin uploads
34
+ # it to the configured bucket.
35
+ #
36
+ # For more info on Google Cloud Storage, please go to:
37
+ # https://cloud.google.com/products/cloud-storage
38
+ #
39
+ # In order to use this plugin, a Google service account must be used. For
40
+ # more information, please refer to:
41
+ # https://developers.google.com/storage/docs/authentication#service_accounts
42
+ #
43
+ # Recommendation: experiment with the settings depending on how much log
44
+ # data you generate, so the uploader can keep up with the generated logs.
45
+ # Using gzip output can be a good option to reduce network traffic when
46
+ # uploading the log files and in terms of storage costs as well.
47
+ #
48
+ # USAGE:
49
+ # This is an example of logstash config:
50
+ #
51
+ # [source,json]
52
+ # --------------------------
53
+ # output {
54
+ # google_cloud_storage {
55
+ # bucket => "my_bucket" (required)
56
+ # key_path => "/path/to/privatekey.p12" (required)
57
+ # key_password => "notasecret" (optional)
58
+ # service_account => "1234@developer.gserviceaccount.com" (required)
59
+ # temp_directory => "/tmp/logstash-gcs" (optional)
60
+ # log_file_prefix => "logstash_gcs" (optional)
61
+ # max_file_size_kbytes => 1024 (optional)
62
+ # output_format => "plain" (optional)
63
+ # date_pattern => "%Y-%m-%dT%H:00" (optional)
64
+ # flush_interval_secs => 2 (optional)
65
+ # gzip => false (optional)
66
+ # gzip_content_encoding => false (optional)
67
+ # uploader_interval_secs => 60 (optional)
68
+ # upload_synchronous => false (optional)
69
+ # }
70
+ # }
71
+ # --------------------------
72
+ #
73
+ # Improvements TODO list:
74
+ # * Support logstash event variables to determine filename.
75
+ # * Turn Google API code into a Plugin Mixin (like AwsConfig).
76
+ # * There's no recover method, so if logstash/plugin crashes, files may not
77
+ # be uploaded to GCS.
78
+ # * Allow user to configure file name.
79
# Writes log events to rolling local files and uploads each rotated-out
# file to a Google Cloud Storage bucket through a worker pool.
class LogStash::Outputs::GoogleCloudStorage < LogStash::Outputs::Base
  config_name "google_cloud_storage"

  concurrency :single

  # GCS bucket name, without "gs://" or any other prefix.
  config :bucket, :validate => :string, :required => true

  # GCS path to private key file.
  config :key_path, :validate => :string, :required => true

  # GCS private key password.
  config :key_password, :validate => :string, :default => "notasecret"

  # GCS service account.
  config :service_account, :validate => :string, :required => true

  # Directory where temporary files are stored.
  # Defaults to /tmp/logstash-gcs-<random-suffix>
  config :temp_directory, :validate => :string, :default => ""

  # Log file prefix. Log file will follow the format:
  # <prefix>_hostname_date<.part?>.log
  config :log_file_prefix, :validate => :string, :default => "logstash_gcs"

  # Sets max file size in kbytes. 0 disables the max file check.
  config :max_file_size_kbytes, :validate => :number, :default => 10000

  # The event format you want to store in files. Defaults to plain text.
  config :output_format, :validate => [ "json", "plain" ], :default => "plain"

  # Time pattern for log file, defaults to hourly files.
  # Must be a Time.strftime pattern: www.ruby-doc.org/core-2.0/Time.html#method-i-strftime
  config :date_pattern, :validate => :string, :default => "%Y-%m-%dT%H:00"

  # Flush interval in seconds for flushing writes to log files. 0 will flush
  # on every message.
  config :flush_interval_secs, :validate => :number, :default => 2

  # Gzip output stream when writing events to log files, set
  # `Content-Type` to `application/gzip` instead of `text/plain`, and
  # use file suffix `.log.gz` instead of `.log`.
  config :gzip, :validate => :boolean, :default => false

  # Gzip output stream when writing events to log files and set
  # `Content-Encoding` to `gzip`.
  config :gzip_content_encoding, :validate => :boolean, :default => false

  # Uploader interval when uploading new files to GCS. Adjust time based
  # on your time pattern (for example, for hourly files, this interval can be
  # around one hour).
  config :uploader_interval_secs, :validate => :number, :default => 60

  # Should the hostname be included in the file name?
  config :include_hostname, :validate => :boolean, :default => true

  # Should a UUID be included in the file name?
  config :include_uuid, :validate => :boolean, :default => false

  # When true, files are uploaded by the event processing thread as soon as a file is ready.
  # When false, (the default behaviour), files will be uploaded in a dedicated thread.
  #
  # Enabling this option provides greater likelihood that all generated files will be
  # uploaded to GCS, especially in the event of a graceful shutdown of logstash, such as
  # when an input plugin reaches the end of events. This comes at the price of introducing
  # delays in the event processing pipeline as files are uploaded.
  #
  # When this feature is enabled, the uploader_interval_secs option has no effect.
  config :upload_synchronous, :validate => :boolean, :default => false

  # Maximum number of simultaneous uploads when running asynchronously.
  config :max_concurrent_uploads, :validate => :number, :default => 5

  public
  def register
    @logger.debug('Registering Google Cloud Storage plugin')

    @workers = LogStash::Outputs::Gcs::WorkerPool.new(@max_concurrent_uploads, @upload_synchronous)
    initialize_temp_directory
    initialize_path_factory
    initialize_log_rotater

    initialize_google_client

    start_uploader

    @content_type = @gzip ? 'application/gzip' : 'text/plain'
    @content_encoding = @gzip_content_encoding ? 'gzip' : 'identity'
  end

  # Method called for each log event. It writes the event to the current output
  # file, flushing depending on flush interval configuration.
  public
  def receive(event)
    @logger.debug('Received event', :event => event)

    if @output_format == 'json'
      message = LogStash::Json.dump(event.to_hash)
    else
      message = event.to_s
    end

    @log_rotater.writeln(message)
  end

  public
  def close
    @logger.debug('Stopping the plugin, uploading the remaining files.')
    Thread.kill(@uploader_thread) unless @uploader_thread.nil?

    # Force rotate the log. If it contains data it will be submitted
    # to the work pool and will be uploaded before the plugin stops.
    @log_rotater.rotate_log!
    @workers.stop!
  end

  private

  ##
  # Creates temporary directory, if it does not exist.
  #
  # A random suffix is appended to the temporary directory
  def initialize_temp_directory
    require "stud/temporary"

    if @temp_directory.empty?
      @temp_directory = Stud::Temporary.directory('logstash-gcs')
    end

    FileUtils.mkdir_p(@temp_directory) unless File.directory?(@temp_directory)

    @logger.info("Using temporary directory: #{@temp_directory}")
  end

  # Builds the PathFactory that names the rolling temp files from the
  # user-supplied settings. Part numbers are only used when a max file
  # size is in effect.
  def initialize_path_factory
    @path_factory = LogStash::Outputs::Gcs::PathFactoryBuilder.build do |builder|
      builder.set_directory @temp_directory
      builder.set_prefix @log_file_prefix
      builder.set_include_host @include_hostname
      builder.set_date_pattern @date_pattern
      builder.set_include_part(@max_file_size_kbytes > 0)
      builder.set_include_uuid @include_uuid
      builder.set_is_gzipped @gzip
    end
  end

  # start_uploader periodically sends flush events through the log rotater
  # (a nil write triggers rotation/sync checks without writing data).
  def start_uploader
    @uploader_thread = Thread.new do
      Stud.interval(@uploader_interval_secs) do
        @log_rotater.writeln(nil)
      end
    end
  end

  ##
  # Initializes Google Client, instantiating the client and authorizing
  # access via a service-account JWT assertion.
  def initialize_google_client
    require "google/api_client"
    require "openssl"

    @client = Google::APIClient.new(:application_name =>
                                    'Logstash Google Cloud Storage output plugin',
                                    :application_version => '0.1')
    @storage = @client.discovered_api('storage', 'v1')

    key = Google::APIClient::PKCS12.load_key(@key_path, @key_password)
    service_account = Google::APIClient::JWTAsserter.new(@service_account,
                                                         'https://www.googleapis.com/auth/devstorage.read_write',
                                                         key)
    @client.authorization = service_account.authorize
  end

  ##
  # Uploads a local file to the configured bucket via a multipart insert.
  # Retries forever on failure, sleeping one second between attempts.
  def upload_object(filename)
    begin
      @logger.debug("GCS: upload object.", :filename => filename)

      media = Google::APIClient::UploadIO.new(filename, @content_type)
      metadata_insert_result = @client.execute(:api_method => @storage.objects.insert,
                                               :parameters => {
                                                 'uploadType' => 'multipart',
                                                 'bucket' => @bucket,
                                                 'contentEncoding' => @content_encoding,
                                                 'name' => File.basename(filename)
                                               },
                                               :body_object => {contentType: @content_type},
                                               :media => media)
      contents = metadata_insert_result.data
      @logger.debug("GCS: multipart insert",
                    :object => contents.name,
                    :self_link => contents.self_link)
    rescue => e
      @logger.error("GCS: failed to upload file", :exception => e)
      # TODO(rdc): limit retries?
      sleep 1
      retry
    end
  end

  # Uploads the file if it has content, then deletes the local copy.
  def upload_and_delete(filename)
    file_size = File.stat(filename).size

    if file_size > 0
      upload_object(filename)
    else
      @logger.debug('File size is zero, skip upload.', :filename => filename)
    end

    @logger.debug('Delete local temporary file', :filename => filename)
    File.delete(filename)
  end

  # Wires the log rotater to the worker pool: every rotated-out file is
  # queued for upload (and deletion) on the pool.
  def initialize_log_rotater
    max_file_size = @max_file_size_kbytes * 1024
    @log_rotater = LogStash::Outputs::Gcs::LogRotate.new(@path_factory, max_file_size, @gzip, @flush_interval_secs, @gzip_content_encoding)

    @log_rotater.on_rotate do |filename|
      # BUG FIX: this log line previously contained a garbled
      # interpolation ("#(unknown)") instead of the rotated filename.
      @logger.info("Rotated out file: #{filename}")
      @workers.post do
        upload_and_delete(filename)
      end
    end
  end

  # NOTE(review): :active appears unused in this file — presumably part of
  # the plugin's external interface; confirm before removing.
  attr_accessor :active
end