logstash-input-s3-sdk-3 4.0.0

@@ -0,0 +1,317 @@
:plugin: s3
:type: input
:default_codec: plain

///////////////////////////////////////////
START - GENERATED VARIABLES, DO NOT EDIT!
///////////////////////////////////////////
:version: %VERSION%
:release_date: %RELEASE_DATE%
:changelog_url: %CHANGELOG_URL%
:include_path: ../../../../logstash/docs/include
///////////////////////////////////////////
END - GENERATED VARIABLES, DO NOT EDIT!
///////////////////////////////////////////

[id="plugins-{type}s-{plugin}"]

=== S3 input plugin

include::{include_path}/plugin_header.asciidoc[]

==== Description

Stream events from files in an S3 bucket.

IMPORTANT: The S3 input plugin only supports AWS S3.
Other S3-compatible storage solutions are not supported.

Each line from each file generates an event.
Files ending in `.gz` are handled as gzipped files.

Files that are archived to AWS Glacier will be skipped.

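A minimal pipeline that reads every object from a bucket and prints the decoded events might look like the following sketch. The bucket name is a placeholder, and credentials are resolved as described under <<plugins-{type}s-{plugin}-access_key_id>>:

[source,ruby]
-----
input {
  s3 {
    "bucket" => "my-logs-bucket"   # placeholder bucket name
    "region" => "us-east-1"
  }
}
output {
  stdout { codec => rubydebug }
}
-----
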
[id="plugins-{type}s-{plugin}-options"]
==== S3 Input Configuration Options

This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.

[cols="<,<,<",options="header",]
|=======================================================================
|Setting |Input type|Required
| <<plugins-{type}s-{plugin}-access_key_id>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-additional_settings>> |<<hash,hash>>|No
| <<plugins-{type}s-{plugin}-aws_credentials_file>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-backup_add_prefix>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-backup_to_bucket>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-backup_to_dir>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-bucket>> |<<string,string>>|Yes
| <<plugins-{type}s-{plugin}-delete>> |<<boolean,boolean>>|No
| <<plugins-{type}s-{plugin}-endpoint>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-exclude_pattern>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-gzip_pattern>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-include_object_properties>> |<<boolean,boolean>>|No
| <<plugins-{type}s-{plugin}-interval>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-prefix>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-proxy_uri>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-region>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-role_arn>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-role_session_name>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-secret_access_key>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-session_token>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-temporary_directory>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-watch_for_new_files>> |<<boolean,boolean>>|No
|=======================================================================

Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
input plugins.

&nbsp;

[id="plugins-{type}s-{plugin}-access_key_id"]
===== `access_key_id`

* Value type is <<string,string>>
* There is no default value for this setting.

This plugin uses the AWS SDK and supports several ways to get credentials, which will be tried in this order:

1. Static configuration, using `access_key_id` and `secret_access_key` params in the Logstash plugin config
2. External credentials file specified by `aws_credentials_file`
3. Environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
4. Environment variables `AMAZON_ACCESS_KEY_ID` and `AMAZON_SECRET_ACCESS_KEY`
5. IAM Instance Profile (available when running inside EC2)

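For example, option 1 above (static configuration) can be sketched as follows; the key values and bucket name are placeholders:

[source,ruby]
-----
input {
  s3 {
    "access_key_id" => "AKIAEXAMPLE"          # placeholder
    "secret_access_key" => "secret-example"   # placeholder
    "bucket" => "my-logs-bucket"
  }
}
-----
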
[id="plugins-{type}s-{plugin}-additional_settings"]
===== `additional_settings`

* Value type is <<hash,hash>>
* Default value is `{}`

Key-value pairs of settings and corresponding values used to parameterize
the connection to S3. See the full list in https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html[the AWS SDK documentation]. Example:

[source,ruby]
    input {
      s3 {
        "access_key_id" => "1234"
        "secret_access_key" => "secret"
        "bucket" => "logstash-test"
        "additional_settings" => {
          "force_path_style" => true
          "follow_redirects" => false
        }
      }
    }

[id="plugins-{type}s-{plugin}-aws_credentials_file"]
===== `aws_credentials_file`

* Value type is <<string,string>>
* There is no default value for this setting.

Path to a YAML file containing a hash of AWS credentials.
This file will only be loaded if `access_key_id` and
`secret_access_key` aren't set. The contents of the
file should look like this:

[source,ruby]
----------------------------------
:access_key_id: "12345"
:secret_access_key: "54321"
----------------------------------


[id="plugins-{type}s-{plugin}-backup_add_prefix"]
===== `backup_add_prefix`

* Value type is <<string,string>>
* Default value is `nil`

Append a prefix to the key (the full path, including the file name, in S3) after processing.
If backing up to another (or the same) bucket, this effectively lets you
choose a new 'folder' to place the files in.

[id="plugins-{type}s-{plugin}-backup_to_bucket"]
===== `backup_to_bucket`

* Value type is <<string,string>>
* Default value is `nil`

Name of an S3 bucket to back up processed files to.

[id="plugins-{type}s-{plugin}-backup_to_dir"]
===== `backup_to_dir`

* Value type is <<string,string>>
* Default value is `nil`

Path of a local directory to back up processed files to.

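Taken together, the backup and delete options can move processed objects out of the input bucket. A sketch, with placeholder bucket names and prefix:

[source,ruby]
-----
input {
  s3 {
    "bucket" => "incoming-logs"              # placeholder
    "backup_to_bucket" => "processed-logs"   # copy each object here after processing
    "backup_add_prefix" => "done/"           # store the copies under a 'done/' prefix
    "delete" => true                         # then remove the original object
  }
}
-----
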
[id="plugins-{type}s-{plugin}-bucket"]
===== `bucket`

* This is a required setting.
* Value type is <<string,string>>
* There is no default value for this setting.

The name of the S3 bucket.

[id="plugins-{type}s-{plugin}-delete"]
===== `delete`

* Value type is <<boolean,boolean>>
* Default value is `false`

Whether to delete processed files from the original bucket.

[id="plugins-{type}s-{plugin}-endpoint"]
===== `endpoint`

* Value type is <<string,string>>
* There is no default value for this setting.

The endpoint to connect to. By default it is constructed using the value of `region`.
This is useful when connecting to S3-compatible services, but beware that these aren't
guaranteed to work correctly with the AWS SDK.

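A sketch of pointing the plugin at a non-AWS, S3-compatible endpoint (unsupported, per the note above). The URL is a placeholder, and many such services additionally require path-style addressing via `additional_settings`:

[source,ruby]
-----
input {
  s3 {
    "bucket" => "my-logs-bucket"
    "endpoint" => "https://s3.example.internal:9000"   # placeholder URL
    "additional_settings" => {
      "force_path_style" => true
    }
  }
}
-----
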
[id="plugins-{type}s-{plugin}-exclude_pattern"]
===== `exclude_pattern`

* Value type is <<string,string>>
* Default value is `nil`

Ruby-style regexp of keys to exclude from the bucket.

Note that files matching the pattern are skipped _after_ they have been listed.
Consider using <<plugins-{type}s-{plugin}-prefix>> instead where possible.

Example:

[source,ruby]
-----
"exclude_pattern" => "\/2020\/04\/"
-----

This pattern excludes all logs containing "/2020/04/" in the path.


[id="plugins-{type}s-{plugin}-gzip_pattern"]
===== `gzip_pattern`

* Value type is <<string,string>>
* Default value is `"\.gz(ip)?$"`

Regular expression used to determine whether an input file is in gzip format.

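For example, to be stricter than the default and treat only `*.gz` objects as compressed (a sketch):

[source,ruby]
-----
"gzip_pattern" => "\.gz$"
-----
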
209
+ [id="plugins-{type}s-{plugin}-include_object_properties"]
210
+ ===== `include_object_properties`
211
+
212
+ * Value type is <<boolean,boolean>>
213
+ * Default value is `false`
214
+
215
+ Whether or not to include the S3 object's properties (last_modified, content_type, metadata) into each Event at
216
+ `[@metadata][s3]`. Regardless of this setting, `[@metadata][s3][key]` will always be present.
217
+
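The object key (and, when this option is enabled, the other properties) can then be referenced downstream. For example, to record which object an event came from, a sketch using the mutate filter:

[source,ruby]
-----
filter {
  mutate {
    add_field => { "s3_key" => "%{[@metadata][s3][key]}" }
  }
}
-----
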
[id="plugins-{type}s-{plugin}-interval"]
===== `interval`

* Value type is <<number,number>>
* Default value is `60`

Interval to wait before checking the file list again after a run has finished.
Value is in seconds.

[id="plugins-{type}s-{plugin}-prefix"]
===== `prefix`

* Value type is <<string,string>>
* Default value is `nil`

If specified, only object keys in the bucket that start with this prefix are processed (this is a literal prefix, not a regexp).

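For example, to read only objects under a date-partitioned path (the layout shown is hypothetical):

[source,ruby]
-----
"prefix" => "logs/2020/04/"
-----
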
[id="plugins-{type}s-{plugin}-proxy_uri"]
===== `proxy_uri`

* Value type is <<string,string>>
* There is no default value for this setting.

URI of the proxy server, if required.

[id="plugins-{type}s-{plugin}-region"]
===== `region`

* Value type is <<string,string>>
* Default value is `"us-east-1"`

The AWS region.

[id="plugins-{type}s-{plugin}-role_arn"]
===== `role_arn`

* Value type is <<string,string>>
* There is no default value for this setting.

The AWS IAM Role to assume, if any.
This is used to generate temporary credentials, typically for cross-account access.
See the https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html[AssumeRole API documentation] for more information.

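A sketch of assuming a role for cross-account access; the bucket name and ARN are placeholders:

[source,ruby]
-----
input {
  s3 {
    "bucket" => "other-account-logs"                              # placeholder
    "role_arn" => "arn:aws:iam::123456789012:role/logstash-read"  # placeholder ARN
    "role_session_name" => "logstash"
  }
}
-----
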
[id="plugins-{type}s-{plugin}-role_session_name"]
===== `role_session_name`

* Value type is <<string,string>>
* Default value is `"logstash"`

Session name to use when assuming an IAM role.

[id="plugins-{type}s-{plugin}-secret_access_key"]
===== `secret_access_key`

* Value type is <<string,string>>
* There is no default value for this setting.

The AWS Secret Access Key.

[id="plugins-{type}s-{plugin}-session_token"]
===== `session_token`

* Value type is <<string,string>>
* There is no default value for this setting.

The AWS Session token for temporary credentials.

[id="plugins-{type}s-{plugin}-sincedb_path"]
===== `sincedb_path`

* Value type is <<string,string>>
* Default value is `nil`

Where to write the since database (keeps track of the date
the last handled file was added to S3). By default, sincedb files are
written to the directory `{path.data}/plugins/inputs/s3/`.

If specified, this setting must be a filename path and not just a directory.

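For example, to keep the state file in an explicit location (the path is a placeholder):

[source,ruby]
-----
"sincedb_path" => "/var/lib/logstash/sincedb_s3_access_logs"
-----
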
[id="plugins-{type}s-{plugin}-temporary_directory"]
===== `temporary_directory`

* Value type is <<string,string>>
* Default value is `"/tmp/logstash"`

Set the directory where Logstash stores the temporary files before processing them.

[id="plugins-{type}s-{plugin}-watch_for_new_files"]
===== `watch_for_new_files`

* Value type is <<boolean,boolean>>
* Default value is `true`

Whether or not to watch for new files.
Disabling this option causes the input to close itself after processing the files from a single listing.

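This makes a one-shot import possible, for example when backfilling historical data. A sketch (the bucket name is a placeholder; when this option is disabled, `interval` is ignored):

[source,ruby]
-----
input {
  s3 {
    "bucket" => "archived-logs"       # placeholder
    "watch_for_new_files" => false    # list the bucket once, then shut the input down
  }
}
-----
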
[id="plugins-{type}s-{plugin}-common-options"]
include::{include_path}/{type}.asciidoc[]

:default_codec!:
@@ -0,0 +1,545 @@
# encoding: utf-8
require "logstash/inputs/base"
require "logstash/namespace"
require "time"
require "date"
require "tmpdir"
require "stud/interval"
require "stud/temporary"
require "aws-sdk-s3"
require "logstash/inputs/s3/patch"

require 'java'

Aws.eager_autoload!
# Stream events from files in an S3 bucket.
#
# Each line from each file generates an event.
# Files ending in `.gz` are handled as gzipped files.
class LogStash::Inputs::S3 < LogStash::Inputs::Base

  java_import java.io.InputStream
  java_import java.io.InputStreamReader
  java_import java.io.FileInputStream
  java_import java.io.BufferedReader
  java_import java.util.zip.GZIPInputStream
  java_import java.util.zip.ZipException

  CredentialConfig = Struct.new(
    :access_key_id,
    :secret_access_key,
    :session_token,
    :profile,
    :instance_profile_credentials_retries,
    :instance_profile_credentials_timeout,
    :region)

  config_name "s3"

  default :codec, "plain"

  # The name of the S3 bucket.
  config :bucket, :validate => :string, :required => true

  # If specified, the prefix of filenames in the bucket must match (not a regexp)
  config :prefix, :validate => :string, :default => nil

  config :additional_settings, :validate => :hash, :default => {}

  # The path to use for writing state. The state stored by this plugin is
  # a memory of files already processed by this plugin.
  #
  # If not specified, the default is in `{path.data}/plugins/inputs/s3/...`
  #
  # Should be a path with a filename, not just a directory.
  config :sincedb_path, :validate => :string, :default => nil

  # Name of an S3 bucket to back up processed files to.
  config :backup_to_bucket, :validate => :string, :default => nil

  # Append a prefix to the key (full path including file name in S3) after processing.
  # If backing up to another (or the same) bucket, this effectively lets you
  # choose a new 'folder' to place the files in
  config :backup_add_prefix, :validate => :string, :default => nil

  # Path of a local directory to back up processed files to.
  config :backup_to_dir, :validate => :string, :default => nil

  # Whether to delete processed files from the original bucket.
  config :delete, :validate => :boolean, :default => false

  # Interval to wait before checking the file list again after a run has finished.
  # Value is in seconds.
  config :interval, :validate => :number, :default => 60

  # Whether to keep watching for new files at every interval.
  # If false, overrides any interval and only lists the S3 bucket once.
  config :watch_for_new_files, :validate => :boolean, :default => true

  # Ruby-style regexp of keys to exclude from the bucket
  config :exclude_pattern, :validate => :string, :default => nil

  # Set the directory where Logstash stores the temporary files before processing them.
  # Defaults to the OS temporary directory, e.g. /tmp/logstash on Linux.
  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")

  # Whether or not to include the S3 object's properties (last_modified, content_type, metadata)
  # into each Event at [@metadata][s3]. Regardless of this setting, [@metadata][s3][key] will always
  # be present.
  config :include_object_properties, :validate => :boolean, :default => false

  # Regular expression used to determine whether an input file is in gzip format.
  # Defaults to an expression that matches *.gz and *.gzip file extensions.
  config :gzip_pattern, :validate => :string, :default => "\.gz(ip)?$"

  config :region, :validate => :string, :default => "us-east-1"

  # This plugin uses the AWS SDK and supports several ways to get credentials, which will be tried in this order:
  #
  # 1. Static configuration, using `access_key_id` and `secret_access_key` params or `role_arn` in the Logstash plugin config
  # 2. External credentials file specified by `aws_credentials_file`
  # 3. Environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
  # 4. Environment variables `AMAZON_ACCESS_KEY_ID` and `AMAZON_SECRET_ACCESS_KEY`
  # 5. IAM Instance Profile (available when running inside EC2)
  config :access_key_id, :validate => :string

  # The AWS Secret Access Key
  config :secret_access_key, :validate => :string

  # Profile
  config :profile, :validate => :string, :default => "default"

  # The AWS Session token for temporary credentials
  config :session_token, :validate => :password

  # URI of the proxy server, if required
  config :proxy_uri, :validate => :string

  # Custom endpoint to connect to S3
  config :endpoint, :validate => :string

  # The AWS IAM Role to assume, if any.
  # This is used to generate temporary credentials, typically for cross-account access.
  # See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html for more information.
  config :role_arn, :validate => :string

  # Session name to use when assuming an IAM role
  config :role_session_name, :validate => :string, :default => "logstash"

  # Path to a YAML file containing a hash of AWS credentials.
  # This file will only be loaded if `access_key_id` and
  # `secret_access_key` aren't set. The contents of the
  # file should look like this:
  #
  # [source,ruby]
  # ----------------------------------
  # :access_key_id: "12345"
  # :secret_access_key: "54321"
  # ----------------------------------
  #
  config :aws_credentials_file, :validate => :string

  def register
    require "fileutils"
    require "digest/md5"

    @logger.info("Registering", :bucket => @bucket, :region => @region)

    s3 = get_s3object

    @s3bucket = s3.bucket(@bucket)

    unless @backup_to_bucket.nil?
      @backup_bucket = s3.bucket(@backup_to_bucket)
      begin
        s3.client.head_bucket({ :bucket => @backup_to_bucket})
      rescue Aws::S3::Errors::NoSuchBucket
        s3.create_bucket({ :bucket => @backup_to_bucket})
      end
    end

    unless @backup_to_dir.nil?
      Dir.mkdir(@backup_to_dir, 0700) unless File.exists?(@backup_to_dir)
    end

    FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)

    if !@watch_for_new_files && original_params.include?('interval')
      logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.")
    end
  end

  def run(queue)
    @current_thread = Thread.current
    Stud.interval(@interval) do
      process_files(queue)
      stop unless @watch_for_new_files
    end
  end # def run

  def list_new_files
    objects = {}
    found = false
    begin
      @s3bucket.objects(:prefix => @prefix).each do |log|
        found = true
        @logger.debug('Found key', :key => log.key)
        if ignore_filename?(log.key)
          @logger.debug('Ignoring', :key => log.key)
        elsif log.content_length <= 0
          @logger.debug('Object Zero Length', :key => log.key)
        elsif !sincedb.newer?(log.last_modified)
          @logger.debug('Object Not Modified', :key => log.key)
        elsif (log.storage_class == 'GLACIER' || log.storage_class == 'DEEP_ARCHIVE') && !file_restored?(log.object)
          @logger.debug('Object Archived to Glacier', :key => log.key)
        else
          objects[log.key] = log.last_modified
          @logger.debug("Added to objects[]", :key => log.key, :length => objects.length)
        end
      end
      @logger.info('No files found in bucket', :prefix => prefix) unless found
    rescue Aws::Errors::ServiceError => e
      @logger.error("Unable to list objects in bucket", :exception => e.class, :message => e.message, :backtrace => e.backtrace, :prefix => prefix)
    end
    objects.keys.sort {|a,b| objects[a] <=> objects[b]}
  end # def list_new_files

  def backup_to_bucket(object)
    unless @backup_to_bucket.nil?
      backup_key = "#{@backup_add_prefix}#{object.key}"
      @backup_bucket.object(backup_key).copy_from(:copy_source => "#{object.bucket_name}/#{object.key}")
      if @delete
        object.delete()
      end
    end
  end

  def backup_to_dir(filename)
    unless @backup_to_dir.nil?
      FileUtils.cp(filename, @backup_to_dir)
    end
  end

  def process_files(queue)
    objects = list_new_files

    objects.each do |key|
      if stop?
        break
      else
        process_log(queue, key)
      end
    end
  end # def process_files

  def stop
    # @current_thread is initialized in the `#run` method.
    # This variable is needed because `#stop` is called from a different thread
    # than `#run`, which requires us to pass an explicit thread to Stud.stop!.
    Stud.stop!(@current_thread)
  end

  private

  # Read the content of the local file
  #
  # @param [Queue] Where to push the event
  # @param [String] Which file to read from
  # @param [S3Object] Source s3 object
  # @return [Boolean] True if the file was completely read, false otherwise.
  def process_local_log(queue, filename, object)
    @logger.debug('Processing file', :filename => filename)
    metadata = {}
    # Currently codecs operate on bytes instead of streams,
    # so all IO work (decompression, reading) needs to be done in the actual
    # input and sent as bytes to the codecs.
    read_file(filename) do |line|
      if stop?
        @logger.warn("Logstash S3 input, stop reading in the middle of the file, we will read it again when logstash is started")
        return false
      end

      @codec.decode(line) do |event|
        # We are making an assumption concerning cloudfront
        # log format: the user will use the plain or the line codec
        # and the message key will represent the actual line content.
        # If the event is only metadata, the event will be dropped.
        # This was the behavior of the pre 1.5 plugin.
        #
        # The line needs to go through the codecs to replace
        # unknown bytes in the log stream before doing a regexp match or
        # you will get a `Error: invalid byte sequence in UTF-8'
        if event_is_metadata?(event)
          @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
          update_metadata(metadata, event)
        else
          decorate(event)

          event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
          event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?

          if @include_object_properties
            event.set("[@metadata][s3]", object.data.to_h)
          else
            event.set("[@metadata][s3]", {})
          end

          event.set("[@metadata][s3][key]", object.key)

          queue << event
        end
      end
    end
    # Ensure any stateful codecs (such as multiline) are flushed to the queue
    @codec.flush do |event|
      queue << event
    end

    return true
  end # def process_local_log

  def event_is_metadata?(event)
    return false unless event.get("message").class == String
    line = event.get("message")
    version_metadata?(line) || fields_metadata?(line)
  end

  def version_metadata?(line)
    line.start_with?('#Version: ')
  end

  def fields_metadata?(line)
    line.start_with?('#Fields: ')
  end

  def update_metadata(metadata, event)
    line = event.get('message').strip

    if version_metadata?(line)
      metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
    end

    if fields_metadata?(line)
      metadata[:cloudfront_fields] = line.split(/#Fields: (.+)/).last
    end
  end

  def read_file(filename, &block)
    if gzip?(filename)
      read_gzip_file(filename, block)
    else
      read_plain_file(filename, block)
    end
  rescue => e
    # skip any broken file
    @logger.error("Failed to read file, processing skipped", :exception => e.class, :message => e.message, :filename => filename)
  end

  def read_plain_file(filename, block)
    File.open(filename, 'rb') do |file|
      file.each(&block)
    end
  end

  def read_gzip_file(filename, block)
    file_stream = FileInputStream.new(filename)
    gzip_stream = GZIPInputStream.new(file_stream)
    decoder = InputStreamReader.new(gzip_stream, "UTF-8")
    buffered = BufferedReader.new(decoder)

    while (line = buffered.readLine())
      block.call(line)
    end
  ensure
    buffered.close unless buffered.nil?
    decoder.close unless decoder.nil?
    gzip_stream.close unless gzip_stream.nil?
    file_stream.close unless file_stream.nil?
  end

  def gzip?(filename)
    Regexp.new(@gzip_pattern).match(filename)
  end

  def sincedb
    @sincedb ||= if @sincedb_path.nil?
                   @logger.info("Using default generated file for the sincedb", :filename => sincedb_file)
                   SinceDB::File.new(sincedb_file)
                 else
                   @logger.info("Using the provided sincedb_path", :sincedb_path => @sincedb_path)
                   SinceDB::File.new(@sincedb_path)
                 end
  end

  def sincedb_file
    digest = Digest::MD5.hexdigest("#{@bucket}+#{@prefix}")
    dir = File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "inputs", "s3")
    FileUtils::mkdir_p(dir)
    path = File.join(dir, "sincedb_#{digest}")

    # Migrate old default sincedb path to new one.
    if ENV["HOME"]
      # This is the old file path including the old digest mechanism.
      # It remains as a way to automatically upgrade users with the old default ($HOME)
      # to the new default (path.data)
      old = File.join(ENV["HOME"], ".sincedb_" + Digest::MD5.hexdigest("#{@bucket}+#{@prefix}"))
      if File.exist?(old)
        logger.info("Migrating old sincedb in $HOME to {path.data}")
        FileUtils.mv(old, path)
      end
    end

    path
  end

  def symbolized_settings
    @symbolized_settings ||= symbolize(@additional_settings)
  end

  def symbolize(hash)
    return hash unless hash.is_a?(Hash)
    symbolized = {}
    hash.each { |key, value| symbolized[key.to_sym] = symbolize(value) }
    symbolized
  end

  def ignore_filename?(filename)
    if @prefix == filename
      return true
    elsif filename.end_with?("/")
      return true
    elsif (@backup_add_prefix && @backup_to_bucket == @bucket && filename =~ /^#{backup_add_prefix}/)
      return true
    elsif @exclude_pattern.nil?
      return false
    elsif filename =~ Regexp.new(@exclude_pattern)
      return true
    else
      return false
    end
  end

  def process_log(queue, key)
    @logger.debug("Processing", :bucket => @bucket, :key => key)
    object = @s3bucket.object(key)

    filename = File.join(temporary_directory, File.basename(key))
    if download_remote_file(object, filename)
      if process_local_log(queue, filename, object)
        lastmod = object.last_modified
        backup_to_bucket(object)
        backup_to_dir(filename)
        delete_file_from_bucket(object)
        FileUtils.remove_entry_secure(filename, true)
        sincedb.write(lastmod)
      end
    else
      FileUtils.remove_entry_secure(filename, true)
    end
  end

  # Stream the remote file to the local disk
  #
  # @param [S3Object] Reference to the remote S3 object to download
  # @param [String] The temporary filename to stream to.
  # @return [Boolean] True if the file was completely downloaded
  def download_remote_file(remote_object, local_filename)
    completed = false
    @logger.debug("Downloading remote file", :remote_key => remote_object.key, :local_filename => local_filename)
    File.open(local_filename, 'wb') do |s3file|
      return completed if stop?
      begin
        remote_object.get(:response_target => s3file)
        completed = true
      rescue Aws::Errors::ServiceError => e
        @logger.warn("Unable to download remote file", :exception => e.class, :message => e.message, :remote_key => remote_object.key)
      end
    end
    completed
  end

  def delete_file_from_bucket(object)
    if @delete and @backup_to_bucket.nil?
      object.delete()
    end
  end

  def aws_options_hash
    opts = {}

    if @access_key_id.is_a?(NilClass) ^ @secret_access_key.is_a?(NilClass)
      @logger.warn("Likely config error: Only one of access_key_id or secret_access_key was provided but not both.")
    end

    credential_config = CredentialConfig.new(@access_key_id, @secret_access_key, @session_token, @profile, 0, 1, @region)
    @credentials = Aws::CredentialProviderChain.new(credential_config).resolve

    opts[:credentials] = @credentials

    opts[:http_proxy] = @proxy_uri if @proxy_uri

    if self.respond_to?(:aws_service_endpoint)
      # used by CloudWatch to basically do the same as below (returns { region: region })
      opts.merge!(self.aws_service_endpoint(@region))
    else
      # NOTE: setting :region works with the aws sdk (resolves correct endpoint)
      opts[:region] = @region
    end

    if !@endpoint.is_a?(NilClass)
      opts[:endpoint] = @endpoint
    end

    return opts
  end

  def get_s3object
    options = symbolized_settings.merge(aws_options_hash || {})
    s3 = Aws::S3::Resource.new(options)
  end

  def file_restored?(object)
    begin
      restore = object.data.restore
      if restore && restore.match(/ongoing-request\s?=\s?["']false["']/)
        if restore = restore.match(/expiry-date\s?=\s?["'](.*?)["']/)
          expiry_date = DateTime.parse(restore[1])
          return true if DateTime.now < expiry_date # restored
        else
          @logger.debug("No expiry-date header for restore request: #{object.data.restore}")
          return nil # no expiry-date found for ongoing request
        end
      end
    rescue => e
      @logger.debug("Could not determine Glacier restore status", :exception => e.class, :message => e.message)
    end
    return false
  end

  module SinceDB
    class File
      def initialize(file)
        @sincedb_path = file
      end

      def newer?(date)
        date > read
      end

      def read
        if ::File.exists?(@sincedb_path)
          content = ::File.read(@sincedb_path).chomp.strip
          # If the file was created but we didn't have the time to write to it
          return content.empty? ? Time.new(0) : Time.parse(content)
        else
          return Time.new(0)
        end
      end

      def write(since = nil)
        since = Time.now() if since.nil?
        ::File.open(@sincedb_path, 'w') { |file| file.write(since.to_s) }
      end
    end
  end
end # class LogStash::Inputs::S3