logstash-input-s3-test 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,317 @@
1
+ :plugin: s3
2
+ :type: input
3
+ :default_codec: plain
4
+
5
+ ///////////////////////////////////////////
6
+ START - GENERATED VARIABLES, DO NOT EDIT!
7
+ ///////////////////////////////////////////
8
+ :version: %VERSION%
9
+ :release_date: %RELEASE_DATE%
10
+ :changelog_url: %CHANGELOG_URL%
11
+ :include_path: ../../../../logstash/docs/include
12
+ ///////////////////////////////////////////
13
+ END - GENERATED VARIABLES, DO NOT EDIT!
14
+ ///////////////////////////////////////////
15
+
16
+ [id="plugins-{type}s-{plugin}"]
17
+
18
+ === S3 input plugin
19
+
20
+ include::{include_path}/plugin_header.asciidoc[]
21
+
22
+ ==== Description
23
+
24
+ Stream events from files in an S3 bucket.
25
+
26
+ IMPORTANT: The S3 input plugin only supports AWS S3.
27
+ Other S3 compatible storage solutions are not supported.
28
+
29
+ Each line from each file generates an event.
30
+ Files ending in `.gz` are handled as gzip'ed files.
31
+
32
+ Files that are archived to AWS Glacier (including Glacier Deep Archive) are skipped unless they have been restored.
33
+
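+ For illustration, a minimal configuration can look like the following; the
+ bucket name, region, and prefix are placeholders for your own values:
+
+ [source,ruby]
+ -----
+ input {
+   s3 {
+     "bucket" => "my-logs-bucket"
+     "region" => "us-east-1"
+     "prefix" => "AWSLogs/"
+   }
+ }
+ -----
+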
34
+ [id="plugins-{type}s-{plugin}-options"]
35
+ ==== S3 Input Configuration Options
36
+
37
+ This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.
38
+
39
+ [cols="<,<,<",options="header",]
40
+ |=======================================================================
41
+ |Setting |Input type|Required
42
+ | <<plugins-{type}s-{plugin}-access_key_id>> |<<string,string>>|No
43
+ | <<plugins-{type}s-{plugin}-additional_settings>> |<<hash,hash>>|No
44
+ | <<plugins-{type}s-{plugin}-aws_credentials_file>> |<<string,string>>|No
45
+ | <<plugins-{type}s-{plugin}-backup_add_prefix>> |<<string,string>>|No
46
+ | <<plugins-{type}s-{plugin}-backup_to_bucket>> |<<string,string>>|No
47
+ | <<plugins-{type}s-{plugin}-backup_to_dir>> |<<string,string>>|No
48
+ | <<plugins-{type}s-{plugin}-bucket>> |<<string,string>>|Yes
49
+ | <<plugins-{type}s-{plugin}-delete>> |<<boolean,boolean>>|No
50
+ | <<plugins-{type}s-{plugin}-endpoint>> |<<string,string>>|No
51
+ | <<plugins-{type}s-{plugin}-exclude_pattern>> |<<string,string>>|No
52
+ | <<plugins-{type}s-{plugin}-gzip_pattern>> |<<string,string>>|No
53
+ | <<plugins-{type}s-{plugin}-include_object_properties>> |<<boolean,boolean>>|No
54
+ | <<plugins-{type}s-{plugin}-interval>> |<<number,number>>|No
55
+ | <<plugins-{type}s-{plugin}-prefix>> |<<string,string>>|No
56
+ | <<plugins-{type}s-{plugin}-proxy_uri>> |<<string,string>>|No
57
+ | <<plugins-{type}s-{plugin}-region>> |<<string,string>>|No
58
+ | <<plugins-{type}s-{plugin}-role_arn>> |<<string,string>>|No
59
+ | <<plugins-{type}s-{plugin}-role_session_name>> |<<string,string>>|No
60
+ | <<plugins-{type}s-{plugin}-secret_access_key>> |<<string,string>>|No
61
+ | <<plugins-{type}s-{plugin}-session_token>> |<<string,string>>|No
62
+ | <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
63
+ | <<plugins-{type}s-{plugin}-temporary_directory>> |<<string,string>>|No
64
+ | <<plugins-{type}s-{plugin}-watch_for_new_files>> |<<boolean,boolean>>|No
65
+ |=======================================================================
66
+
67
+ Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
68
+ input plugins.
69
+
70
+ &nbsp;
71
+
72
+ [id="plugins-{type}s-{plugin}-access_key_id"]
73
+ ===== `access_key_id`
74
+
75
+ * Value type is <<string,string>>
76
+ * There is no default value for this setting.
77
+
78
+ This plugin uses the AWS SDK and supports several ways to get credentials, which will be tried in this order:
79
+
80
+ 1. Static configuration, using `access_key_id` and `secret_access_key` params in logstash plugin config
81
+ 2. External credentials file specified by `aws_credentials_file`
82
+ 3. Environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
83
+ 4. Environment variables `AMAZON_ACCESS_KEY_ID` and `AMAZON_SECRET_ACCESS_KEY`
84
+ 5. IAM Instance Profile (available when running inside EC2)
85
+
86
+
87
+ [id="plugins-{type}s-{plugin}-additional_settings"]
88
+ ===== `additional_settings`
89
+
90
+ * Value type is <<hash,hash>>
91
+ * Default value is `{}`
92
+
93
+ Key-value pairs of settings and corresponding values used to parametrize
94
+ the connection to S3. See the full list in https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html[the AWS SDK documentation]. Example:
95
+
96
+ [source,ruby]
97
+ input {
98
+ s3 {
99
+ "access_key_id" => "1234"
100
+ "secret_access_key" => "secret"
101
+ "bucket" => "logstash-test"
102
+ "additional_settings" => {
103
+ "force_path_style" => true
104
+ "follow_redirects" => false
105
+ }
106
+ }
107
+ }
108
+
109
+ [id="plugins-{type}s-{plugin}-aws_credentials_file"]
110
+ ===== `aws_credentials_file`
111
+
112
+ * Value type is <<string,string>>
113
+ * There is no default value for this setting.
114
+
115
+ Path to YAML file containing a hash of AWS credentials.
116
+ This file will only be loaded if `access_key_id` and
117
+ `secret_access_key` aren't set. The contents of the
118
+ file should look like this:
119
+
120
+ [source,ruby]
121
+ ----------------------------------
122
+ :access_key_id: "12345"
123
+ :secret_access_key: "54321"
124
+ ----------------------------------
125
+
126
+
127
+ [id="plugins-{type}s-{plugin}-backup_add_prefix"]
128
+ ===== `backup_add_prefix`
129
+
130
+ * Value type is <<string,string>>
131
+ * Default value is `nil`
132
+
133
+ Append a prefix to the key (the full path including the file name in S3) after processing.
134
+ If backing up to another (or the same) bucket, this effectively lets you
135
+ choose a new 'folder' to place the files in.
136
+
137
+ [id="plugins-{type}s-{plugin}-backup_to_bucket"]
138
+ ===== `backup_to_bucket`
139
+
140
+ * Value type is <<string,string>>
141
+ * Default value is `nil`
142
+
143
+ Name of an S3 bucket to back up processed files to.
144
+
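+ For example, to copy every processed object into a backup bucket under a
+ `processed/` prefix and then delete the original (both bucket names below are
+ placeholders), a configuration along these lines can be used:
+
+ [source,ruby]
+ -----
+ input {
+   s3 {
+     "bucket" => "my-logs-bucket"
+     "backup_to_bucket" => "my-logs-archive"
+     "backup_add_prefix" => "processed/"
+     "delete" => true
+   }
+ }
+ -----
+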
145
+ [id="plugins-{type}s-{plugin}-backup_to_dir"]
146
+ ===== `backup_to_dir`
147
+
148
+ * Value type is <<string,string>>
149
+ * Default value is `nil`
150
+
151
+ Path of a local directory to back up processed files to.
152
+
153
+ [id="plugins-{type}s-{plugin}-bucket"]
154
+ ===== `bucket`
155
+
156
+ * This is a required setting.
157
+ * Value type is <<string,string>>
158
+ * There is no default value for this setting.
159
+
160
+ The name of the S3 bucket.
161
+
162
+ [id="plugins-{type}s-{plugin}-delete"]
163
+ ===== `delete`
164
+
165
+ * Value type is <<boolean,boolean>>
166
+ * Default value is `false`
167
+
168
+ Whether to delete processed files from the original bucket.
169
+
170
+ [id="plugins-{type}s-{plugin}-endpoint"]
171
+ ===== `endpoint`
172
+
173
+ * Value type is <<string,string>>
174
+ * There is no default value for this setting.
175
+
176
+ The endpoint to connect to. By default it is constructed using the value of `region`.
177
+ This is useful when connecting to S3 compatible services, but beware that these aren't
178
+ guaranteed to work correctly with the AWS SDK.
179
+
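+ For example, to point the plugin at a custom endpoint (the URL below is only a
+ placeholder; S3-compatible services usually also require path-style addressing
+ via `additional_settings`):
+
+ [source,ruby]
+ -----
+ input {
+   s3 {
+     "bucket" => "my-logs-bucket"
+     "endpoint" => "https://s3.example.internal:9000"
+     "additional_settings" => {
+       "force_path_style" => true
+     }
+   }
+ }
+ -----
+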
180
+ [id="plugins-{type}s-{plugin}-exclude_pattern"]
181
+ ===== `exclude_pattern`
182
+
183
+ * Value type is <<string,string>>
184
+ * Default value is `nil`
185
+
186
+ Ruby style regexp of keys to exclude from the bucket.
187
+
188
+ Note that files matching the pattern are skipped _after_ they have been listed.
189
+ Consider using <<plugins-{type}s-{plugin}-prefix>> instead where possible.
190
+
191
+ Example:
192
+
193
+ [source,ruby]
194
+ -----
195
+ "exclude_pattern" => "\/2020\/04\/"
196
+ -----
197
+
198
+ This pattern excludes all logs containing "/2020/04/" in the path.
199
+
200
+
201
+ [id="plugins-{type}s-{plugin}-gzip_pattern"]
202
+ ===== `gzip_pattern`
203
+
204
+ * Value type is <<string,string>>
205
+ * Default value is `"\.gz(ip)?$"`
206
+
207
+ Regular expression used to determine whether an input file is in gzip format.
208
+
209
+ [id="plugins-{type}s-{plugin}-include_object_properties"]
210
+ ===== `include_object_properties`
211
+
212
+ * Value type is <<boolean,boolean>>
213
+ * Default value is `false`
214
+
215
+ Whether or not to include the S3 object's properties (last_modified, content_type, metadata) in each event at
216
+ `[@metadata][s3]`. Regardless of this setting, `[@metadata][s3][key]` will always be present.
217
+
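+ For example, the object key can be copied from the metadata onto the event with
+ a separate mutate filter (the `s3_key` field name below is only illustrative):
+
+ [source,ruby]
+ -----
+ input {
+   s3 {
+     "bucket" => "my-logs-bucket"
+     "include_object_properties" => true
+   }
+ }
+ filter {
+   mutate {
+     add_field => { "s3_key" => "%{[@metadata][s3][key]}" }
+   }
+ }
+ -----
+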
218
+ [id="plugins-{type}s-{plugin}-interval"]
219
+ ===== `interval`
220
+
221
+ * Value type is <<number,number>>
222
+ * Default value is `60`
223
+
224
+ Interval to wait before checking the file list again after a run is finished.
225
+ Value is in seconds.
226
+
227
+ [id="plugins-{type}s-{plugin}-prefix"]
228
+ ===== `prefix`
229
+
230
+ * Value type is <<string,string>>
231
+ * Default value is `nil`
232
+
233
+ If specified, only object keys that begin with this prefix are processed (this is a literal prefix, not a regexp).
234
+
235
+ [id="plugins-{type}s-{plugin}-proxy_uri"]
236
+ ===== `proxy_uri`
237
+
238
+ * Value type is <<string,string>>
239
+ * There is no default value for this setting.
240
+
241
+ URI of the proxy server, if required.
242
+
243
+ [id="plugins-{type}s-{plugin}-region"]
244
+ ===== `region`
245
+
246
+ * Value type is <<string,string>>
247
+ * Default value is `"us-east-1"`
248
+
249
+ The AWS Region
250
+
251
+ [id="plugins-{type}s-{plugin}-role_arn"]
252
+ ===== `role_arn`
253
+
254
+ * Value type is <<string,string>>
255
+ * There is no default value for this setting.
256
+
257
+ The AWS IAM Role to assume, if any.
258
+ This is used to generate temporary credentials, typically for cross-account access.
259
+ See the https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html[AssumeRole API documentation] for more information.
260
+
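+ For example, to read a bucket owned by another account by assuming a role
+ (the ARN below is a placeholder), something like this can be used:
+
+ [source,ruby]
+ -----
+ input {
+   s3 {
+     "bucket" => "other-account-logs"
+     "role_arn" => "arn:aws:iam::123456789012:role/logstash-s3-reader"
+     "role_session_name" => "logstash"
+   }
+ }
+ -----
+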
261
+ [id="plugins-{type}s-{plugin}-role_session_name"]
262
+ ===== `role_session_name`
263
+
264
+ * Value type is <<string,string>>
265
+ * Default value is `"logstash"`
266
+
267
+ Session name to use when assuming an IAM role.
268
+
269
+ [id="plugins-{type}s-{plugin}-secret_access_key"]
270
+ ===== `secret_access_key`
271
+
272
+ * Value type is <<string,string>>
273
+ * There is no default value for this setting.
274
+
275
+ The AWS Secret Access Key
276
+
277
+ [id="plugins-{type}s-{plugin}-session_token"]
278
+ ===== `session_token`
279
+
280
+ * Value type is <<string,string>>
281
+ * There is no default value for this setting.
282
+
283
+ The AWS session token for temporary credentials.
284
+
285
+ [id="plugins-{type}s-{plugin}-sincedb_path"]
286
+ ===== `sincedb_path`
287
+
288
+ * Value type is <<string,string>>
289
+ * Default value is `nil`
290
+
291
+ Where to write the since database (keeps track of the date
292
+ the last handled file was added to S3). By default, sincedb
293
+ files are written to the directory `{path.data}/plugins/inputs/s3/`.
294
+
295
+ If specified, this setting must be a filename path and not just a directory.
296
+
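+ For example, to pin the sincedb to an explicit file (the path below is a
+ placeholder and must be writable by Logstash):
+
+ [source,ruby]
+ -----
+ "sincedb_path" => "/var/lib/logstash/plugins/inputs/s3/my-bucket.sincedb"
+ -----
+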
297
+ [id="plugins-{type}s-{plugin}-temporary_directory"]
298
+ ===== `temporary_directory`
299
+
300
+ * Value type is <<string,string>>
301
+ * Default value is `"/tmp/logstash"`
302
+
303
+ Set the directory where Logstash stores temporary files before processing them.
304
+
305
+ [id="plugins-{type}s-{plugin}-watch_for_new_files"]
306
+ ===== `watch_for_new_files`
307
+
308
+ * Value type is <<boolean,boolean>>
309
+ * Default value is `true`
310
+
311
+ Whether or not to watch for new files.
312
+ Disabling this option causes the input to close itself after processing the files from a single listing.
313
+
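+ For example, to process the current contents of a bucket once and then shut
+ the input down:
+
+ [source,ruby]
+ -----
+ "watch_for_new_files" => false
+ -----
+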
314
+ [id="plugins-{type}s-{plugin}-common-options"]
315
+ include::{include_path}/{type}.asciidoc[]
316
+
317
+ :default_codec!:
@@ -0,0 +1,545 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/base"
3
+ require "logstash/namespace"
4
+ require "time"
5
+ require "date"
6
+ require "tmpdir"
7
+ require "stud/interval"
8
+ require "stud/temporary"
9
+ require "aws-sdk-s3"
10
+ require "logstash/inputs/s3/patch"
11
+
12
+ require 'java'
13
+
14
+ Aws.eager_autoload!
15
+ # Stream events from files in an S3 bucket.
16
+ #
17
+ # Each line from each file generates an event.
18
+ # Files ending in `.gz` are handled as gzip'ed files.
19
+ class LogStash::Inputs::S3 < LogStash::Inputs::Base
20
+
21
+ java_import java.io.InputStream
22
+ java_import java.io.InputStreamReader
23
+ java_import java.io.FileInputStream
24
+ java_import java.io.BufferedReader
25
+ java_import java.util.zip.GZIPInputStream
26
+ java_import java.util.zip.ZipException
27
+
28
+ CredentialConfig = Struct.new(
29
+ :access_key_id,
30
+ :secret_access_key,
31
+ :session_token,
32
+ :profile,
33
+ :instance_profile_credentials_retries,
34
+ :instance_profile_credentials_timeout,
35
+ :region)
36
+
37
+ config_name "s3"
38
+
39
+ default :codec, "plain"
40
+
41
+ # The name of the S3 bucket.
42
+ config :bucket, :validate => :string, :required => true
43
+
44
+ # If specified, only object keys that begin with this prefix are processed (not a regexp)
45
+ config :prefix, :validate => :string, :default => nil
46
+
47
+ config :additional_settings, :validate => :hash, :default => {}
48
+
49
+ # The path to use for writing state. The state stored by this plugin is
50
+ # a memory of files already processed by this plugin.
51
+ #
52
+ # If not specified, the default is in `{path.data}/plugins/inputs/s3/...`
53
+ #
54
+ # Should be a path with filename not just a directory.
55
+ config :sincedb_path, :validate => :string, :default => nil
56
+
57
+ # Name of an S3 bucket to back up processed files to.
58
+ config :backup_to_bucket, :validate => :string, :default => nil
59
+
60
+ # Append a prefix to the key (full path including file name in s3) after processing.
61
+ # If backing up to another (or the same) bucket, this effectively lets you
62
+ # choose a new 'folder' to place the files in
63
+ config :backup_add_prefix, :validate => :string, :default => nil
64
+
65
+ # Path of a local directory to back up processed files to.
66
+ config :backup_to_dir, :validate => :string, :default => nil
67
+
68
+ # Whether to delete processed files from the original bucket.
69
+ config :delete, :validate => :boolean, :default => false
70
+
71
+ # Interval to wait before checking the file list again after a run is finished.
72
+ # Value is in seconds.
73
+ config :interval, :validate => :number, :default => 60
74
+
75
+ # Whether to keep watching for new files at every interval.
76
+ # If false, the interval is ignored and the S3 bucket is listed only once.
77
+ config :watch_for_new_files, :validate => :boolean, :default => true
78
+
79
+ # Ruby style regexp of keys to exclude from the bucket
80
+ config :exclude_pattern, :validate => :string, :default => nil
81
+
82
+ # Set the directory where Logstash stores temporary files before processing them.
83
+ # Defaults to the OS temporary directory, e.g. /tmp/logstash on Linux.
84
+ config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
85
+
86
+ # Whether or not to include the S3 object's properties (last_modified, content_type, metadata)
87
+ # in each event at [@metadata][s3]. Regardless of this setting, [@metadata][s3][key] will always
88
+ # be present.
89
+ config :include_object_properties, :validate => :boolean, :default => false
90
+
91
+ # Regular expression used to determine whether an input file is in gzip format.
92
+ # Defaults to an expression that matches *.gz and *.gzip file extensions.
93
+ config :gzip_pattern, :validate => :string, :default => "\.gz(ip)?$"
94
+
95
+ config :region, :validate => :string, :default => "us-east-1"
96
+
97
+ # This plugin uses the AWS SDK and supports several ways to get credentials, which will be tried in this order:
98
+ #
99
+ # 1. Static configuration, using `access_key_id` and `secret_access_key` params or `role_arn` in the logstash plugin config
100
+ # 2. External credentials file specified by `aws_credentials_file`
101
+ # 3. Environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
102
+ # 4. Environment variables `AMAZON_ACCESS_KEY_ID` and `AMAZON_SECRET_ACCESS_KEY`
103
+ # 5. IAM Instance Profile (available when running inside EC2)
104
+ config :access_key_id, :validate => :string
105
+
106
+ # The AWS Secret Access Key
107
+ config :secret_access_key, :validate => :string
108
+
109
+ # Profile
110
+ config :profile, :validate => :string, :default => "default"
111
+
112
+ # The AWS Session token for temporary credential
113
+ config :session_token, :validate => :password
114
+
115
+ # URI to proxy server if required
116
+ config :proxy_uri, :validate => :string
117
+
118
+ # Custom endpoint to connect to s3
119
+ config :endpoint, :validate => :string
120
+
121
+ # The AWS IAM Role to assume, if any.
122
+ # This is used to generate temporary credentials typically for cross-account access.
123
+ # See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html for more information.
124
+ config :role_arn, :validate => :string
125
+
126
+ # Session name to use when assuming an IAM role
127
+ config :role_session_name, :validate => :string, :default => "logstash"
128
+
129
+ # Path to YAML file containing a hash of AWS credentials.
130
+ # This file will only be loaded if `access_key_id` and
131
+ # `secret_access_key` aren't set. The contents of the
132
+ # file should look like this:
133
+ #
134
+ # [source,ruby]
135
+ # ----------------------------------
136
+ # :access_key_id: "12345"
137
+ # :secret_access_key: "54321"
138
+ # ----------------------------------
139
+ #
140
+ config :aws_credentials_file, :validate => :string
141
+
142
+ def register
143
+ require "fileutils"
144
+ require "digest/md5"
145
+
146
+ @logger.info("Registering", :bucket => @bucket, :region => @region)
147
+
148
+ s3 = get_s3object
149
+
150
+ @s3bucket = s3.bucket(@bucket)
151
+
152
+ unless @backup_to_bucket.nil?
153
+ @backup_bucket = s3.bucket(@backup_to_bucket)
154
+ begin
155
+ s3.client.head_bucket({ :bucket => @backup_to_bucket})
156
+ rescue Aws::S3::Errors::NoSuchBucket
157
+ s3.create_bucket({ :bucket => @backup_to_bucket})
158
+ end
159
+ end
160
+
161
+ unless @backup_to_dir.nil?
162
+ Dir.mkdir(@backup_to_dir, 0700) unless File.exists?(@backup_to_dir)
163
+ end
164
+
165
+ FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
166
+
167
+ if !@watch_for_new_files && original_params.include?('interval')
168
+ logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.")
169
+ end
170
+ end
171
+
172
+ def run(queue)
173
+ @current_thread = Thread.current
174
+ Stud.interval(@interval) do
175
+ process_files(queue)
176
+ stop unless @watch_for_new_files
177
+ end
178
+ end # def run
179
+
180
+ def list_new_files
181
+ objects = {}
182
+ found = false
183
+ begin
184
+ @s3bucket.objects(:prefix => @prefix).each do |log|
185
+ found = true
186
+ @logger.debug('Found key', :key => log.key)
187
+ if ignore_filename?(log.key)
188
+ @logger.debug('Ignoring', :key => log.key)
189
+ elsif log.content_length <= 0
190
+ @logger.debug('Object Zero Length', :key => log.key)
191
+ elsif !sincedb.newer?(log.last_modified)
192
+ @logger.debug('Object Not Modified', :key => log.key)
193
+ elsif (log.storage_class == 'GLACIER' || log.storage_class == 'DEEP_ARCHIVE') && !file_restored?(log.object)
194
+ @logger.debug('Object Archived to Glacier', :key => log.key)
195
+ else
196
+ objects[log.key] = log.last_modified
197
+ @logger.debug("Added to objects[]", :key => log.key, :length => objects.length)
198
+ end
199
+ end
200
+ @logger.info('No files found in bucket', :prefix => prefix) unless found
201
+ rescue Aws::Errors::ServiceError => e
202
+ @logger.error("Unable to list objects in bucket", :exception => e.class, :message => e.message, :backtrace => e.backtrace, :prefix => prefix)
203
+ end
204
+ objects.keys.sort {|a,b| objects[a] <=> objects[b]}
205
+ end # def list_new_files
206
+
207
+ def backup_to_bucket(object)
208
+ unless @backup_to_bucket.nil?
209
+ backup_key = "#{@backup_add_prefix}#{object.key}"
210
+ @backup_bucket.object(backup_key).copy_from(:copy_source => "#{object.bucket_name}/#{object.key}")
211
+ if @delete
212
+ object.delete()
213
+ end
214
+ end
215
+ end
216
+
217
+ def backup_to_dir(filename)
218
+ unless @backup_to_dir.nil?
219
+ FileUtils.cp(filename, @backup_to_dir)
220
+ end
221
+ end
222
+
223
+ def process_files(queue)
224
+ objects = list_new_files
225
+
226
+ objects.each do |key|
227
+ if stop?
228
+ break
229
+ else
230
+ process_log(queue, key)
231
+ end
232
+ end
233
+ end # def process_files
234
+
235
+ def stop
236
+ # @current_thread is initialized in the `#run` method.
237
+ # This variable is needed because `#stop` is called from a different thread
238
+ # than `#run`, which requires us to call stop! with an explicit thread.
239
+ Stud.stop!(@current_thread)
240
+ end
241
+
242
+ private
243
+
244
+ # Read the content of the local file
245
+ #
246
+ # @param [Queue] Where to push the event
247
+ # @param [String] Which file to read from
248
+ # @param [S3Object] Source s3 object
249
+ # @return [Boolean] True if the file was completely read, false otherwise.
250
+ def process_local_log(queue, filename, object)
251
+ @logger.debug('Processing file', :filename => filename)
252
+ metadata = {}
253
+ # Currently codecs operate on bytes instead of streams,
254
+ # so all IO work (decompression, reading) needs to be done in the
255
+ # input itself and sent to the codecs as bytes.
256
+ read_file(filename) do |line|
257
+ if stop?
258
+ @logger.warn("Logstash S3 input, stop reading in the middle of the file, we will read it again when logstash is started")
259
+ return false
260
+ end
261
+
262
+ @codec.decode(line) do |event|
263
+ # We are making an assumption about the CloudFront
264
+ # log format: the user will use the plain or the line codec
265
+ # and the message key will represent the actual line content.
266
+ # If the event contains only metadata, the event will be dropped.
267
+ # This was the behavior of the pre 1.5 plugin.
268
+ #
269
+ # The line needs to go through the codecs to replace
270
+ # unknown bytes in the log stream before doing a regexp match, or
271
+ # you will get an `Error: invalid byte sequence in UTF-8'.
272
+ if event_is_metadata?(event)
273
+ @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
274
+ update_metadata(metadata, event)
275
+ else
276
+ decorate(event)
277
+
278
+ event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
279
+ event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
280
+
281
+ if @include_object_properties
282
+ event.set("[@metadata][s3]", object.data.to_h)
283
+ else
284
+ event.set("[@metadata][s3]", {})
285
+ end
286
+
287
+ event.set("[@metadata][s3][key]", object.key)
288
+
289
+ queue << event
290
+ end
291
+ end
292
+ end
293
+ # Ensure any stateful codecs (such as multiline) are flushed to the queue
294
+ @codec.flush do |event|
295
+ queue << event
296
+ end
297
+
298
+ return true
299
+ end # def process_local_log
300
+
301
+ def event_is_metadata?(event)
302
+ return false unless event.get("message").class == String
303
+ line = event.get("message")
304
+ version_metadata?(line) || fields_metadata?(line)
305
+ end
306
+
307
+ def version_metadata?(line)
308
+ line.start_with?('#Version: ')
309
+ end
310
+
311
+ def fields_metadata?(line)
312
+ line.start_with?('#Fields: ')
313
+ end
314
+
315
+ def update_metadata(metadata, event)
316
+ line = event.get('message').strip
317
+
318
+ if version_metadata?(line)
319
+ metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
320
+ end
321
+
322
+ if fields_metadata?(line)
323
+ metadata[:cloudfront_fields] = line.split(/#Fields: (.+)/).last
324
+ end
325
+ end
326
+
327
+ def read_file(filename, &block)
328
+ if gzip?(filename)
329
+ read_gzip_file(filename, block)
330
+ else
331
+ read_plain_file(filename, block)
332
+ end
333
+ rescue => e
334
+ # skip any broken file
335
+ @logger.error("Failed to read file, processing skipped", :exception => e.class, :message => e.message, :filename => filename)
336
+ end
337
+
338
+ def read_plain_file(filename, block)
339
+ File.open(filename, 'rb') do |file|
340
+ file.each(&block)
341
+ end
342
+ end
343
+
344
+ def read_gzip_file(filename, block)
345
+ file_stream = FileInputStream.new(filename)
346
+ gzip_stream = GZIPInputStream.new(file_stream)
347
+ decoder = InputStreamReader.new(gzip_stream, "UTF-8")
348
+ buffered = BufferedReader.new(decoder)
349
+
350
+ while (line = buffered.readLine())
351
+ block.call(line)
352
+ end
353
+ ensure
354
+ buffered.close unless buffered.nil?
355
+ decoder.close unless decoder.nil?
356
+ gzip_stream.close unless gzip_stream.nil?
357
+ file_stream.close unless file_stream.nil?
358
+ end
359
+
360
+ def gzip?(filename)
361
+ Regexp.new(@gzip_pattern).match(filename)
362
+ end
363
+
364
+ def sincedb
365
+ @sincedb ||= if @sincedb_path.nil?
366
+ @logger.info("Using default generated file for the sincedb", :filename => sincedb_file)
367
+ SinceDB::File.new(sincedb_file)
368
+ else
369
+ @logger.info("Using the provided sincedb_path", :sincedb_path => @sincedb_path)
370
+ SinceDB::File.new(@sincedb_path)
371
+ end
372
+ end
373
+
374
+ def sincedb_file
375
+ digest = Digest::MD5.hexdigest("#{@bucket}+#{@prefix}")
376
+ dir = File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "inputs", "s3")
377
+ FileUtils::mkdir_p(dir)
378
+ path = File.join(dir, "sincedb_#{digest}")
379
+
380
+ # Migrate old default sincedb path to new one.
381
+ if ENV["HOME"]
382
+ # This is the old file path including the old digest mechanism.
383
+ # It remains as a way to automatically upgrade users with the old default ($HOME)
384
+ # to the new default (path.data)
385
+ old = File.join(ENV["HOME"], ".sincedb_" + Digest::MD5.hexdigest("#{@bucket}+#{@prefix}"))
386
+ if File.exist?(old)
387
+ logger.info("Migrating old sincedb in $HOME to {path.data}")
388
+ FileUtils.mv(old, path)
389
+ end
390
+ end
391
+
392
+ path
393
+ end
394
+
395
+ def symbolized_settings
396
+ @symbolized_settings ||= symbolize(@additional_settings)
397
+ end
398
+
399
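+ # Recursively convert the string keys of the additional_settings hash to symbols
+ # so they can be passed as options to the AWS SDK client.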
+ def symbolize(hash)
400
+ return hash unless hash.is_a?(Hash)
401
+ symbolized = {}
402
+ hash.each { |key, value| symbolized[key.to_sym] = symbolize(value) }
403
+ symbolized
404
+ end
405
+
406
+ def ignore_filename?(filename)
407
+ if @prefix == filename
408
+ return true
409
+ elsif filename.end_with?("/")
410
+ return true
411
+ elsif (@backup_add_prefix && @backup_to_bucket == @bucket && filename =~ /^#{backup_add_prefix}/)
412
+ return true
413
+ elsif @exclude_pattern.nil?
414
+ return false
415
+ elsif filename =~ Regexp.new(@exclude_pattern)
416
+ return true
417
+ else
418
+ return false
419
+ end
420
+ end
421
+
422
+ def process_log(queue, key)
423
+ @logger.debug("Processing", :bucket => @bucket, :key => key)
424
+ object = @s3bucket.object(key)
425
+
426
+ filename = File.join(temporary_directory, File.basename(key))
427
+ if download_remote_file(object, filename)
428
+ if process_local_log(queue, filename, object)
429
+ lastmod = object.last_modified
430
+ backup_to_bucket(object)
431
+ backup_to_dir(filename)
432
+ delete_file_from_bucket(object)
433
+ FileUtils.remove_entry_secure(filename, true)
434
+ sincedb.write(lastmod)
435
+ end
436
+ else
437
+ FileUtils.remove_entry_secure(filename, true)
438
+ end
439
+ end
440
+
441
+ # Stream the remote file to the local disk
442
+ #
443
+ # @param [S3Object] Reference to the remote S3 object to download
444
+ # @param [String] The temporary filename to stream to.
445
+ # @return [Boolean] True if the file was completely downloaded
446
+ def download_remote_file(remote_object, local_filename)
447
+ completed = false
448
+ @logger.debug("Downloading remote file", :remote_key => remote_object.key, :local_filename => local_filename)
449
+ File.open(local_filename, 'wb') do |s3file|
450
+ return completed if stop?
451
+ begin
452
+ remote_object.get(:response_target => s3file)
453
+ completed = true
454
+ rescue Aws::Errors::ServiceError => e
455
+ @logger.warn("Unable to download remote file", :exception => e.class, :message => e.message, :remote_key => remote_object.key)
456
+ end
457
+ end
458
+ completed
459
+ end
460
+
461
+ def delete_file_from_bucket(object)
462
+ if @delete and @backup_to_bucket.nil?
463
+ object.delete()
464
+ end
465
+ end
466
+
467
+ def aws_options_hash
468
+ opts = {}
469
+
470
+ if @access_key_id.is_a?(NilClass) ^ @secret_access_key.is_a?(NilClass)
471
+ @logger.warn("Likely config error: Only one of access_key_id or secret_access_key was provided but not both.")
472
+ end
473
+
474
+ credential_config = CredentialConfig.new(@access_key_id, @secret_access_key, @session_token, @profile, 0, 1, @region)
475
+ @credentials = Aws::CredentialProviderChain.new(credential_config).resolve
476
+
477
+ opts[:credentials] = @credentials
478
+
479
+ opts[:http_proxy] = @proxy_uri if @proxy_uri
480
+
481
+ if self.respond_to?(:aws_service_endpoint)
482
+ # used by CloudWatch to basically do the same as below (returns { region: region })
483
+ opts.merge!(self.aws_service_endpoint(@region))
484
+ else
485
+ # NOTE: setting :region works with the aws sdk (resolves correct endpoint)
486
+ opts[:region] = @region
487
+ end
488
+
489
+ if !@endpoint.is_a?(NilClass)
490
+ opts[:endpoint] = @endpoint
491
+ end
492
+
493
+ return opts
494
+ end
495
+
496
+ def get_s3object
497
+ options = symbolized_settings.merge(aws_options_hash || {})
498
+ s3 = Aws::S3::Resource.new(options)
499
+ end
500
+
501
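+ # Inspect the object's `restore` header: returns true when a Glacier restore has
+ # completed (ongoing-request = "false") and its expiry-date is still in the future.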
+ def file_restored?(object)
502
+ begin
503
+ restore = object.data.restore
504
+ if restore && restore.match(/ongoing-request\s?=\s?["']false["']/)
505
+ if restore = restore.match(/expiry-date\s?=\s?["'](.*?)["']/)
506
+ expiry_date = DateTime.parse(restore[1])
507
+ return true if DateTime.now < expiry_date # restored
508
+ else
509
+ @logger.debug("No expiry-date header for restore request: #{object.data.restore}")
510
+ return nil # no expiry-date found for ongoing request
511
+ end
512
+ end
513
+ rescue => e
514
+ @logger.debug("Could not determine Glacier restore status", :exception => e.class, :message => e.message)
515
+ end
516
+ return false
517
+ end
518
+
519
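+ # Minimal sincedb: persists the last_modified timestamp of the newest processed
+ # object so that already-seen objects are skipped on the next listing.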
+ module SinceDB
520
+ class File
521
+ def initialize(file)
522
+ @sincedb_path = file
523
+ end
524
+
525
+ def newer?(date)
526
+ date > read
527
+ end
528
+
529
+ def read
530
+ if ::File.exists?(@sincedb_path)
531
+ content = ::File.read(@sincedb_path).chomp.strip
532
+ # If the file was created but we didn't have the time to write to it
533
+ return content.empty? ? Time.new(0) : Time.parse(content)
534
+ else
535
+ return Time.new(0)
536
+ end
537
+ end
538
+
539
+ def write(since = nil)
540
+ since = Time.now() if since.nil?
541
+ ::File.open(@sincedb_path, 'w') { |file| file.write(since.to_s) }
542
+ end
543
+ end
544
+ end
545
+ end # class LogStash::Inputs::S3