logstash-input-s3-test 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +101 -0
- data/CONTRIBUTORS +19 -0
- data/Gemfile +11 -0
- data/LICENSE +202 -0
- data/NOTICE.TXT +5 -0
- data/README.md +117 -0
- data/docs/index.asciidoc +317 -0
- data/lib/logstash/inputs/s3.rb +545 -0
- data/lib/logstash/inputs/s3/patch.rb +20 -0
- data/logstash-input-s3.gemspec +32 -0
- data/spec/fixtures/cloudfront.log +4 -0
- data/spec/fixtures/compressed.log.gee.zip +0 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/json.log +2 -0
- data/spec/fixtures/json_with_message.log +2 -0
- data/spec/fixtures/multiline.log +6 -0
- data/spec/fixtures/multiple_compressed_streams.gz +0 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/inputs/s3_spec.rb +532 -0
- data/spec/inputs/sincedb_spec.rb +17 -0
- data/spec/integration/s3_spec.rb +61 -0
- data/spec/support/helpers.rb +45 -0
- metadata +189 -0
data/docs/index.asciidoc
ADDED
@@ -0,0 +1,317 @@
|
|
1
|
+
:plugin: s3
|
2
|
+
:type: input
|
3
|
+
:default_codec: plain
|
4
|
+
|
5
|
+
///////////////////////////////////////////
|
6
|
+
START - GENERATED VARIABLES, DO NOT EDIT!
|
7
|
+
///////////////////////////////////////////
|
8
|
+
:version: %VERSION%
|
9
|
+
:release_date: %RELEASE_DATE%
|
10
|
+
:changelog_url: %CHANGELOG_URL%
|
11
|
+
:include_path: ../../../../logstash/docs/include
|
12
|
+
///////////////////////////////////////////
|
13
|
+
END - GENERATED VARIABLES, DO NOT EDIT!
|
14
|
+
///////////////////////////////////////////
|
15
|
+
|
16
|
+
[id="plugins-{type}s-{plugin}"]
|
17
|
+
|
18
|
+
=== S3 input plugin
|
19
|
+
|
20
|
+
include::{include_path}/plugin_header.asciidoc[]
|
21
|
+
|
22
|
+
==== Description
|
23
|
+
|
24
|
+
Stream events from files from a S3 bucket.
|
25
|
+
|
26
|
+
IMPORTANT: The S3 input plugin only supports AWS S3.
|
27
|
+
Other S3 compatible storage solutions are not supported.
|
28
|
+
|
29
|
+
Each line from each file generates an event.
|
30
|
+
Files ending in `.gz` are handled as gzip'ed files.
|
31
|
+
|
32
|
+
Files that are archived to AWS Glacier will be skipped.
|
33
|
+
|
34
|
+
[id="plugins-{type}s-{plugin}-options"]
|
35
|
+
==== S3 Input Configuration Options
|
36
|
+
|
37
|
+
This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.
|
38
|
+
|
39
|
+
[cols="<,<,<",options="header",]
|
40
|
+
|=======================================================================
|
41
|
+
|Setting |Input type|Required
|
42
|
+
| <<plugins-{type}s-{plugin}-access_key_id>> |<<string,string>>|No
|
43
|
+
| <<plugins-{type}s-{plugin}-additional_settings>> |<<hash,hash>>|No
|
44
|
+
| <<plugins-{type}s-{plugin}-aws_credentials_file>> |<<string,string>>|No
|
45
|
+
| <<plugins-{type}s-{plugin}-backup_add_prefix>> |<<string,string>>|No
|
46
|
+
| <<plugins-{type}s-{plugin}-backup_to_bucket>> |<<string,string>>|No
|
47
|
+
| <<plugins-{type}s-{plugin}-backup_to_dir>> |<<string,string>>|No
|
48
|
+
| <<plugins-{type}s-{plugin}-bucket>> |<<string,string>>|Yes
|
49
|
+
| <<plugins-{type}s-{plugin}-delete>> |<<boolean,boolean>>|No
|
50
|
+
| <<plugins-{type}s-{plugin}-endpoint>> |<<string,string>>|No
|
51
|
+
| <<plugins-{type}s-{plugin}-exclude_pattern>> |<<string,string>>|No
|
52
|
+
| <<plugins-{type}s-{plugin}-gzip_pattern>> |<<string,string>>|No
|
53
|
+
| <<plugins-{type}s-{plugin}-include_object_properties>> |<<boolean,boolean>>|No
|
54
|
+
| <<plugins-{type}s-{plugin}-interval>> |<<number,number>>|No
|
55
|
+
| <<plugins-{type}s-{plugin}-prefix>> |<<string,string>>|No
|
56
|
+
| <<plugins-{type}s-{plugin}-proxy_uri>> |<<string,string>>|No
|
57
|
+
| <<plugins-{type}s-{plugin}-region>> |<<string,string>>|No
|
58
|
+
| <<plugins-{type}s-{plugin}-role_arn>> |<<string,string>>|No
|
59
|
+
| <<plugins-{type}s-{plugin}-role_session_name>> |<<string,string>>|No
|
60
|
+
| <<plugins-{type}s-{plugin}-secret_access_key>> |<<string,string>>|No
|
61
|
+
| <<plugins-{type}s-{plugin}-session_token>> |<<string,string>>|No
|
62
|
+
| <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
|
63
|
+
| <<plugins-{type}s-{plugin}-temporary_directory>> |<<string,string>>|No
|
64
|
+
| <<plugins-{type}s-{plugin}-watch_for_new_files>> |<<boolean,boolean>>|No
|
65
|
+
|=======================================================================
|
66
|
+
|
67
|
+
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
68
|
+
input plugins.
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
[id="plugins-{type}s-{plugin}-access_key_id"]
|
73
|
+
===== `access_key_id`
|
74
|
+
|
75
|
+
* Value type is <<string,string>>
|
76
|
+
* There is no default value for this setting.
|
77
|
+
|
78
|
+
This plugin uses the AWS SDK and supports several ways to get credentials, which will be tried in this order:
|
79
|
+
|
80
|
+
1. Static configuration, using `access_key_id` and `secret_access_key` params in logstash plugin config
|
81
|
+
2. External credentials file specified by `aws_credentials_file`
|
82
|
+
3. Environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
|
83
|
+
4. Environment variables `AMAZON_ACCESS_KEY_ID` and `AMAZON_SECRET_ACCESS_KEY`
|
84
|
+
5. IAM Instance Profile (available when running inside EC2)
|
85
|
+
|
86
|
+
|
87
|
+
[id="plugins-{type}s-{plugin}-additional_settings"]
|
88
|
+
===== `additional_settings`
|
89
|
+
|
90
|
+
* Value type is <<hash,hash>>
|
91
|
+
* Default value is `{}`
|
92
|
+
|
93
|
+
Key-value pairs of settings and corresponding values used to parametrize
|
94
|
+
the connection to s3. See full list in https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html[the AWS SDK documentation]. Example:
|
95
|
+
|
96
|
+
[source,ruby]
|
97
|
+
input {
|
98
|
+
s3 {
|
99
|
+
"access_key_id" => "1234"
|
100
|
+
"secret_access_key" => "secret"
|
101
|
+
"bucket" => "logstash-test"
|
102
|
+
"additional_settings" => {
|
103
|
+
"force_path_style" => true
|
104
|
+
"follow_redirects" => false
|
105
|
+
}
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
[id="plugins-{type}s-{plugin}-aws_credentials_file"]
|
110
|
+
===== `aws_credentials_file`
|
111
|
+
|
112
|
+
* Value type is <<string,string>>
|
113
|
+
* There is no default value for this setting.
|
114
|
+
|
115
|
+
Path to YAML file containing a hash of AWS credentials.
|
116
|
+
This file will only be loaded if `access_key_id` and
|
117
|
+
`secret_access_key` aren't set. The contents of the
|
118
|
+
file should look like this:
|
119
|
+
|
120
|
+
[source,ruby]
|
121
|
+
----------------------------------
|
122
|
+
:access_key_id: "12345"
|
123
|
+
:secret_access_key: "54321"
|
124
|
+
----------------------------------
|
125
|
+
|
126
|
+
|
127
|
+
[id="plugins-{type}s-{plugin}-backup_add_prefix"]
|
128
|
+
===== `backup_add_prefix`
|
129
|
+
|
130
|
+
* Value type is <<string,string>>
|
131
|
+
* Default value is `nil`
|
132
|
+
|
133
|
+
Append a prefix to the key (full path including file name in s3) after processing.
|
134
|
+
If backing up to another (or the same) bucket, this effectively lets you
|
135
|
+
choose a new 'folder' to place the files in
|
136
|
+
|
137
|
+
[id="plugins-{type}s-{plugin}-backup_to_bucket"]
|
138
|
+
===== `backup_to_bucket`
|
139
|
+
|
140
|
+
* Value type is <<string,string>>
|
141
|
+
* Default value is `nil`
|
142
|
+
|
143
|
+
Name of a S3 bucket to backup processed files to.
|
144
|
+
|
145
|
+
[id="plugins-{type}s-{plugin}-backup_to_dir"]
|
146
|
+
===== `backup_to_dir`
|
147
|
+
|
148
|
+
* Value type is <<string,string>>
|
149
|
+
* Default value is `nil`
|
150
|
+
|
151
|
+
Path of a local directory to backup processed files to.
|
152
|
+
|
153
|
+
[id="plugins-{type}s-{plugin}-bucket"]
|
154
|
+
===== `bucket`
|
155
|
+
|
156
|
+
* This is a required setting.
|
157
|
+
* Value type is <<string,string>>
|
158
|
+
* There is no default value for this setting.
|
159
|
+
|
160
|
+
The name of the S3 bucket.
|
161
|
+
|
162
|
+
[id="plugins-{type}s-{plugin}-delete"]
|
163
|
+
===== `delete`
|
164
|
+
|
165
|
+
* Value type is <<boolean,boolean>>
|
166
|
+
* Default value is `false`
|
167
|
+
|
168
|
+
Whether to delete processed files from the original bucket.
|
169
|
+
|
170
|
+
[id="plugins-{type}s-{plugin}-endpoint"]
|
171
|
+
===== `endpoint`
|
172
|
+
|
173
|
+
* Value type is <<string,string>>
|
174
|
+
* There is no default value for this setting.
|
175
|
+
|
176
|
+
The endpoint to connect to. By default it is constructed using the value of `region`.
|
177
|
+
This is useful when connecting to S3 compatible services, but beware that these aren't
|
178
|
+
guaranteed to work correctly with the AWS SDK.
|
179
|
+
|
180
|
+
[id="plugins-{type}s-{plugin}-exclude_pattern"]
|
181
|
+
===== `exclude_pattern`
|
182
|
+
|
183
|
+
* Value type is <<string,string>>
|
184
|
+
* Default value is `nil`
|
185
|
+
|
186
|
+
Ruby style regexp of keys to exclude from the bucket.
|
187
|
+
|
188
|
+
Note that files matching the pattern are skipped _after_ they have been listed.
|
189
|
+
Consider using <<plugins-{type}s-{plugin}-prefix>> instead where possible.
|
190
|
+
|
191
|
+
Example:
|
192
|
+
|
193
|
+
[source,ruby]
|
194
|
+
-----
|
195
|
+
"exclude_pattern" => "\/2020\/04\/"
|
196
|
+
-----
|
197
|
+
|
198
|
+
This pattern excludes all logs containing "/2020/04/" in the path.
|
199
|
+
|
200
|
+
|
201
|
+
[id="plugins-{type}s-{plugin}-gzip_pattern"]
|
202
|
+
===== `gzip_pattern`
|
203
|
+
|
204
|
+
* Value type is <<string,string>>
|
205
|
+
* Default value is `"\.gz(ip)?$"`
|
206
|
+
|
207
|
+
Regular expression used to determine whether an input file is in gzip format.
|
208
|
+
|
209
|
+
[id="plugins-{type}s-{plugin}-include_object_properties"]
|
210
|
+
===== `include_object_properties`
|
211
|
+
|
212
|
+
* Value type is <<boolean,boolean>>
|
213
|
+
* Default value is `false`
|
214
|
+
|
215
|
+
Whether or not to include the S3 object's properties (last_modified, content_type, metadata) into each Event at
|
216
|
+
`[@metadata][s3]`. Regardless of this setting, `[@metadata][s3][key]` will always be present.
|
217
|
+
|
218
|
+
[id="plugins-{type}s-{plugin}-interval"]
|
219
|
+
===== `interval`
|
220
|
+
|
221
|
+
* Value type is <<number,number>>
|
222
|
+
* Default value is `60`
|
223
|
+
|
224
|
+
Interval to wait before checking the file list again after a run is finished.
|
225
|
+
Value is in seconds.
|
226
|
+
|
227
|
+
[id="plugins-{type}s-{plugin}-prefix"]
|
228
|
+
===== `prefix`
|
229
|
+
|
230
|
+
* Value type is <<string,string>>
|
231
|
+
* Default value is `nil`
|
232
|
+
|
233
|
+
If specified, the prefix of filenames in the bucket must match (not a regexp)
|
234
|
+
|
235
|
+
[id="plugins-{type}s-{plugin}-proxy_uri"]
|
236
|
+
===== `proxy_uri`
|
237
|
+
|
238
|
+
* Value type is <<string,string>>
|
239
|
+
* There is no default value for this setting.
|
240
|
+
|
241
|
+
URI to proxy server if required
|
242
|
+
|
243
|
+
[id="plugins-{type}s-{plugin}-region"]
|
244
|
+
===== `region`
|
245
|
+
|
246
|
+
* Value type is <<string,string>>
|
247
|
+
* Default value is `"us-east-1"`
|
248
|
+
|
249
|
+
The AWS Region
|
250
|
+
|
251
|
+
[id="plugins-{type}s-{plugin}-role_arn"]
|
252
|
+
===== `role_arn`
|
253
|
+
|
254
|
+
* Value type is <<string,string>>
|
255
|
+
* There is no default value for this setting.
|
256
|
+
|
257
|
+
The AWS IAM Role to assume, if any.
|
258
|
+
This is used to generate temporary credentials, typically for cross-account access.
|
259
|
+
See the https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html[AssumeRole API documentation] for more information.
|
260
|
+
|
261
|
+
[id="plugins-{type}s-{plugin}-role_session_name"]
|
262
|
+
===== `role_session_name`
|
263
|
+
|
264
|
+
* Value type is <<string,string>>
|
265
|
+
* Default value is `"logstash"`
|
266
|
+
|
267
|
+
Session name to use when assuming an IAM role.
|
268
|
+
|
269
|
+
[id="plugins-{type}s-{plugin}-secret_access_key"]
|
270
|
+
===== `secret_access_key`
|
271
|
+
|
272
|
+
* Value type is <<string,string>>
|
273
|
+
* There is no default value for this setting.
|
274
|
+
|
275
|
+
The AWS Secret Access Key
|
276
|
+
|
277
|
+
[id="plugins-{type}s-{plugin}-session_token"]
|
278
|
+
===== `session_token`
|
279
|
+
|
280
|
+
* Value type is <<string,string>>
|
281
|
+
* There is no default value for this setting.
|
282
|
+
|
283
|
+
The AWS Session token for temporary credentials
|
284
|
+
|
285
|
+
[id="plugins-{type}s-{plugin}-sincedb_path"]
|
286
|
+
===== `sincedb_path`
|
287
|
+
|
288
|
+
* Value type is <<string,string>>
|
289
|
+
* Default value is `nil`
|
290
|
+
|
291
|
+
Where to write the since database (keeps track of the date
|
292
|
+
the last handled file was added to S3). The default will write
|
293
|
+
sincedb files in the directory '{path.data}/plugins/inputs/s3/'
|
294
|
+
|
295
|
+
If specified, this setting must be a filename path and not just a directory.
|
296
|
+
|
297
|
+
[id="plugins-{type}s-{plugin}-temporary_directory"]
|
298
|
+
===== `temporary_directory`
|
299
|
+
|
300
|
+
* Value type is <<string,string>>
|
301
|
+
* Default value is `"/tmp/logstash"`
|
302
|
+
|
303
|
+
Set the directory where logstash will store the tmp files before processing them.
|
304
|
+
|
305
|
+
[id="plugins-{type}s-{plugin}-watch_for_new_files"]
|
306
|
+
===== `watch_for_new_files`
|
307
|
+
|
308
|
+
* Value type is <<boolean,boolean>>
|
309
|
+
* Default value is `true`
|
310
|
+
|
311
|
+
Whether or not to watch for new files.
|
312
|
+
Disabling this option causes the input to close itself after processing the files from a single listing.
|
313
|
+
|
314
|
+
[id="plugins-{type}s-{plugin}-common-options"]
|
315
|
+
include::{include_path}/{type}.asciidoc[]
|
316
|
+
|
317
|
+
:default_codec!:
|
@@ -0,0 +1,545 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/inputs/base"
|
3
|
+
require "logstash/namespace"
|
4
|
+
require "time"
|
5
|
+
require "date"
|
6
|
+
require "tmpdir"
|
7
|
+
require "stud/interval"
|
8
|
+
require "stud/temporary"
|
9
|
+
require "aws-sdk-s3"
|
10
|
+
require "logstash/inputs/s3/patch"
|
11
|
+
|
12
|
+
require 'java'
|
13
|
+
|
14
|
+
Aws.eager_autoload!
|
15
|
+
# Stream events from files from a S3 bucket.
#
# Each line from each file generates an event.
# Files ending in `.gz` are handled as gzip'ed files.
class LogStash::Inputs::S3 < LogStash::Inputs::Base

  java_import java.io.InputStream
  java_import java.io.InputStreamReader
  java_import java.io.FileInputStream
  java_import java.io.BufferedReader
  java_import java.util.zip.GZIPInputStream
  java_import java.util.zip.ZipException

  # Plain-data holder handed to Aws::CredentialProviderChain when resolving
  # which credential source to use (see #aws_options_hash).
  CredentialConfig = Struct.new(
    :access_key_id,
    :secret_access_key,
    :session_token,
    :profile,
    :instance_profile_credentials_retries,
    :instance_profile_credentials_timeout,
    :region)

  config_name "s3"

  default :codec, "plain"

  # The name of the S3 bucket.
  config :bucket, :validate => :string, :required => true

  # If specified, the prefix of filenames in the bucket must match (not a regexp)
  config :prefix, :validate => :string, :default => nil

  # Key-value pairs of settings used to parametrize the connection to S3
  # (passed straight through to Aws::S3::Resource after symbolizing keys).
  config :additional_settings, :validate => :hash, :default => {}

  # The path to use for writing state. The state stored by this plugin is
  # a memory of files already processed by this plugin.
  #
  # If not specified, the default is in `{path.data}/plugins/inputs/s3/...`
  #
  # Should be a path with filename not just a directory.
  config :sincedb_path, :validate => :string, :default => nil

  # Name of a S3 bucket to backup processed files to.
  config :backup_to_bucket, :validate => :string, :default => nil

  # Append a prefix to the key (full path including file name in s3) after processing.
  # If backing up to another (or the same) bucket, this effectively lets you
  # choose a new 'folder' to place the files in
  config :backup_add_prefix, :validate => :string, :default => nil

  # Path of a local directory to backup processed files to.
  config :backup_to_dir, :validate => :string, :default => nil

  # Whether to delete processed files from the original bucket.
  config :delete, :validate => :boolean, :default => false

  # Interval to wait between to check the file list again after a run is finished.
  # Value is in seconds.
  config :interval, :validate => :number, :default => 60

  # Whether to watch for new files with the interval.
  # If false, overrides any interval and only lists the s3 bucket once.
  config :watch_for_new_files, :validate => :boolean, :default => true

  # Ruby style regexp of keys to exclude from the bucket
  config :exclude_pattern, :validate => :string, :default => nil

  # Set the directory where logstash will store the tmp files before processing them.
  # default to the current OS temporary directory in linux /tmp/logstash
  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")

  # Whether or not to include the S3 object's properties (last_modified, content_type, metadata)
  # into each Event at [@metadata][s3]. Regardless of this setting, [@metadata][s3][key] will always
  # be present.
  config :include_object_properties, :validate => :boolean, :default => false

  # Regular expression used to determine whether an input file is in gzip format.
  # default to an expression that matches *.gz and *.gzip file extensions
  config :gzip_pattern, :validate => :string, :default => "\.gz(ip)?$"

  config :region, :validate => :string, :default => "us-east-1"

  # This plugin uses the AWS SDK and supports several ways to get credentials, which will be tried in this order:
  #
  # 1. Static configuration, using `access_key_id` and `secret_access_key` params or `role_arn` in the logstash plugin config
  # 2. External credentials file specified by `aws_credentials_file`
  # 3. Environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
  # 4. Environment variables `AMAZON_ACCESS_KEY_ID` and `AMAZON_SECRET_ACCESS_KEY`
  # 5. IAM Instance Profile (available when running inside EC2)
  config :access_key_id, :validate => :string

  # The AWS Secret Access Key
  config :secret_access_key, :validate => :string

  # Profile
  config :profile, :validate => :string, :default => "default"

  # The AWS Session token for temporary credential
  config :session_token, :validate => :password

  # URI to proxy server if required
  config :proxy_uri, :validate => :string

  # Custom endpoint to connect to s3
  config :endpoint, :validate => :string

  # The AWS IAM Role to assume, if any.
  # This is used to generate temporary credentials typically for cross-account access.
  # See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html for more information.
  config :role_arn, :validate => :string

  # Session name to use when assuming an IAM role
  config :role_session_name, :validate => :string, :default => "logstash"

  # Path to YAML file containing a hash of AWS credentials.
  # This file will only be loaded if `access_key_id` and
  # `secret_access_key` aren't set. The contents of the
  # file should look like this:
  #
  # [source,ruby]
  # ----------------------------------
  # :access_key_id: "12345"
  # :secret_access_key: "54321"
  # ----------------------------------
  #
  config :aws_credentials_file, :validate => :string

  # Validate connectivity, ensure the backup bucket/directory and the temporary
  # directory exist, and warn about ignored settings.
  def register
    require "fileutils"
    require "digest/md5"

    @logger.info("Registering", :bucket => @bucket, :region => @region)

    s3 = get_s3object

    @s3bucket = s3.bucket(@bucket)

    unless @backup_to_bucket.nil?
      @backup_bucket = s3.bucket(@backup_to_bucket)
      begin
        s3.client.head_bucket({ :bucket => @backup_to_bucket})
      rescue Aws::S3::Errors::NoSuchBucket
        s3.create_bucket({ :bucket => @backup_to_bucket})
      end
    end

    unless @backup_to_dir.nil?
      # NOTE: File.exists? was removed in Ruby 3.2; exist? is the supported name.
      Dir.mkdir(@backup_to_dir, 0700) unless File.exist?(@backup_to_dir)
    end

    FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)

    if !@watch_for_new_files && original_params.include?('interval')
      logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.")
    end
  end

  # Main loop: list and process the bucket every @interval seconds, or exactly
  # once when watch_for_new_files is disabled.
  def run(queue)
    @current_thread = Thread.current
    Stud.interval(@interval) do
      process_files(queue)
      stop unless @watch_for_new_files
    end
  end # def run

  # List keys in the bucket that should be processed, skipping ignored,
  # empty, already-seen (sincedb) and still-archived (Glacier) objects.
  # @return [Array<String>] keys sorted by last_modified, oldest first.
  def list_new_files
    objects = {}
    found = false
    begin
      @s3bucket.objects(:prefix => @prefix).each do |log|
        found = true
        @logger.debug('Found key', :key => log.key)
        if ignore_filename?(log.key)
          @logger.debug('Ignoring', :key => log.key)
        elsif log.content_length <= 0
          @logger.debug('Object Zero Length', :key => log.key)
        elsif !sincedb.newer?(log.last_modified)
          @logger.debug('Object Not Modified', :key => log.key)
        elsif (log.storage_class == 'GLACIER' || log.storage_class == 'DEEP_ARCHIVE') && !file_restored?(log.object)
          @logger.debug('Object Archived to Glacier', :key => log.key)
        else
          objects[log.key] = log.last_modified
          @logger.debug("Added to objects[]", :key => log.key, :length => objects.length)
        end
      end
      @logger.info('No files found in bucket', :prefix => prefix) unless found
    rescue Aws::Errors::ServiceError => e
      @logger.error("Unable to list objects in bucket", :exception => e.class, :message => e.message, :backtrace => e.backtrace, :prefix => prefix)
    end
    objects.keys.sort {|a,b| objects[a] <=> objects[b]}
  end # def fetch_new_files

  # Copy a processed object into the backup bucket (with optional key prefix);
  # when :delete is set the original is removed after the copy.
  def backup_to_bucket(object)
    unless @backup_to_bucket.nil?
      backup_key = "#{@backup_add_prefix}#{object.key}"
      @backup_bucket.object(backup_key).copy_from(:copy_source => "#{object.bucket_name}/#{object.key}")
      if @delete
        object.delete()
      end
    end
  end

  # Copy the downloaded temporary file into the local backup directory, if configured.
  def backup_to_dir(filename)
    unless @backup_to_dir.nil?
      FileUtils.cp(filename, @backup_to_dir)
    end
  end

  # Process every new object found by #list_new_files, stopping early on shutdown.
  def process_files(queue)
    objects = list_new_files

    objects.each do |key|
      if stop?
        break
      else
        process_log(queue, key)
      end
    end
  end # def process_files

  def stop
    # @current_thread is initialized in the `#run` method,
    # this variable is needed because the `#stop` is a called in another thread
    # than the `#run` method and requiring us to call stop! with a explicit thread.
    Stud.stop!(@current_thread)
  end

  private

  # Read the content of the local file
  #
  # @param [Queue] Where to push the event
  # @param [String] Which file to read from
  # @param [S3Object] Source s3 object
  # @return [Boolean] True if the file was completely read, false otherwise.
  def process_local_log(queue, filename, object)
    @logger.debug('Processing file', :filename => filename)
    metadata = {}
    # Currently codecs operates on bytes instead of stream.
    # So all IO stuff: decompression, reading need to be done in the actual
    # input and send as bytes to the codecs.
    read_file(filename) do |line|
      if stop?
        @logger.warn("Logstash S3 input, stop reading in the middle of the file, we will read it again when logstash is started")
        return false
      end

      @codec.decode(line) do |event|
        # We are making an assumption concerning cloudfront
        # log format, the user will use the plain or the line codec
        # and the message key will represent the actual line content.
        # If the event is only metadata the event will be drop.
        # This was the behavior of the pre 1.5 plugin.
        #
        # The line need to go through the codecs to replace
        # unknown bytes in the log stream before doing a regexp match or
        # you will get a `Error: invalid byte sequence in UTF-8'
        if event_is_metadata?(event)
          @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
          update_metadata(metadata, event)
        else
          decorate(event)

          event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
          event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?

          if @include_object_properties
            event.set("[@metadata][s3]", object.data.to_h)
          else
            event.set("[@metadata][s3]", {})
          end

          event.set("[@metadata][s3][key]", object.key)

          queue << event
        end
      end
    end
    # #ensure any stateful codecs (such as multi-line ) are flushed to the queue
    @codec.flush do |event|
      queue << event
    end

    return true
  end # def process_local_log

  # True when the event's message is a cloudfront `#Version:` or `#Fields:` header line.
  def event_is_metadata?(event)
    return false unless event.get("message").class == String
    line = event.get("message")
    version_metadata?(line) || fields_metadata?(line)
  end

  def version_metadata?(line)
    line.start_with?('#Version: ')
  end

  def fields_metadata?(line)
    line.start_with?('#Fields: ')
  end

  # Extract the cloudfront version/fields header values into the metadata hash.
  def update_metadata(metadata, event)
    line = event.get('message').strip

    if version_metadata?(line)
      metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
    end

    if fields_metadata?(line)
      metadata[:cloudfront_fields] = line.split(/#Fields: (.+)/).last
    end
  end

  # Yield each line of the file to the block, transparently gunzipping when
  # the filename matches @gzip_pattern. Broken files are logged and skipped.
  def read_file(filename, &block)
    if gzip?(filename)
      read_gzip_file(filename, block)
    else
      read_plain_file(filename, block)
    end
  rescue => e
    # skip any broken file
    @logger.error("Failed to read file, processing skipped", :exception => e.class, :message => e.message, :filename => filename)
  end

  def read_plain_file(filename, block)
    File.open(filename, 'rb') do |file|
      file.each(&block)
    end
  end

  # Decompress via the JVM's GZIPInputStream (handles multi-stream gzip);
  # all streams are closed in reverse order even on failure.
  def read_gzip_file(filename, block)
    file_stream = FileInputStream.new(filename)
    gzip_stream = GZIPInputStream.new(file_stream)
    decoder = InputStreamReader.new(gzip_stream, "UTF-8")
    buffered = BufferedReader.new(decoder)

    while (line = buffered.readLine())
      block.call(line)
    end
  ensure
    buffered.close unless buffered.nil?
    decoder.close unless decoder.nil?
    gzip_stream.close unless gzip_stream.nil?
    file_stream.close unless file_stream.nil?
  end

  def gzip?(filename)
    Regexp.new(@gzip_pattern).match(filename)
  end

  # Lazily build the SinceDB, using @sincedb_path when provided or a
  # generated per-bucket/prefix file under path.data otherwise.
  def sincedb
    @sincedb ||= if @sincedb_path.nil?
                   @logger.info("Using default generated file for the sincedb", :filename => sincedb_file)
                   SinceDB::File.new(sincedb_file)
                 else
                   @logger.info("Using the provided sincedb_path", :sincedb_path => @sincedb_path)
                   SinceDB::File.new(@sincedb_path)
                 end
  end

  # Compute the default sincedb file path (and migrate any legacy $HOME sincedb).
  def sincedb_file
    digest = Digest::MD5.hexdigest("#{@bucket}+#{@prefix}")
    dir = File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "inputs", "s3")
    FileUtils::mkdir_p(dir)
    path = File.join(dir, "sincedb_#{digest}")

    # Migrate old default sincedb path to new one.
    if ENV["HOME"]
      # This is the old file path including the old digest mechanism.
      # It remains as a way to automatically upgrade users with the old default ($HOME)
      # to the new default (path.data)
      old = File.join(ENV["HOME"], ".sincedb_" + Digest::MD5.hexdigest("#{@bucket}+#{@prefix}"))
      if File.exist?(old)
        logger.info("Migrating old sincedb in $HOME to {path.data}")
        FileUtils.mv(old, path)
      end
    end

    path
  end

  def symbolized_settings
    @symbolized_settings ||= symbolize(@additional_settings)
  end

  # Recursively convert hash keys to symbols; non-hash values pass through unchanged.
  def symbolize(hash)
    return hash unless hash.is_a?(Hash)
    symbolized = {}
    hash.each { |key, value| symbolized[key.to_sym] = symbolize(value) }
    symbolized
  end

  # Decide whether a bucket key should be skipped: the prefix itself,
  # "directory" keys, our own backups (same-bucket backup prefix), or
  # anything matching @exclude_pattern.
  def ignore_filename?(filename)
    if @prefix == filename
      return true
    elsif filename.end_with?("/")
      return true
    elsif (@backup_add_prefix && @backup_to_bucket == @bucket && filename =~ /^#{backup_add_prefix}/)
      return true
    elsif @exclude_pattern.nil?
      return false
    elsif filename =~ Regexp.new(@exclude_pattern)
      return true
    else
      return false
    end
  end

  # Download one object to the temporary directory, feed it through the codecs,
  # then back it up / delete it / advance the sincedb; the temp file is always removed.
  def process_log(queue, key)
    @logger.debug("Processing", :bucket => @bucket, :key => key)
    object = @s3bucket.object(key)

    filename = File.join(temporary_directory, File.basename(key))
    if download_remote_file(object, filename)
      if process_local_log(queue, filename, object)
        lastmod = object.last_modified
        backup_to_bucket(object)
        backup_to_dir(filename)
        delete_file_from_bucket(object)
        FileUtils.remove_entry_secure(filename, true)
        sincedb.write(lastmod)
      end
    else
      FileUtils.remove_entry_secure(filename, true)
    end
  end

  # Stream the remote file to the local disk
  #
  # @param [S3Object] Reference to the remote S3 object to download
  # @param [String] The Temporary filename to stream to.
  # @return [Boolean] True if the file was completely downloaded
  def download_remote_file(remote_object, local_filename)
    completed = false
    @logger.debug("Downloading remote file", :remote_key => remote_object.key, :local_filename => local_filename)
    File.open(local_filename, 'wb') do |s3file|
      return completed if stop?
      begin
        remote_object.get(:response_target => s3file)
        completed = true
      rescue Aws::Errors::ServiceError => e
        @logger.warn("Unable to download remote file", :exception => e.class, :message => e.message, :remote_key => remote_object.key)
      end
    end
    completed
  end

  # Delete the original object, but only when no backup bucket holds a copy.
  def delete_file_from_bucket(object)
    if @delete && @backup_to_bucket.nil?
      object.delete()
    end
  end

  # Build the options hash (credentials, proxy, region/endpoint) passed to the AWS SDK.
  def aws_options_hash
    opts = {}

    if @access_key_id.is_a?(NilClass) ^ @secret_access_key.is_a?(NilClass)
      @logger.warn("Likely config error: Only one of access_key_id or secret_access_key was provided but not both.")
    end

    credential_config = CredentialConfig.new(@access_key_id, @secret_access_key, @session_token, @profile, 0, 1, @region)
    @credentials = Aws::CredentialProviderChain.new(credential_config).resolve

    opts[:credentials] = @credentials

    opts[:http_proxy] = @proxy_uri if @proxy_uri

    if self.respond_to?(:aws_service_endpoint)
      # used by CloudWatch to basically do the same as bellow (returns { region: region })
      opts.merge!(self.aws_service_endpoint(@region))
    else
      # NOTE: setting :region works with the aws sdk (resolves correct endpoint)
      opts[:region] = @region
    end

    if !@endpoint.is_a?(NilClass)
      opts[:endpoint] = @endpoint
    end

    return opts
  end

  def get_s3object
    options = symbolized_settings.merge(aws_options_hash || {})
    s3 = Aws::S3::Resource.new(options)
  end

  # Check whether a Glacier/Deep Archive object has been restored and its
  # restore copy has not yet expired.
  # @return [Boolean, nil] true if restored, nil while a restore is ongoing, false otherwise.
  def file_restored?(object)
    begin
      restore = object.data.restore
      if restore && restore.match(/ongoing-request\s?=\s?["']false["']/)
        if restore = restore.match(/expiry-date\s?=\s?["'](.*?)["']/)
          expiry_date = DateTime.parse(restore[1])
          return true if DateTime.now < expiry_date # restored
        else
          @logger.debug("No expiry-date header for restore request: #{object.data.restore}")
          return nil # no expiry-date found for ongoing request
        end
      end
    rescue => e
      @logger.debug("Could not determine Glacier restore status", :exception => e.class, :message => e.message)
    end
    return false
  end

  # Tiny persistence layer: remembers the last_modified timestamp of the most
  # recently processed object in a single plain-text file.
  module SinceDB
    class File
      def initialize(file)
        @sincedb_path = file
      end

      # True when +date+ is strictly newer than the stored timestamp.
      def newer?(date)
        date > read
      end

      def read
        # NOTE: File.exists? was removed in Ruby 3.2; exist? is the supported name.
        if ::File.exist?(@sincedb_path)
          content = ::File.read(@sincedb_path).chomp.strip
          # If the file was created but we didn't have the time to write to it
          return content.empty? ? Time.new(0) : Time.parse(content)
        else
          return Time.new(0)
        end
      end

      def write(since = nil)
        since = Time.now() if since.nil?
        ::File.open(@sincedb_path, 'w') { |file| file.write(since.to_s) }
      end
    end
  end
end # class LogStash::Inputs::S3
|