logstash-input-s3-test 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +101 -0
- data/CONTRIBUTORS +19 -0
- data/Gemfile +11 -0
- data/LICENSE +202 -0
- data/NOTICE.TXT +5 -0
- data/README.md +117 -0
- data/docs/index.asciidoc +317 -0
- data/lib/logstash/inputs/s3.rb +545 -0
- data/lib/logstash/inputs/s3/patch.rb +20 -0
- data/logstash-input-s3.gemspec +32 -0
- data/spec/fixtures/cloudfront.log +4 -0
- data/spec/fixtures/compressed.log.gee.zip +0 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/json.log +2 -0
- data/spec/fixtures/json_with_message.log +2 -0
- data/spec/fixtures/multiline.log +6 -0
- data/spec/fixtures/multiple_compressed_streams.gz +0 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/inputs/s3_spec.rb +532 -0
- data/spec/inputs/sincedb_spec.rb +17 -0
- data/spec/integration/s3_spec.rb +61 -0
- data/spec/support/helpers.rb +45 -0
- metadata +189 -0
data/docs/index.asciidoc
ADDED
@@ -0,0 +1,317 @@
:plugin: s3
:type: input
:default_codec: plain

///////////////////////////////////////////
START - GENERATED VARIABLES, DO NOT EDIT!
///////////////////////////////////////////
:version: %VERSION%
:release_date: %RELEASE_DATE%
:changelog_url: %CHANGELOG_URL%
:include_path: ../../../../logstash/docs/include
///////////////////////////////////////////
END - GENERATED VARIABLES, DO NOT EDIT!
///////////////////////////////////////////

[id="plugins-{type}s-{plugin}"]

=== S3 input plugin

include::{include_path}/plugin_header.asciidoc[]

==== Description

Stream events from files from a S3 bucket.

IMPORTANT: The S3 input plugin only supports AWS S3.
Other S3 compatible storage solutions are not supported.

Each line from each file generates an event.
Files ending in `.gz` are handled as gzip'ed files.

Files that are archived to AWS Glacier will be skipped.

[id="plugins-{type}s-{plugin}-options"]
==== S3 Input Configuration Options

This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.

[cols="<,<,<",options="header",]
|=======================================================================
|Setting |Input type|Required
| <<plugins-{type}s-{plugin}-access_key_id>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-additional_settings>> |<<hash,hash>>|No
| <<plugins-{type}s-{plugin}-aws_credentials_file>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-backup_add_prefix>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-backup_to_bucket>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-backup_to_dir>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-bucket>> |<<string,string>>|Yes
| <<plugins-{type}s-{plugin}-delete>> |<<boolean,boolean>>|No
| <<plugins-{type}s-{plugin}-endpoint>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-exclude_pattern>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-gzip_pattern>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-include_object_properties>> |<<boolean,boolean>>|No
| <<plugins-{type}s-{plugin}-interval>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-prefix>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-proxy_uri>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-region>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-role_arn>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-role_session_name>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-secret_access_key>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-session_token>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-temporary_directory>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-watch_for_new_files>> |<<boolean,boolean>>|No
|=======================================================================

Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
input plugins.

[id="plugins-{type}s-{plugin}-access_key_id"]
===== `access_key_id`

* Value type is <<string,string>>
* There is no default value for this setting.

This plugin uses the AWS SDK and supports several ways to get credentials, which will be tried in this order:

1. Static configuration, using `access_key_id` and `secret_access_key` params in logstash plugin config
2. External credentials file specified by `aws_credentials_file`
3. Environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
4. Environment variables `AMAZON_ACCESS_KEY_ID` and `AMAZON_SECRET_ACCESS_KEY`
5. IAM Instance Profile (available when running inside EC2)

[id="plugins-{type}s-{plugin}-additional_settings"]
===== `additional_settings`

* Value type is <<hash,hash>>
* Default value is `{}`

Key-value pairs of settings and corresponding values used to parametrize
the connection to s3. See full list in https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html[the AWS SDK documentation]. Example:

[source,ruby]
    input {
      s3 {
        "access_key_id" => "1234"
        "secret_access_key" => "secret"
        "bucket" => "logstash-test"
        "additional_settings" => {
          "force_path_style" => true
          "follow_redirects" => false
        }
      }
    }

[id="plugins-{type}s-{plugin}-aws_credentials_file"]
===== `aws_credentials_file`

* Value type is <<string,string>>
* There is no default value for this setting.

Path to YAML file containing a hash of AWS credentials.
This file will only be loaded if `access_key_id` and
`secret_access_key` aren't set. The contents of the
file should look like this:

[source,ruby]
----------------------------------
    :access_key_id: "12345"
    :secret_access_key: "54321"
----------------------------------

[id="plugins-{type}s-{plugin}-backup_add_prefix"]
===== `backup_add_prefix`

* Value type is <<string,string>>
* Default value is `nil`

Append a prefix to the key (full path including file name in s3) after processing.
If backing up to another (or the same) bucket, this effectively lets you
choose a new 'folder' to place the files in

[id="plugins-{type}s-{plugin}-backup_to_bucket"]
===== `backup_to_bucket`

* Value type is <<string,string>>
* Default value is `nil`

Name of a S3 bucket to backup processed files to.

[id="plugins-{type}s-{plugin}-backup_to_dir"]
===== `backup_to_dir`

* Value type is <<string,string>>
* Default value is `nil`

Path of a local directory to backup processed files to.

[id="plugins-{type}s-{plugin}-bucket"]
===== `bucket`

* This is a required setting.
* Value type is <<string,string>>
* There is no default value for this setting.

The name of the S3 bucket.

[id="plugins-{type}s-{plugin}-delete"]
===== `delete`

* Value type is <<boolean,boolean>>
* Default value is `false`

Whether to delete processed files from the original bucket.

[id="plugins-{type}s-{plugin}-endpoint"]
===== `endpoint`

* Value type is <<string,string>>
* There is no default value for this setting.

The endpoint to connect to. By default it is constructed using the value of `region`.
This is useful when connecting to S3 compatible services, but beware that these aren't
guaranteed to work correctly with the AWS SDK.

[id="plugins-{type}s-{plugin}-exclude_pattern"]
===== `exclude_pattern`

* Value type is <<string,string>>
* Default value is `nil`

Ruby style regexp of keys to exclude from the bucket.

Note that files matching the pattern are skipped _after_ they have been listed.
Consider using <<plugins-{type}s-{plugin}-prefix>> instead where possible.

Example:

[source,ruby]
-----
"exclude_pattern" => "\/2020\/04\/"
-----

This pattern excludes all logs containing "/2020/04/" in the path.

[id="plugins-{type}s-{plugin}-gzip_pattern"]
===== `gzip_pattern`

* Value type is <<string,string>>
* Default value is `"\.gz(ip)?$"`

Regular expression used to determine whether an input file is in gzip format.

[id="plugins-{type}s-{plugin}-include_object_properties"]
===== `include_object_properties`

* Value type is <<boolean,boolean>>
* Default value is `false`

Whether or not to include the S3 object's properties (last_modified, content_type, metadata) into each Event at
`[@metadata][s3]`. Regardless of this setting, `[@metadata][s3][key]` will always be present.

[id="plugins-{type}s-{plugin}-interval"]
===== `interval`

* Value type is <<number,number>>
* Default value is `60`

Interval to wait between to check the file list again after a run is finished.
Value is in seconds.

[id="plugins-{type}s-{plugin}-prefix"]
===== `prefix`

* Value type is <<string,string>>
* Default value is `nil`

If specified, the prefix of filenames in the bucket must match (not a regexp)

[id="plugins-{type}s-{plugin}-proxy_uri"]
===== `proxy_uri`

* Value type is <<string,string>>
* There is no default value for this setting.

URI to proxy server if required

[id="plugins-{type}s-{plugin}-region"]
===== `region`

* Value type is <<string,string>>
* Default value is `"us-east-1"`

The AWS Region

[id="plugins-{type}s-{plugin}-role_arn"]
===== `role_arn`

* Value type is <<string,string>>
* There is no default value for this setting.

The AWS IAM Role to assume, if any.
This is used to generate temporary credentials, typically for cross-account access.
See the https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html[AssumeRole API documentation] for more information.

[id="plugins-{type}s-{plugin}-role_session_name"]
===== `role_session_name`

* Value type is <<string,string>>
* Default value is `"logstash"`

Session name to use when assuming an IAM role.

[id="plugins-{type}s-{plugin}-secret_access_key"]
===== `secret_access_key`

* Value type is <<string,string>>
* There is no default value for this setting.

The AWS Secret Access Key

[id="plugins-{type}s-{plugin}-session_token"]
===== `session_token`

* Value type is <<string,string>>
* There is no default value for this setting.

The AWS Session token for temporary credential

[id="plugins-{type}s-{plugin}-sincedb_path"]
===== `sincedb_path`

* Value type is <<string,string>>
* Default value is `nil`

Where to write the since database (keeps track of the date
the last handled file was added to S3). The default will write
sincedb files to the directory '{path.data}/plugins/inputs/s3/'

If specified, this setting must be a filename path and not just a directory.

[id="plugins-{type}s-{plugin}-temporary_directory"]
===== `temporary_directory`

* Value type is <<string,string>>
* Default value is `"/tmp/logstash"`

Set the directory where logstash will store the tmp files before processing them.

[id="plugins-{type}s-{plugin}-watch_for_new_files"]
===== `watch_for_new_files`

* Value type is <<boolean,boolean>>
* Default value is `true`

Whether or not to watch for new files.
Disabling this option causes the input to close itself after processing the files from a single listing.

[id="plugins-{type}s-{plugin}-common-options"]
include::{include_path}/{type}.asciidoc[]

:default_codec!:
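Before the plugin source below, here is a minimal pipeline sketch assembled only from the options documented above (it is not one of the packaged files); the bucket name and prefix are placeholder values, and every setting maps to a documented option:

input {
  s3 {
    "bucket" => "my-log-bucket"            # required; placeholder bucket name
    "region" => "us-east-1"                # the documented default, shown explicitly
    "prefix" => "cloudfront/"              # plain string prefix match, not a regexp
    "interval" => 60                       # seconds between bucket listings
    "include_object_properties" => true    # expose last_modified etc. under [@metadata][s3]
  }
}
output {
  stdout {}
}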
data/lib/logstash/inputs/s3.rb
ADDED
@@ -0,0 +1,545 @@
# encoding: utf-8
require "logstash/inputs/base"
require "logstash/namespace"
require "time"
require "date"
require "tmpdir"
require "stud/interval"
require "stud/temporary"
require "aws-sdk-s3"
require "logstash/inputs/s3/patch"

require 'java'

Aws.eager_autoload!
# Stream events from files from a S3 bucket.
#
# Each line from each file generates an event.
# Files ending in `.gz` are handled as gzip'ed files.
class LogStash::Inputs::S3 < LogStash::Inputs::Base

  java_import java.io.InputStream
  java_import java.io.InputStreamReader
  java_import java.io.FileInputStream
  java_import java.io.BufferedReader
  java_import java.util.zip.GZIPInputStream
  java_import java.util.zip.ZipException

  CredentialConfig = Struct.new(
    :access_key_id,
    :secret_access_key,
    :session_token,
    :profile,
    :instance_profile_credentials_retries,
    :instance_profile_credentials_timeout,
    :region)

  config_name "s3"

  default :codec, "plain"

  # The name of the S3 bucket.
  config :bucket, :validate => :string, :required => true

  # If specified, the prefix of filenames in the bucket must match (not a regexp)
  config :prefix, :validate => :string, :default => nil

  config :additional_settings, :validate => :hash, :default => {}

  # The path to use for writing state. The state stored by this plugin is
  # a memory of files already processed by this plugin.
  #
  # If not specified, the default is in `{path.data}/plugins/inputs/s3/...`
  #
  # Should be a path with filename not just a directory.
  config :sincedb_path, :validate => :string, :default => nil

  # Name of a S3 bucket to backup processed files to.
  config :backup_to_bucket, :validate => :string, :default => nil

  # Append a prefix to the key (full path including file name in s3) after processing.
  # If backing up to another (or the same) bucket, this effectively lets you
  # choose a new 'folder' to place the files in
  config :backup_add_prefix, :validate => :string, :default => nil

  # Path of a local directory to backup processed files to.
  config :backup_to_dir, :validate => :string, :default => nil

  # Whether to delete processed files from the original bucket.
  config :delete, :validate => :boolean, :default => false

  # Interval to wait between to check the file list again after a run is finished.
  # Value is in seconds.
  config :interval, :validate => :number, :default => 60

  # Whether to watch for new files with the interval.
  # If false, overrides any interval and only lists the s3 bucket once.
  config :watch_for_new_files, :validate => :boolean, :default => true

  # Ruby style regexp of keys to exclude from the bucket
  config :exclude_pattern, :validate => :string, :default => nil

  # Set the directory where logstash will store the tmp files before processing them.
  # default to the current OS temporary directory in linux /tmp/logstash
  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")

  # Whether or not to include the S3 object's properties (last_modified, content_type, metadata)
  # into each Event at [@metadata][s3]. Regardless of this setting, [@metdata][s3][key] will always
  # be present.
  config :include_object_properties, :validate => :boolean, :default => false

  # Regular expression used to determine whether an input file is in gzip format.
  # default to an expression that matches *.gz and *.gzip file extensions
  config :gzip_pattern, :validate => :string, :default => "\.gz(ip)?$"

  config :region, :validate => :string, :default => "us-east-1"

  # This plugin uses the AWS SDK and supports several ways to get credentials, which will be tried in this order:
  #
  # 1. Static configuration, using `access_key_id` and `secret_access_key` params or `role_arn` in the logstash plugin config
  # 2. External credentials file specified by `aws_credentials_file`
  # 3. Environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
  # 4. Environment variables `AMAZON_ACCESS_KEY_ID` and `AMAZON_SECRET_ACCESS_KEY`
  # 5. IAM Instance Profile (available when running inside EC2)
  config :access_key_id, :validate => :string

  # The AWS Secret Access Key
  config :secret_access_key, :validate => :string

  # Profile
  config :profile, :validate => :string, :default => "default"

  # The AWS Session token for temporary credential
  config :session_token, :validate => :password

  # URI to proxy server if required
  config :proxy_uri, :validate => :string

  # Custom endpoint to connect to s3
  config :endpoint, :validate => :string

  # The AWS IAM Role to assume, if any.
  # This is used to generate temporary credentials typically for cross-account access.
  # See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html for more information.
  config :role_arn, :validate => :string

  # Session name to use when assuming an IAM role
  config :role_session_name, :validate => :string, :default => "logstash"

  # Path to YAML file containing a hash of AWS credentials.
  # This file will only be loaded if `access_key_id` and
  # `secret_access_key` aren't set. The contents of the
  # file should look like this:
  #
  # [source,ruby]
  # ----------------------------------
  # :access_key_id: "12345"
  # :secret_access_key: "54321"
  # ----------------------------------
  #
  config :aws_credentials_file, :validate => :string

  def register
    require "fileutils"
    require "digest/md5"

    @logger.info("Registering", :bucket => @bucket, :region => @region)

    s3 = get_s3object

    @s3bucket = s3.bucket(@bucket)

    unless @backup_to_bucket.nil?
      @backup_bucket = s3.bucket(@backup_to_bucket)
      begin
        s3.client.head_bucket({ :bucket => @backup_to_bucket})
      rescue Aws::S3::Errors::NoSuchBucket
        s3.create_bucket({ :bucket => @backup_to_bucket})
      end
    end

    unless @backup_to_dir.nil?
      Dir.mkdir(@backup_to_dir, 0700) unless File.exists?(@backup_to_dir)
    end

    FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)

    if !@watch_for_new_files && original_params.include?('interval')
      logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.")
    end
  end

  def run(queue)
    @current_thread = Thread.current
    Stud.interval(@interval) do
      process_files(queue)
      stop unless @watch_for_new_files
    end
  end # def run

  def list_new_files
    objects = {}
    found = false
    begin
      @s3bucket.objects(:prefix => @prefix).each do |log|
        found = true
        @logger.debug('Found key', :key => log.key)
        if ignore_filename?(log.key)
          @logger.debug('Ignoring', :key => log.key)
        elsif log.content_length <= 0
          @logger.debug('Object Zero Length', :key => log.key)
        elsif !sincedb.newer?(log.last_modified)
          @logger.debug('Object Not Modified', :key => log.key)
        elsif (log.storage_class == 'GLACIER' || log.storage_class == 'DEEP_ARCHIVE') && !file_restored?(log.object)
          @logger.debug('Object Archived to Glacier', :key => log.key)
        else
          objects[log.key] = log.last_modified
          @logger.debug("Added to objects[]", :key => log.key, :length => objects.length)
        end
      end
      @logger.info('No files found in bucket', :prefix => prefix) unless found
    rescue Aws::Errors::ServiceError => e
      @logger.error("Unable to list objects in bucket", :exception => e.class, :message => e.message, :backtrace => e.backtrace, :prefix => prefix)
    end
    objects.keys.sort {|a,b| objects[a] <=> objects[b]}
  end # def fetch_new_files

  def backup_to_bucket(object)
    unless @backup_to_bucket.nil?
      backup_key = "#{@backup_add_prefix}#{object.key}"
      @backup_bucket.object(backup_key).copy_from(:copy_source => "#{object.bucket_name}/#{object.key}")
      if @delete
        object.delete()
      end
    end
  end

  def backup_to_dir(filename)
    unless @backup_to_dir.nil?
      FileUtils.cp(filename, @backup_to_dir)
    end
  end

  def process_files(queue)
    objects = list_new_files

    objects.each do |key|
      if stop?
        break
      else
        process_log(queue, key)
      end
    end
  end # def process_files

  def stop
    # @current_thread is initialized in the `#run` method,
    # this variable is needed because the `#stop` is a called in another thread
    # than the `#run` method and requiring us to call stop! with a explicit thread.
    Stud.stop!(@current_thread)
  end

  private

  # Read the content of the local file
  #
  # @param [Queue] Where to push the event
  # @param [String] Which file to read from
  # @param [S3Object] Source s3 object
  # @return [Boolean] True if the file was completely read, false otherwise.
  def process_local_log(queue, filename, object)
    @logger.debug('Processing file', :filename => filename)
    metadata = {}
    # Currently codecs operates on bytes instead of stream.
    # So all IO stuff: decompression, reading need to be done in the actual
    # input and send as bytes to the codecs.
    read_file(filename) do |line|
      if stop?
        @logger.warn("Logstash S3 input, stop reading in the middle of the file, we will read it again when logstash is started")
        return false
      end

      @codec.decode(line) do |event|
        # We are making an assumption concerning cloudfront
        # log format, the user will use the plain or the line codec
        # and the message key will represent the actual line content.
        # If the event is only metadata the event will be drop.
        # This was the behavior of the pre 1.5 plugin.
        #
        # The line need to go through the codecs to replace
        # unknown bytes in the log stream before doing a regexp match or
        # you will get a `Error: invalid byte sequence in UTF-8'
        if event_is_metadata?(event)
          @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
          update_metadata(metadata, event)
        else
          decorate(event)

          event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
          event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?

          if @include_object_properties
            event.set("[@metadata][s3]", object.data.to_h)
          else
            event.set("[@metadata][s3]", {})
          end

          event.set("[@metadata][s3][key]", object.key)

          queue << event
        end
      end
    end
    # #ensure any stateful codecs (such as multi-line ) are flushed to the queue
    @codec.flush do |event|
      queue << event
    end

    return true
  end # def process_local_log

  def event_is_metadata?(event)
    return false unless event.get("message").class == String
    line = event.get("message")
    version_metadata?(line) || fields_metadata?(line)
  end

  def version_metadata?(line)
    line.start_with?('#Version: ')
  end

  def fields_metadata?(line)
    line.start_with?('#Fields: ')
  end

  def update_metadata(metadata, event)
    line = event.get('message').strip

    if version_metadata?(line)
      metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
    end

    if fields_metadata?(line)
      metadata[:cloudfront_fields] = line.split(/#Fields: (.+)/).last
    end
  end

  def read_file(filename, &block)
    if gzip?(filename)
      read_gzip_file(filename, block)
    else
      read_plain_file(filename, block)
    end
  rescue => e
    # skip any broken file
    @logger.error("Failed to read file, processing skipped", :exception => e.class, :message => e.message, :filename => filename)
  end

  def read_plain_file(filename, block)
    File.open(filename, 'rb') do |file|
      file.each(&block)
    end
  end

  def read_gzip_file(filename, block)
    file_stream = FileInputStream.new(filename)
    gzip_stream = GZIPInputStream.new(file_stream)
    decoder = InputStreamReader.new(gzip_stream, "UTF-8")
    buffered = BufferedReader.new(decoder)

    while (line = buffered.readLine())
      block.call(line)
    end
  ensure
    buffered.close unless buffered.nil?
    decoder.close unless decoder.nil?
    gzip_stream.close unless gzip_stream.nil?
    file_stream.close unless file_stream.nil?
  end

  def gzip?(filename)
    Regexp.new(@gzip_pattern).match(filename)
  end

  def sincedb
    @sincedb ||= if @sincedb_path.nil?
                   @logger.info("Using default generated file for the sincedb", :filename => sincedb_file)
                   SinceDB::File.new(sincedb_file)
                 else
                   @logger.info("Using the provided sincedb_path", :sincedb_path => @sincedb_path)
                   SinceDB::File.new(@sincedb_path)
                 end
  end

  def sincedb_file
    digest = Digest::MD5.hexdigest("#{@bucket}+#{@prefix}")
    dir = File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "inputs", "s3")
    FileUtils::mkdir_p(dir)
    path = File.join(dir, "sincedb_#{digest}")

    # Migrate old default sincedb path to new one.
    if ENV["HOME"]
      # This is the old file path including the old digest mechanism.
      # It remains as a way to automatically upgrade users with the old default ($HOME)
      # to the new default (path.data)
      old = File.join(ENV["HOME"], ".sincedb_" + Digest::MD5.hexdigest("#{@bucket}+#{@prefix}"))
      if File.exist?(old)
        logger.info("Migrating old sincedb in $HOME to {path.data}")
        FileUtils.mv(old, path)
      end
    end

    path
  end

  def symbolized_settings
    @symbolized_settings ||= symbolize(@additional_settings)
  end

  def symbolize(hash)
    return hash unless hash.is_a?(Hash)
    symbolized = {}
    hash.each { |key, value| symbolized[key.to_sym] = symbolize(value) }
    symbolized
  end

  def ignore_filename?(filename)
    if @prefix == filename
      return true
    elsif filename.end_with?("/")
      return true
    elsif (@backup_add_prefix && @backup_to_bucket == @bucket && filename =~ /^#{backup_add_prefix}/)
      return true
    elsif @exclude_pattern.nil?
      return false
    elsif filename =~ Regexp.new(@exclude_pattern)
      return true
    else
      return false
    end
  end

  def process_log(queue, key)
    @logger.debug("Processing", :bucket => @bucket, :key => key)
    object = @s3bucket.object(key)

    filename = File.join(temporary_directory, File.basename(key))
    if download_remote_file(object, filename)
      if process_local_log(queue, filename, object)
        lastmod = object.last_modified
        backup_to_bucket(object)
        backup_to_dir(filename)
        delete_file_from_bucket(object)
        FileUtils.remove_entry_secure(filename, true)
        sincedb.write(lastmod)
      end
    else
      FileUtils.remove_entry_secure(filename, true)
    end
  end

  # Stream the remove file to the local disk
  #
  # @param [S3Object] Reference to the remove S3 objec to download
  # @param [String] The Temporary filename to stream to.
  # @return [Boolean] True if the file was completely downloaded
  def download_remote_file(remote_object, local_filename)
    completed = false
    @logger.debug("Downloading remote file", :remote_key => remote_object.key, :local_filename => local_filename)
    File.open(local_filename, 'wb') do |s3file|
      return completed if stop?
      begin
        remote_object.get(:response_target => s3file)
        completed = true
      rescue Aws::Errors::ServiceError => e
        @logger.warn("Unable to download remote file", :exception => e.class, :message => e.message, :remote_key => remote_object.key)
      end
    end
    completed
  end

  def delete_file_from_bucket(object)
    if @delete and @backup_to_bucket.nil?
      object.delete()
    end
  end

  def aws_options_hash
    opts = {}

    if @access_key_id.is_a?(NilClass) ^ @secret_access_key.is_a?(NilClass)
      @logger.warn("Likely config error: Only one of access_key_id or secret_access_key was provided but not both.")
    end

    credential_config = CredentialConfig.new(@access_key_id, @secret_access_key, @session_token, @profile, 0, 1, @region)
    @credentials = Aws::CredentialProviderChain.new(credential_config).resolve

    opts[:credentials] = @credentials

    opts[:http_proxy] = @proxy_uri if @proxy_uri

    if self.respond_to?(:aws_service_endpoint)
      # used by CloudWatch to basically do the same as bellow (returns { region: region })
      opts.merge!(self.aws_service_endpoint(@region))
    else
      # NOTE: setting :region works with the aws sdk (resolves correct endpoint)
      opts[:region] = @region
    end

    if !@endpoint.is_a?(NilClass)
      opts[:endpoint] = @endpoint
    end

    return opts
  end

  def get_s3object
    options = symbolized_settings.merge(aws_options_hash || {})
    s3 = Aws::S3::Resource.new(options)
  end

  def file_restored?(object)
    begin
      restore = object.data.restore
      if restore && restore.match(/ongoing-request\s?=\s?["']false["']/)
        if restore = restore.match(/expiry-date\s?=\s?["'](.*?)["']/)
          expiry_date = DateTime.parse(restore[1])
          return true if DateTime.now < expiry_date # restored
        else
          @logger.debug("No expiry-date header for restore request: #{object.data.restore}")
          return nil # no expiry-date found for ongoing request
        end
      end
    rescue => e
      @logger.debug("Could not determine Glacier restore status", :exception => e.class, :message => e.message)
    end
    return false
  end

  module SinceDB
    class File
      def initialize(file)
        @sincedb_path = file
      end

      def newer?(date)
        date > read
      end

      def read
        if ::File.exists?(@sincedb_path)
          content = ::File.read(@sincedb_path).chomp.strip
          # If the file was created but we didn't have the time to write to it
          return content.empty? ? Time.new(0) : Time.parse(content)
        else
          return Time.new(0)
        end
      end

      def write(since = nil)
        since = Time.now() if since.nil?
        ::File.open(@sincedb_path, 'w') { |file| file.write(since.to_s) }
      end
    end
  end
end # class LogStash::Inputs::S3
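The SinceDB::File class at the end of the source keeps the plugin's entire state as a single timestamp in one file. The following standalone Ruby sketch (not part of the package; the temp file path is only illustrative) shows that round trip and the comparison `list_new_files` relies on:

require "time"
require "tempfile"

# Write a "last processed" marker the way SinceDB::File#write does.
sincedb_path = Tempfile.new("sincedb_example").path
File.write(sincedb_path, Time.now.to_s)

# Read it back the way SinceDB::File#read does, falling back to Time.new(0)
# when the file exists but nothing has been written to it yet.
content = File.read(sincedb_path).chomp.strip
last_run = content.empty? ? Time.new(0) : Time.parse(content)

# An S3 object is only queued when its last_modified is newer than the marker.
object_last_modified = Time.now + 60
puts object_last_modified > last_run  # => true, so this object would be processed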