logstash-input-s3-r2 3.8.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +123 -0
- data/CONTRIBUTORS +19 -0
- data/Gemfile +11 -0
- data/LICENSE +202 -0
- data/NOTICE.TXT +5 -0
- data/README.md +117 -0
- data/docs/index.asciidoc +345 -0
- data/lib/logstash/inputs/s3/patch.rb +20 -0
- data/lib/logstash/inputs/s3r2.rb +469 -0
- data/logstash-input-s3.gemspec +31 -0
- data/spec/fixtures/cloudfront.log +4 -0
- data/spec/fixtures/compressed.log.gee.zip +0 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/json.log +2 -0
- data/spec/fixtures/json_with_message.log +2 -0
- data/spec/fixtures/multiline.log +6 -0
- data/spec/fixtures/multiple_compressed_streams.gz +0 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/inputs/s3_spec.rb +612 -0
- data/spec/inputs/sincedb_spec.rb +17 -0
- data/spec/integration/s3_spec.rb +67 -0
- data/spec/support/helpers.rb +52 -0
- metadata +189 -0
data/docs/index.asciidoc
ADDED
@@ -0,0 +1,345 @@
|
|
1
|
+
:plugin: s3
|
2
|
+
:type: input
|
3
|
+
:default_codec: plain
|
4
|
+
|
5
|
+
///////////////////////////////////////////
|
6
|
+
START - GENERATED VARIABLES, DO NOT EDIT!
|
7
|
+
///////////////////////////////////////////
|
8
|
+
:version: %VERSION%
|
9
|
+
:release_date: %RELEASE_DATE%
|
10
|
+
:changelog_url: %CHANGELOG_URL%
|
11
|
+
:include_path: ../../../../logstash/docs/include
|
12
|
+
///////////////////////////////////////////
|
13
|
+
END - GENERATED VARIABLES, DO NOT EDIT!
|
14
|
+
///////////////////////////////////////////
|
15
|
+
|
16
|
+
[id="plugins-{type}s-{plugin}"]
|
17
|
+
|
18
|
+
=== S3 input plugin
|
19
|
+
|
20
|
+
include::{include_path}/plugin_header.asciidoc[]
|
21
|
+
|
22
|
+
==== Description
|
23
|
+
|
24
|
+
Stream events from files in an S3 bucket.
|
25
|
+
|
26
|
+
IMPORTANT: The S3 input plugin only supports AWS S3.
|
27
|
+
Other S3 compatible storage solutions are not supported.
|
28
|
+
|
29
|
+
Each line from each file generates an event.
|
30
|
+
Files whose names match `gzip_pattern` (by default, those ending in `.gz` or `.gzip`) are handled as gzip'ed files.
|
31
|
+
|
32
|
+
Files that are archived to AWS Glacier will be skipped.
|
33
|
+
|
34
|
+
[id="plugins-{type}s-{plugin}-ecs_metadata"]
|
35
|
+
==== Event Metadata and the Elastic Common Schema (ECS)
|
36
|
+
This plugin adds CloudFront metadata to the event.
|
37
|
+
When ECS compatibility is disabled, the value is stored at the root level of the event.
|
38
|
+
When ECS is enabled, the value is stored in `@metadata`, where it can be used by other plugins in your pipeline.
|
39
|
+
|
40
|
+
Here’s how ECS compatibility mode affects output.
|
41
|
+
[cols="<l,<l,e,<e"]
|
42
|
+
|=======================================================================
|
43
|
+
| ECS disabled | ECS v1 | Availability | Description
|
44
|
+
|
45
|
+
| cloudfront_fields | [@metadata][s3][cloudfront][fields] | available when the file is a CloudFront log | column names of log
|
46
|
+
| cloudfront_version | [@metadata][s3][cloudfront][version] | available when the file is a CloudFront log | version of log
|
47
|
+
|=======================================================================
|
48
|
+
|
49
|
+
[id="plugins-{type}s-{plugin}-options"]
|
50
|
+
==== S3 Input Configuration Options
|
51
|
+
|
52
|
+
This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.
|
53
|
+
|
54
|
+
[cols="<,<,<",options="header",]
|
55
|
+
|=======================================================================
|
56
|
+
|Setting |Input type|Required
|
57
|
+
| <<plugins-{type}s-{plugin}-access_key_id>> |<<string,string>>|No
|
58
|
+
| <<plugins-{type}s-{plugin}-additional_settings>> |<<hash,hash>>|No
|
59
|
+
| <<plugins-{type}s-{plugin}-aws_credentials_file>> |<<string,string>>|No
|
60
|
+
| <<plugins-{type}s-{plugin}-backup_add_prefix>> |<<string,string>>|No
|
61
|
+
| <<plugins-{type}s-{plugin}-backup_to_bucket>> |<<string,string>>|No
|
62
|
+
| <<plugins-{type}s-{plugin}-backup_to_dir>> |<<string,string>>|No
|
63
|
+
| <<plugins-{type}s-{plugin}-bucket>> |<<string,string>>|Yes
|
64
|
+
| <<plugins-{type}s-{plugin}-delete>> |<<boolean,boolean>>|No
|
65
|
+
| <<plugins-{type}s-{plugin}-ecs_compatibility>> |<<string,string>>|No
|
66
|
+
| <<plugins-{type}s-{plugin}-endpoint>> |<<string,string>>|No
|
67
|
+
| <<plugins-{type}s-{plugin}-exclude_pattern>> |<<string,string>>|No
|
68
|
+
| <<plugins-{type}s-{plugin}-gzip_pattern>> |<<string,string>>|No
|
69
|
+
| <<plugins-{type}s-{plugin}-include_object_properties>> |<<boolean,boolean>>|No
|
70
|
+
| <<plugins-{type}s-{plugin}-interval>> |<<number,number>>|No
|
71
|
+
| <<plugins-{type}s-{plugin}-prefix>> |<<string,string>>|No
|
72
|
+
| <<plugins-{type}s-{plugin}-proxy_uri>> |<<string,string>>|No
|
73
|
+
| <<plugins-{type}s-{plugin}-region>> |<<string,string>>|No
|
74
|
+
| <<plugins-{type}s-{plugin}-role_arn>> |<<string,string>>|No
|
75
|
+
| <<plugins-{type}s-{plugin}-role_session_name>> |<<string,string>>|No
|
76
|
+
| <<plugins-{type}s-{plugin}-secret_access_key>> |<<string,string>>|No
|
77
|
+
| <<plugins-{type}s-{plugin}-session_token>> |<<string,string>>|No
|
78
|
+
| <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
|
79
|
+
| <<plugins-{type}s-{plugin}-temporary_directory>> |<<string,string>>|No
|
80
|
+
| <<plugins-{type}s-{plugin}-watch_for_new_files>> |<<boolean,boolean>>|No
|
81
|
+
|=======================================================================
|
82
|
+
|
83
|
+
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
84
|
+
input plugins.
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
[id="plugins-{type}s-{plugin}-access_key_id"]
|
89
|
+
===== `access_key_id`
|
90
|
+
|
91
|
+
* Value type is <<string,string>>
|
92
|
+
* There is no default value for this setting.
|
93
|
+
|
94
|
+
This plugin uses the AWS SDK and supports several ways to get credentials, which will be tried in this order:
|
95
|
+
|
96
|
+
1. Static configuration, using the `access_key_id` and `secret_access_key` params in the Logstash plugin config
|
97
|
+
2. External credentials file specified by `aws_credentials_file`
|
98
|
+
3. Environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
|
99
|
+
4. Environment variables `AMAZON_ACCESS_KEY_ID` and `AMAZON_SECRET_ACCESS_KEY`
|
100
|
+
5. IAM Instance Profile (available when running inside EC2)
|
101
|
+
|
102
|
+
|
103
|
+
[id="plugins-{type}s-{plugin}-additional_settings"]
|
104
|
+
===== `additional_settings`
|
105
|
+
|
106
|
+
* Value type is <<hash,hash>>
|
107
|
+
* Default value is `{}`
|
108
|
+
|
109
|
+
Key-value pairs of settings and corresponding values used to parametrize
|
110
|
+
the connection to S3. See the full list in https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html[the AWS SDK documentation]. Example:
|
111
|
+
|
112
|
+
[source,ruby]
|
113
|
+
input {
|
114
|
+
s3 {
|
115
|
+
access_key_id => "1234"
|
116
|
+
secret_access_key => "secret"
|
117
|
+
bucket => "logstash-test"
|
118
|
+
additional_settings => {
|
119
|
+
force_path_style => true
|
120
|
+
follow_redirects => false
|
121
|
+
}
|
122
|
+
}
|
123
|
+
}
|
124
|
+
|
125
|
+
[id="plugins-{type}s-{plugin}-aws_credentials_file"]
|
126
|
+
===== `aws_credentials_file`
|
127
|
+
|
128
|
+
* Value type is <<string,string>>
|
129
|
+
* There is no default value for this setting.
|
130
|
+
|
131
|
+
Path to YAML file containing a hash of AWS credentials.
|
132
|
+
This file will only be loaded if `access_key_id` and
|
133
|
+
`secret_access_key` aren't set. The contents of the
|
134
|
+
file should look like this:
|
135
|
+
|
136
|
+
[source,ruby]
|
137
|
+
----------------------------------
|
138
|
+
:access_key_id: "12345"
|
139
|
+
:secret_access_key: "54321"
|
140
|
+
----------------------------------
|
141
|
+
|
142
|
+
|
143
|
+
[id="plugins-{type}s-{plugin}-backup_add_prefix"]
|
144
|
+
===== `backup_add_prefix`
|
145
|
+
|
146
|
+
* Value type is <<string,string>>
|
147
|
+
* Default value is `nil`
|
148
|
+
|
149
|
+
Add a prefix to the key (the full path, including the file name, in S3) after processing.
|
150
|
+
If backing up to another (or the same) bucket, this effectively lets you
|
151
|
+
choose a new 'folder' to place the files in.
|
152
|
+
|
153
|
+
[id="plugins-{type}s-{plugin}-backup_to_bucket"]
|
154
|
+
===== `backup_to_bucket`
|
155
|
+
|
156
|
+
* Value type is <<string,string>>
|
157
|
+
* Default value is `nil`
|
158
|
+
|
159
|
+
Name of an S3 bucket to back up processed files to.
|
160
|
+
|
161
|
+
[id="plugins-{type}s-{plugin}-backup_to_dir"]
|
162
|
+
===== `backup_to_dir`
|
163
|
+
|
164
|
+
* Value type is <<string,string>>
|
165
|
+
* Default value is `nil`
|
166
|
+
|
167
|
+
Path of a local directory to back up processed files to.
|
168
|
+
|
169
|
+
[id="plugins-{type}s-{plugin}-bucket"]
|
170
|
+
===== `bucket`
|
171
|
+
|
172
|
+
* This is a required setting.
|
173
|
+
* Value type is <<string,string>>
|
174
|
+
* There is no default value for this setting.
|
175
|
+
|
176
|
+
The name of the S3 bucket.
|
177
|
+
|
178
|
+
[id="plugins-{type}s-{plugin}-delete"]
|
179
|
+
===== `delete`
|
180
|
+
|
181
|
+
* Value type is <<boolean,boolean>>
|
182
|
+
* Default value is `false`
|
183
|
+
|
184
|
+
Whether to delete processed files from the original bucket.
|
185
|
+
|
186
|
+
[id="plugins-{type}s-{plugin}-ecs_compatibility"]
|
187
|
+
===== `ecs_compatibility`
|
188
|
+
|
189
|
+
* Value type is <<string,string>>
|
190
|
+
* Supported values are:
|
191
|
+
** `disabled`: does not use ECS-compatible field names
|
192
|
+
** `v1`,`v8`: uses metadata fields that are compatible with Elastic Common Schema
|
193
|
+
|
194
|
+
Controls this plugin's compatibility with the
|
195
|
+
{ecs-ref}[Elastic Common Schema (ECS)].
|
196
|
+
See <<plugins-{type}s-{plugin}-ecs_metadata>> for detailed information.
|
197
|
+
|
198
|
+
[id="plugins-{type}s-{plugin}-endpoint"]
|
199
|
+
===== `endpoint`
|
200
|
+
|
201
|
+
* Value type is <<string,string>>
|
202
|
+
* There is no default value for this setting.
|
203
|
+
|
204
|
+
The endpoint to connect to. By default it is constructed using the value of `region`.
|
205
|
+
This is useful when connecting to S3 compatible services, but beware that these aren't
|
206
|
+
guaranteed to work correctly with the AWS SDK.
|
207
|
+
|
208
|
+
[id="plugins-{type}s-{plugin}-exclude_pattern"]
|
209
|
+
===== `exclude_pattern`
|
210
|
+
|
211
|
+
* Value type is <<string,string>>
|
212
|
+
* Default value is `nil`
|
213
|
+
|
214
|
+
Ruby style regexp of keys to exclude from the bucket.
|
215
|
+
|
216
|
+
Note that files matching the pattern are skipped _after_ they have been listed.
|
217
|
+
Consider using <<plugins-{type}s-{plugin}-prefix>> instead where possible.
|
218
|
+
|
219
|
+
Example:
|
220
|
+
|
221
|
+
[source,ruby]
|
222
|
+
-----
|
223
|
+
"exclude_pattern" => "\/2020\/04\/"
|
224
|
+
-----
|
225
|
+
|
226
|
+
This pattern excludes all logs containing "/2020/04/" in the path.
|
227
|
+
|
228
|
+
|
229
|
+
[id="plugins-{type}s-{plugin}-gzip_pattern"]
|
230
|
+
===== `gzip_pattern`
|
231
|
+
|
232
|
+
* Value type is <<string,string>>
|
233
|
+
* Default value is `"\.gz(ip)?$"`
|
234
|
+
|
235
|
+
Regular expression used to determine whether an input file is in gzip format.
|
236
|
+
|
237
|
+
[id="plugins-{type}s-{plugin}-include_object_properties"]
|
238
|
+
===== `include_object_properties`
|
239
|
+
|
240
|
+
* Value type is <<boolean,boolean>>
|
241
|
+
* Default value is `false`
|
242
|
+
|
243
|
+
Whether or not to include the S3 object's properties (last_modified, content_type, metadata) into each Event at
|
244
|
+
`[@metadata][s3]`. Regardless of this setting, `[@metadata][s3][key]` will always be present.
|
245
|
+
|
246
|
+
[id="plugins-{type}s-{plugin}-interval"]
|
247
|
+
===== `interval`
|
248
|
+
|
249
|
+
* Value type is <<number,number>>
|
250
|
+
* Default value is `60`
|
251
|
+
|
252
|
+
Interval to wait before checking the file list again after a run is finished.
|
253
|
+
Value is in seconds.
|
254
|
+
|
255
|
+
[id="plugins-{type}s-{plugin}-prefix"]
|
256
|
+
===== `prefix`
|
257
|
+
|
258
|
+
* Value type is <<string,string>>
|
259
|
+
* Default value is `nil`
|
260
|
+
|
261
|
+
If specified, the prefix of filenames in the bucket must match this value (a literal string, not a regexp).
|
262
|
+
|
263
|
+
[id="plugins-{type}s-{plugin}-proxy_uri"]
|
264
|
+
===== `proxy_uri`
|
265
|
+
|
266
|
+
* Value type is <<string,string>>
|
267
|
+
* There is no default value for this setting.
|
268
|
+
|
269
|
+
URI of the proxy server, if one is required.
|
270
|
+
|
271
|
+
[id="plugins-{type}s-{plugin}-region"]
|
272
|
+
===== `region`
|
273
|
+
|
274
|
+
* Value type is <<string,string>>
|
275
|
+
* Default value is `"us-east-1"`
|
276
|
+
|
277
|
+
The AWS Region
|
278
|
+
|
279
|
+
[id="plugins-{type}s-{plugin}-role_arn"]
|
280
|
+
===== `role_arn`
|
281
|
+
|
282
|
+
* Value type is <<string,string>>
|
283
|
+
* There is no default value for this setting.
|
284
|
+
|
285
|
+
The AWS IAM Role to assume, if any.
|
286
|
+
This is used to generate temporary credentials, typically for cross-account access.
|
287
|
+
See the https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html[AssumeRole API documentation] for more information.
|
288
|
+
|
289
|
+
[id="plugins-{type}s-{plugin}-role_session_name"]
|
290
|
+
===== `role_session_name`
|
291
|
+
|
292
|
+
* Value type is <<string,string>>
|
293
|
+
* Default value is `"logstash"`
|
294
|
+
|
295
|
+
Session name to use when assuming an IAM role.
|
296
|
+
|
297
|
+
[id="plugins-{type}s-{plugin}-secret_access_key"]
|
298
|
+
===== `secret_access_key`
|
299
|
+
|
300
|
+
* Value type is <<string,string>>
|
301
|
+
* There is no default value for this setting.
|
302
|
+
|
303
|
+
The AWS Secret Access Key
|
304
|
+
|
305
|
+
[id="plugins-{type}s-{plugin}-session_token"]
|
306
|
+
===== `session_token`
|
307
|
+
|
308
|
+
* Value type is <<string,string>>
|
309
|
+
* There is no default value for this setting.
|
310
|
+
|
311
|
+
The AWS session token for temporary credentials.
|
312
|
+
|
313
|
+
[id="plugins-{type}s-{plugin}-sincedb_path"]
|
314
|
+
===== `sincedb_path`
|
315
|
+
|
316
|
+
* Value type is <<string,string>>
|
317
|
+
* Default value is `nil`
|
318
|
+
|
319
|
+
Where to write the since database (keeps track of the date
|
320
|
+
the last handled file was added to S3). The default will write
|
321
|
+
sincedb files in the directory '{path.data}/plugins/inputs/s3/'.
|
322
|
+
|
323
|
+
If specified, this setting must be a filename path and not just a directory.
|
324
|
+
|
325
|
+
[id="plugins-{type}s-{plugin}-temporary_directory"]
|
326
|
+
===== `temporary_directory`
|
327
|
+
|
328
|
+
* Value type is <<string,string>>
|
329
|
+
* Default value is `"/tmp/logstash"`
|
330
|
+
|
331
|
+
Set the directory where Logstash will store temporary files before processing them.
|
332
|
+
|
333
|
+
[id="plugins-{type}s-{plugin}-watch_for_new_files"]
|
334
|
+
===== `watch_for_new_files`
|
335
|
+
|
336
|
+
* Value type is <<boolean,boolean>>
|
337
|
+
* Default value is `true`
|
338
|
+
|
339
|
+
Whether or not to watch for new files.
|
340
|
+
Disabling this option causes the input to close itself after processing the files from a single listing.
|
341
|
+
|
342
|
+
[id="plugins-{type}s-{plugin}-common-options"]
|
343
|
+
include::{include_path}/{type}.asciidoc[]
|
344
|
+
|
345
|
+
:default_codec!:
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# This is a patch related to autoloading under JRuby.
|
2
|
+
#
|
3
|
+
# The fix exists in JRuby 9k but not in the current JRuby; it is not clear when, or whether, it will be backported.
|
4
|
+
# https://github.com/jruby/jruby/issues/3645
|
5
|
+
#
|
6
|
+
# AWS is doing tricky name discovery in the module to generate the correct error class and
|
7
|
+
# this strategy is broken in JRuby, and `eager_autoload` doesn't fix this issue.
|
8
|
+
#
|
9
|
+
# This will be a short-lived patch, since AWS is removing the need for it.
|
10
|
+
# see: https://github.com/aws/aws-sdk-ruby/issues/1301#issuecomment-261115960
|
11
|
+
old_stderr = $stderr
|
12
|
+
|
13
|
+
$stderr = StringIO.new
|
14
|
+
begin
|
15
|
+
module Aws
|
16
|
+
const_set(:S3, Aws::S3)
|
17
|
+
end
|
18
|
+
ensure
|
19
|
+
$stderr = old_stderr
|
20
|
+
end
|