fluent-plugin-s3 1.5.1 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
- data/.github/ISSUE_TEMPLATE/config.yml +5 -0
- data/.github/ISSUE_TEMPLATE/feature_request.yaml +38 -0
- data/.github/workflows/linux.yml +5 -3
- data/.github/workflows/stale-actions.yml +22 -0
- data/ChangeLog +15 -0
- data/README.md +13 -781
- data/VERSION +1 -1
- data/docs/credentials.md +171 -0
- data/docs/howto.md +92 -0
- data/docs/input.md +98 -0
- data/docs/output.md +453 -0
- data/docs/v0.12.md +52 -0
- data/fluent-plugin-s3.gemspec +3 -0
- data/lib/fluent/plugin/in_s3.rb +26 -1
- data/lib/fluent/plugin/out_s3.rb +12 -3
- data/lib/fluent/plugin/s3_compressor_parquet.rb +83 -0
- data/test/test_in_s3.rb +108 -5
- data/test/test_out_s3.rb +167 -118
- metadata +28 -7
- data/.travis.yml +0 -24
data/docs/output.md
ADDED
@@ -0,0 +1,453 @@
|
|
1
|
+
# Configuration: Output
|
2
|
+
|
3
|
+
Here is a sample configuration and available parameters for fluentd v1 or later.
|
4
|
+
See also [Configuration: credentials](credentials.md) for common comprehensive parameters.
|
5
|
+
|
6
|
+
<match pattern>
|
7
|
+
@type s3
|
8
|
+
|
9
|
+
aws_key_id YOUR_AWS_KEY_ID
|
10
|
+
aws_sec_key YOUR_AWS_SECRET_KEY
|
11
|
+
s3_bucket YOUR_S3_BUCKET_NAME
|
12
|
+
s3_region ap-northeast-1
|
13
|
+
|
14
|
+
path logs/${tag}/%Y/%m/%d/
|
15
|
+
s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
|
16
|
+
|
17
|
+
# if you want to use ${tag} or %Y/%m/%d/ like syntax in path / s3_object_key_format,
|
18
|
+
# need to specify tag for ${tag} and time for %Y/%m/%d in <buffer> argument.
|
19
|
+
<buffer tag,time>
|
20
|
+
@type file
|
21
|
+
path /var/log/fluent/s3
|
22
|
+
timekey 3600 # 1 hour partition
|
23
|
+
timekey_wait 10m
|
24
|
+
timekey_use_utc true # use utc
|
25
|
+
</buffer>
|
26
|
+
<format>
|
27
|
+
@type json
|
28
|
+
</format>
|
29
|
+
</match>
|
30
|
+
|
31
|
+
For [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section), you can use any record field in `path` / `s3_object_key_format`.
|
32
|
+
|
33
|
+
path logs/${tag}/${foo}
|
34
|
+
<buffer tag,foo>
|
35
|
+
# parameters...
|
36
|
+
</buffer>
|
37
|
+
|
38
|
+
See official article for available parameters and usage of placeholder in detail: [Config: Buffer Section](https://docs.fluentd.org/configuration/buffer-section#placeholders)
|
39
|
+
|
40
|
+
Note that this configuration doesn't work with fluentd v0.12. See [v0.12](v0.12.md) for v0.12 style.
|
41
|
+
|
42
|
+
## aws_iam_retries
|
43
|
+
|
44
|
+
This parameter is deprecated. Use [instance_profile_credentials](credentials.md#instance_profile_credentials) instead.
|
45
|
+
|
46
|
+
The number of attempts to make (with exponential backoff) when loading
|
47
|
+
instance profile credentials from the EC2 metadata service using an IAM
|
48
|
+
role. Defaults to 5 retries.
|
49
|
+
|
50
|
+
## s3_bucket (required)
|
51
|
+
|
52
|
+
S3 bucket name.
|
53
|
+
|
54
|
+
## s3_region
|
55
|
+
|
56
|
+
s3 region name. For example, US West (Oregon) Region is "us-west-2". The
|
57
|
+
full list of regions is available here:
|
58
|
+
http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region. We
|
59
|
+
recommend using `s3_region` instead of [`s3_endpoint`](#s3_endpoint).
|
60
|
+
|
61
|
+
## s3_endpoint
|
62
|
+
|
63
|
+
endpoint for S3 compatible services. For example, Riak CS based storage or
|
64
|
+
something. This option is deprecated for AWS S3, use [`s3_region`](#s3_region) instead.
|
65
|
+
|
66
|
+
See also AWS article: [Working with Regions](https://aws.amazon.com/blogs/developer/working-with-regions/).
|
67
|
+
|
68
|
+
## enable_transfer_acceleration
|
69
|
+
|
70
|
+
Enable [S3 Transfer Acceleration](https://docs.aws.amazon.com/AmazonS3/latest/dev/transfer-acceleration.html) for uploads. **IMPORTANT**: For this to work, you must first enable this feature on your destination S3 bucket.
|
71
|
+
|
72
|
+
## enable_dual_stack
|
73
|
+
|
74
|
+
Enable [Amazon S3 Dual-Stack Endpoints](https://docs.aws.amazon.com/AmazonS3/latest/dev/dual-stack-endpoints.html) for uploads. Will make it possible to use either IPv4 or IPv6 when connecting to S3.
|
75
|
+
|
76
|
+
## use_bundled_cert
|
77
|
+
|
78
|
+
For cases where the default SSL certificate is unavailable (e.g. Windows), you can set this option to true in order to use the AWS SDK bundled certificate. Default is false.
|
79
|
+
|
80
|
+
This fixes the following error often seen in Windows:
|
81
|
+
|
82
|
+
SSL_connect returned=1 errno=0 state=SSLv3 read server certificate B: certificate verify failed (Seahorse::Client::NetworkingError)
|
83
|
+
|
84
|
+
## ssl_verify_peer
|
85
|
+
|
86
|
+
Verify SSL certificate of the endpoint. Default is true. Set false when you want to ignore the endpoint SSL certificate.
|
87
|
+
|
88
|
+
## s3_object_key_format
|
89
|
+
|
90
|
+
The format of S3 object keys. You can use several built-in variables:
|
91
|
+
|
92
|
+
* %{path}
|
93
|
+
* %{time_slice}
|
94
|
+
* %{index}
|
95
|
+
* %{file_extension}
|
96
|
+
* %{hex_random}
|
97
|
+
* %{uuid_flush}
|
98
|
+
* %{hostname}
|
99
|
+
|
100
|
+
to decide keys dynamically.
|
101
|
+
|
102
|
+
* %{path} is exactly the value of **path** configured in the configuration file.
|
103
|
+
E.g., "logs/" in the example configuration above.
|
104
|
+
* %{time_slice} is the
|
105
|
+
time-slice in text that is formatted with **time_slice_format**.
|
106
|
+
* %{index} is the sequential number starts from 0, increments when multiple files are uploaded to S3 in the same time slice.
|
107
|
+
* %{file_extension} depends on **store_as** parameter.
|
108
|
+
* %{uuid_flush} a uuid that is replaced every time the buffer is flushed.
|
109
|
+
* %{hostname} is replaced with `Socket.gethostname` result.
|
110
|
+
* %{hex_random} a random hex string that is replaced for each buffer chunk, not
|
111
|
+
assured to be unique. This is used to follow a way of performance tuning, `Add
|
112
|
+
a Hex Hash Prefix to Key Name`, written in [Request Rate and Performance
|
113
|
+
Considerations - Amazon Simple Storage
|
114
|
+
Service](https://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html).
|
115
|
+
You can configure the length of string with a
|
116
|
+
`hex_random_length` parameter (Default: 4).
|
117
|
+
|
118
|
+
The default format is `%{path}%{time_slice}_%{index}.%{file_extension}`.
|
119
|
+
In addition, you can use [buffer placeholders](https://docs.fluentd.org/configuration/buffer-section#placeholders) in this parameter,
|
120
|
+
so you can embed tag, time and record value like below:
|
121
|
+
|
122
|
+
s3_object_key_format %{path}/events/%Y%m%d/${tag}_%{index}.%{file_extension}
|
123
|
+
<buffer tag,time>
|
124
|
+
# buffer parameters...
|
125
|
+
</buffer>
|
126
|
+
|
127
|
+
For instance, using the example configuration above, actual object keys on S3
|
128
|
+
will be something like:
|
129
|
+
|
130
|
+
"logs/20130111-22_0.gz"
|
131
|
+
"logs/20130111-23_0.gz"
|
132
|
+
"logs/20130111-23_1.gz"
|
133
|
+
"logs/20130112-00_0.gz"
|
134
|
+
|
135
|
+
With the configuration:
|
136
|
+
|
137
|
+
s3_object_key_format %{path}/events/ts=%{time_slice}/events_%{index}.%{file_extension}
|
138
|
+
path log
|
139
|
+
time_slice_format %Y%m%d-%H
|
140
|
+
|
141
|
+
You get:
|
142
|
+
|
143
|
+
"log/events/ts=20130111-22/events_0.gz"
|
144
|
+
"log/events/ts=20130111-23/events_0.gz"
|
145
|
+
"log/events/ts=20130111-23/events_1.gz"
|
146
|
+
"log/events/ts=20130112-00/events_0.gz"
|
147
|
+
|
148
|
+
NOTE: ${hostname} placeholder is deprecated since v0.8. You can get same result by using [configuration's embedded ruby code feature](https://docs.fluentd.org/configuration/config-file#embedded-ruby-code).
|
149
|
+
|
150
|
+
s3_object_key_format %{path}%{time_slice}_%{hostname}%{index}.%{file_extension}
|
151
|
+
s3_object_key_format "%{path}%{time_slice}_#{Socket.gethostname}%{index}.%{file_extension}"
|
152
|
+
|
153
|
+
Above two configurations are same. The important point is wrapping `""` is needed for `#{Socket.gethostname}`.
|
154
|
+
|
155
|
+
NOTE: If `check_object` is set to `false`, Ensure the value of `s3_object_key_format` must be unique in each write, If not, existing file will be overwritten.
|
156
|
+
|
157
|
+
## force_path_style
|
158
|
+
|
159
|
+
:force_path_style (Boolean) — default: false — When set to true, the
|
160
|
+
bucket name is always left in the request URI and never moved to the host
|
161
|
+
as a sub-domain. See Plugins::S3BucketDns for more details.
|
162
|
+
|
163
|
+
This parameter is deprecated. See AWS announcement: https://aws.amazon.com/blogs/aws/amazon-s3-path-deprecation-plan-the-rest-of-the-story/
|
164
|
+
|
165
|
+
## store_as
|
166
|
+
|
167
|
+
archive format on S3. You can use several format:
|
168
|
+
|
169
|
+
* gzip (default)
|
170
|
+
* json
|
171
|
+
* text
|
172
|
+
* lzo (Need lzop command)
|
173
|
+
* lzma2 (Need xz command)
|
174
|
+
* gzip_command (Need gzip command)
|
175
|
+
* This compressor uses an external gzip command, hence would result in
|
176
|
+
utilizing CPU cores well compared with `gzip`
|
177
|
+
* parquet (Need columnify command)
|
178
|
+
* This compressor uses an external [columnify](https://github.com/reproio/columnify) command.
|
179
|
+
* Use [`<compress>`](#compress-for-parquet-compressor-only) section to configure columnify command behavior.
|
180
|
+
|
181
|
+
See [Use your compression algorithm](howto.md#use-your-compression-algorighm) section for adding another format.
|
182
|
+
|
183
|
+
## \<compress\> (for parquet compressor only) section
|
184
|
+
|
185
|
+
### parquet_compression_codec
|
186
|
+
|
187
|
+
parquet compression codec.
|
188
|
+
|
189
|
+
* uncompressed
|
190
|
+
* snappy (default)
|
191
|
+
* gzip
|
192
|
+
* lzo (unsupported by columnify)
|
193
|
+
* brotli (unsupported by columnify)
|
194
|
+
* lz4 (unsupported by columnify)
|
195
|
+
* zstd
|
196
|
+
|
197
|
+
### parquet_page_size
|
198
|
+
|
199
|
+
parquet file page size. default: 8192 bytes
|
200
|
+
|
201
|
+
### parquet_row_group_size
|
202
|
+
|
203
|
+
parquet file row group size. default: 128 MB
|
204
|
+
|
205
|
+
### record_type
|
206
|
+
|
207
|
+
record data format type.
|
208
|
+
|
209
|
+
* avro
|
210
|
+
* csv
|
211
|
+
* jsonl
|
212
|
+
* msgpack
|
213
|
+
* tsv
|
214
|
+
* msgpack (default)
|
215
|
+
* json
|
216
|
+
|
217
|
+
### schema_type
|
218
|
+
|
219
|
+
schema type.
|
220
|
+
|
221
|
+
* avro (default)
|
222
|
+
* bigquery
|
223
|
+
|
224
|
+
### schema_file (required)
|
225
|
+
|
226
|
+
path to schema file.
|
227
|
+
|
228
|
+
## \<format\> section
|
229
|
+
|
230
|
+
Change one line format in the S3 object. Supported formats are "out_file",
|
231
|
+
"json", "ltsv", "single_value" and other formatter plugins. See also [official Formatter article](https://docs.fluentd.org/formatter).
|
232
|
+
|
233
|
+
* out_file (default).
|
234
|
+
|
235
|
+
time\ttag\t{..json1..}
|
236
|
+
time\ttag\t{..json2..}
|
237
|
+
...
|
238
|
+
|
239
|
+
* json
|
240
|
+
|
241
|
+
{..json1..}
|
242
|
+
{..json2..}
|
243
|
+
...
|
244
|
+
|
245
|
+
|
246
|
+
At this format, "time" and "tag" are omitted. But you can set these
|
247
|
+
information to the record by setting `<inject>` option. If you set following configuration in
|
248
|
+
S3 output:
|
249
|
+
|
250
|
+
<format>
|
251
|
+
@type json
|
252
|
+
</format>
|
253
|
+
<inject>
|
254
|
+
time_key log_time
|
255
|
+
</inject>
|
256
|
+
|
257
|
+
then the record has log_time field.
|
258
|
+
|
259
|
+
{"log_time":"time string",...}
|
260
|
+
|
261
|
+
See also [official Inject Section article](https://docs.fluentd.org/configuration/inject-section).
|
262
|
+
|
263
|
+
* ltsv
|
264
|
+
|
265
|
+
key1:value1\tkey2:value2
|
266
|
+
key1:value1\tkey2:value2
|
267
|
+
...
|
268
|
+
|
269
|
+
* single_value
|
270
|
+
|
271
|
+
|
272
|
+
Use specified value instead of entire record. If you get '{"message":"my
|
273
|
+
log"}', then contents are
|
274
|
+
|
275
|
+
my log1
|
276
|
+
my log2
|
277
|
+
...
|
278
|
+
|
279
|
+
You can change key name by "message_key" option.
|
280
|
+
|
281
|
+
## auto_create_bucket
|
282
|
+
|
283
|
+
Create S3 bucket if it does not exist. Default is true.
|
284
|
+
|
285
|
+
## check_bucket
|
286
|
+
|
287
|
+
Check mentioned bucket if it exists in AWS or not. Default is true.
|
288
|
+
|
289
|
+
When it is false, fluentd will not check aws s3 for the existence of the mentioned bucket.
|
290
|
+
This is the case where bucket will be pre-created before running fluentd.
|
291
|
+
|
292
|
+
## check_object
|
293
|
+
|
294
|
+
Check object before creation if it exists or not. Default is true.
|
295
|
+
|
296
|
+
When it is false, s3_object_key_format will be %{path}%{time_slice}_%{hms_slice}.%{file_extension} by default where,
|
297
|
+
hms_slice will be time-slice in hhmmss format, so that each object will be unique.
|
298
|
+
Example object name, assuming it is created on 2016/11/16 3:30:54 PM: 20161116_153054.txt (extension can be anything as per user's choice)
|
299
|
+
|
300
|
+
## check_apikey_on_start
|
301
|
+
|
302
|
+
Check AWS key on start. Default is true.
|
303
|
+
|
304
|
+
## proxy_uri
|
305
|
+
|
306
|
+
uri of proxy environment.
|
307
|
+
|
308
|
+
## path
|
309
|
+
|
310
|
+
path prefix of the files on S3. Default is "" (no prefix).
|
311
|
+
[buffer placeholder](https://docs.fluentd.org/configuration/buffer-section#placeholders) is supported,
|
312
|
+
so you can embed tag, time and record value like below.
|
313
|
+
|
314
|
+
path logs/%Y%m%d/${tag}/
|
315
|
+
<buffer tag,time>
|
316
|
+
# buffer parameters...
|
317
|
+
</buffer>
|
318
|
+
|
319
|
+
## utc
|
320
|
+
|
321
|
+
Use UTC instead of local time.
|
322
|
+
|
323
|
+
## storage_class
|
324
|
+
|
325
|
+
Set storage class. Possible values are `STANDARD`, `REDUCED_REDUNDANCY`, `STANDARD_IA` from [Ruby SDK](http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Object.html#storage_class-instance_method).
|
326
|
+
|
327
|
+
Note that reduced redundancy is [not recommended](https://serverfault.com/a/1010951/512362).
|
328
|
+
|
329
|
+
## reduced_redundancy
|
330
|
+
|
331
|
+
Use S3 reduced redundancy storage for 33% cheaper pricing. Default is
|
332
|
+
false.
|
333
|
+
|
334
|
+
This is deprecated. Use `storage_class REDUCED_REDUNDANCY` instead.
|
335
|
+
|
336
|
+
## acl
|
337
|
+
|
338
|
+
Permission for the object in S3. This is useful for cross-account access
|
339
|
+
using IAM roles. Valid values are:
|
340
|
+
|
341
|
+
* private (default)
|
342
|
+
* public-read
|
343
|
+
* public-read-write (not recommended - see [Canned
|
344
|
+
ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl))
|
345
|
+
* authenticated-read
|
346
|
+
* bucket-owner-read
|
347
|
+
* bucket-owner-full-control
|
348
|
+
|
349
|
+
To use cross-account access, you will need to create a bucket policy granting
|
350
|
+
the specific access required. Refer to the [AWS
|
351
|
+
documentation](http://docs.aws.amazon.com/AmazonS3/latest/dev/example-walkthroughs-managing-access-example3.html) for examples.
|
352
|
+
|
353
|
+
## grant_full_control
|
354
|
+
|
355
|
+
Allows grantee READ, READ_ACP, and WRITE_ACP permissions on the object.
|
356
|
+
This is useful for cross-account access using IAM roles.
|
357
|
+
|
358
|
+
Valid values are `id="Grantee-CanonicalUserID"`. Please specify the grantee's canonical user ID.
|
359
|
+
|
360
|
+
e.g. `id="79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be"`
|
361
|
+
|
362
|
+
Note that a canonical user ID is different from an AWS account ID.
|
363
|
+
Please refer to [AWS documentation](https://docs.aws.amazon.com/general/latest/gr/acct-identifiers.html) for more details.
|
364
|
+
|
365
|
+
## grant_read
|
366
|
+
|
367
|
+
Allows grantee to read the object data and its metadata.
|
368
|
+
Valid values are `id="Grantee-CanonicalUserID"`. Please specify the grantee's canonical user ID.
|
369
|
+
|
370
|
+
e.g. `id="79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be"`
|
371
|
+
|
372
|
+
## grant_read_acp
|
373
|
+
|
374
|
+
Allows grantee to read the object ACL.
|
375
|
+
Valid values are `id="Grantee-CanonicalUserID"`. Please specify the grantee's canonical user ID.
|
376
|
+
|
377
|
+
e.g. `id="79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be"`
|
378
|
+
|
379
|
+
## grant_write_acp
|
380
|
+
|
381
|
+
Allows grantee to write the ACL for the applicable object.
|
382
|
+
Valid values are `id="Grantee-CanonicalUserID"`. Please specify the grantee's canonical user ID.
|
383
|
+
|
384
|
+
e.g. `id="79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be"`
|
385
|
+
|
386
|
+
## hex_random_length
|
387
|
+
|
388
|
+
The length of `%{hex_random}` placeholder. Default is 4 as written in
|
389
|
+
[Request Rate and Performance Considerations - Amazon Simple Storage
|
390
|
+
Service](https://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html).
|
391
|
+
The maximum length is 16.
|
392
|
+
|
393
|
+
## index_format
|
394
|
+
|
395
|
+
`%{index}` is formatted by [sprintf](http://ruby-doc.org/core-2.2.0/Kernel.html#method-i-sprintf) using this format_string. Default is '%d'. Zero padding is supported e.g. `%04d` to ensure minimum length four digits. `%{index}` can be in lowercase or uppercase hex using '%x' or '%X'
|
396
|
+
|
397
|
+
## overwrite
|
398
|
+
|
399
|
+
Overwrite already existing path. Default is false, which raises an error
|
400
|
+
if an s3 object with the same path already exists, or increments the
|
401
|
+
`%{index}` placeholder until finding an absent path.
|
402
|
+
|
403
|
+
## use_server_side_encryption
|
404
|
+
|
405
|
+
The Server-side encryption algorithm used when storing this object in S3
|
406
|
+
(e.g., AES256, aws:kms)
|
407
|
+
|
408
|
+
## ssekms_key_id
|
409
|
+
|
410
|
+
Specifies the AWS KMS key ID to use for object encryption. You have to
|
411
|
+
set "aws:kms" to [`use_server_side_encryption`](#use_server_side_encryption) to use the KMS encryption.
|
412
|
+
|
413
|
+
## sse_customer_algorithm
|
414
|
+
|
415
|
+
Specifies the algorithm to use when encrypting the object (e.g., AES256).
|
416
|
+
|
417
|
+
## sse_customer_key
|
418
|
+
|
419
|
+
Specifies the customer-provided encryption key to use for object encryption (SSE-C).
|
420
|
+
|
421
|
+
## sse_customer_key_md5
|
422
|
+
|
423
|
+
Specifies the 128-bit MD5 digest of the encryption key according to RFC 1321.
|
424
|
+
|
425
|
+
## compute_checksums
|
426
|
+
|
427
|
+
AWS SDK uses MD5 for API request/response by default. On FIPS enabled environment,
|
428
|
+
OpenSSL returns an error because MD5 is disabled. If you want to use
|
429
|
+
this plugin on FIPS enabled environment, set `compute_checksums false`.
|
430
|
+
|
431
|
+
## signature_version
|
432
|
+
|
433
|
+
Signature version for API request. `s3` means signature version 2 and
|
434
|
+
`v4` means signature version 4. Default is `nil` (Following SDK's default).
|
435
|
+
It would be useful when you use S3 compatible storage that accepts only signature version 2.
|
436
|
+
|
437
|
+
## warn_for_delay
|
438
|
+
|
439
|
+
Given a threshold to treat events as delay, output warning logs if delayed events were put into s3.
|
440
|
+
|
441
|
+
## tagging
|
442
|
+
|
443
|
+
The S3 tag-set for the object. The tag-set must be encoded as URL Query parameters. (For example, "Key1=Value1").
|
444
|
+
|
445
|
+
## \<bucket_lifecycle_rule\> section
|
446
|
+
|
447
|
+
Specify one or more lifecycle rules for the bucket
|
448
|
+
|
449
|
+
<bucket_lifecycle_rule>
|
450
|
+
id UNIQUE_ID_FOR_THE_RULE
|
451
|
+
prefix OPTIONAL_PREFIX # Objects whose keys begin with this prefix will be affected by the rule. If not specified all objects of the bucket will be affected
|
452
|
+
expiration_days NUMBER_OF_DAYS # The number of days before the object will expire
|
453
|
+
</bucket_lifecycle_rule>
|
data/docs/v0.12.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Configuration: Output (v0.12 style)
|
2
|
+
|
3
|
+
Here is a sample configuration for old fluentd v0.12. It works with fluentd v1 too but not recommended for it.
|
4
|
+
|
5
|
+
<match pattern>
|
6
|
+
@type s3
|
7
|
+
|
8
|
+
aws_key_id YOUR_AWS_KEY_ID
|
9
|
+
aws_sec_key YOUR_AWS_SECRET_KEY
|
10
|
+
s3_bucket YOUR_S3_BUCKET_NAME
|
11
|
+
s3_region ap-northeast-1
|
12
|
+
|
13
|
+
path logs/
|
14
|
+
s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
|
15
|
+
buffer_path /var/log/fluent/s3
|
16
|
+
time_slice_format %Y%m%d-%H
|
17
|
+
time_slice_wait 10m
|
18
|
+
utc
|
19
|
+
format json
|
20
|
+
</match>
|
21
|
+
|
22
|
+
If you want to embed tag in [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format), you need to use `fluent-plugin-forest` plugin.
|
23
|
+
|
24
|
+
The following explanations are about the differences with v1. Other parameters are same with v1, see [Configuration: Output](output.md) for them.
|
25
|
+
|
26
|
+
## format (for v0.12)
|
27
|
+
|
28
|
+
@format json
|
29
|
+
include_time_key true
|
30
|
+
time_key log_time # default is time
|
31
|
+
|
32
|
+
This parameter is for v0.12. Use [`<format>`](https://docs.fluentd.org/configuration/format-section) and [`<inject>`](https://docs.fluentd.org/configuration/inject-section) for v1.
|
33
|
+
|
34
|
+
## buffer_path (for v0.12)
|
35
|
+
|
36
|
+
path prefix of the files to buffer logs.
|
37
|
+
|
38
|
+
This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `path` in v1.
|
39
|
+
|
40
|
+
## time_slice_format (for v0.12)
|
41
|
+
|
42
|
+
Format of the time used as the file name. Default is '%Y%m%d'. Use
|
43
|
+
'%Y%m%d%H' to split files hourly.
|
44
|
+
|
45
|
+
This parameter is for v0.12. Use [buffer placeholder](https://docs.fluentd.org/configuration/buffer-section#placeholders) for [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format) in v1.
|
46
|
+
|
47
|
+
## time_slice_wait (for v0.12)
|
48
|
+
|
49
|
+
The time to wait old logs. Default is 10 minutes. Specify larger value if
|
50
|
+
old logs may reach.
|
51
|
+
|
52
|
+
This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `timekey_wait` in v1.
|
data/fluent-plugin-s3.gemspec
CHANGED
@@ -23,4 +23,7 @@ Gem::Specification.new do |gem|
|
|
23
23
|
gem.add_development_dependency "test-unit", ">= 3.0.8"
|
24
24
|
gem.add_development_dependency "test-unit-rr", ">= 1.0.3"
|
25
25
|
gem.add_development_dependency "timecop"
|
26
|
+
# aws-sdk-core requires one of ox, oga, libxml, nokogiri or rexml,
|
27
|
+
# and rexml is no longer default gem as of Ruby 3.0.
|
28
|
+
gem.add_development_dependency "rexml"
|
26
29
|
end
|
data/lib/fluent/plugin/in_s3.rb
CHANGED
@@ -98,6 +98,10 @@ module Fluent::Plugin
|
|
98
98
|
config_param :queue_owner_aws_account_id, :string, default: nil
|
99
99
|
desc "Use 's3_region' instead"
|
100
100
|
config_param :endpoint, :string, default: nil
|
101
|
+
desc "AWS access key id for SQS user"
|
102
|
+
config_param :aws_key_id, :string, default: nil, secret: true
|
103
|
+
desc "AWS secret key for SQS user."
|
104
|
+
config_param :aws_sec_key, :string, default: nil, secret: true
|
101
105
|
desc "Skip message deletion"
|
102
106
|
config_param :skip_delete, :bool, default: false
|
103
107
|
desc "The long polling interval."
|
@@ -115,10 +119,15 @@ module Fluent::Plugin
|
|
115
119
|
|
116
120
|
attr_reader :bucket
|
117
121
|
|
122
|
+
def reject_s3_endpoint?
|
123
|
+
@s3_endpoint && !@s3_endpoint.end_with?('vpce.amazonaws.com') &&
|
124
|
+
@s3_endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @s3_endpoint.include?(e) }
|
125
|
+
end
|
126
|
+
|
118
127
|
def configure(conf)
|
119
128
|
super
|
120
129
|
|
121
|
-
if
|
130
|
+
if reject_s3_endpoint?
|
122
131
|
raise Fluent::ConfigError, "s3_endpoint parameter is not supported for S3, use s3_region instead. This parameter is for S3 compatible services"
|
123
132
|
end
|
124
133
|
|
@@ -131,6 +140,14 @@ module Fluent::Plugin
|
|
131
140
|
raise Fluent::ConfigError, "sqs/queue_name is required"
|
132
141
|
end
|
133
142
|
|
143
|
+
if !!@aws_key_id ^ !!@aws_sec_key
|
144
|
+
raise Fluent::ConfigError, "aws_key_id or aws_sec_key is missing"
|
145
|
+
end
|
146
|
+
|
147
|
+
if !!@sqs.aws_key_id ^ !!@sqs.aws_sec_key
|
148
|
+
raise Fluent::ConfigError, "sqs/aws_key_id or sqs/aws_sec_key is missing"
|
149
|
+
end
|
150
|
+
|
134
151
|
Aws.use_bundled_cert! if @use_bundled_cert
|
135
152
|
|
136
153
|
@extractor = EXTRACTOR_REGISTRY.lookup(@store_as).new(log: log)
|
@@ -139,6 +156,10 @@ module Fluent::Plugin
|
|
139
156
|
@parser = parser_create(conf: parser_config, default_type: DEFAULT_PARSE_TYPE)
|
140
157
|
end
|
141
158
|
|
159
|
+
def multi_workers_ready?
|
160
|
+
true
|
161
|
+
end
|
162
|
+
|
142
163
|
def start
|
143
164
|
super
|
144
165
|
|
@@ -270,6 +291,10 @@ module Fluent::Plugin
|
|
270
291
|
options[:region] = @s3_region if @s3_region
|
271
292
|
options[:endpoint] = @sqs.endpoint if @sqs.endpoint
|
272
293
|
options[:http_proxy] = @proxy_uri if @proxy_uri
|
294
|
+
if @sqs.aws_key_id && @sqs.aws_sec_key
|
295
|
+
options[:access_key_id] = @sqs.aws_key_id
|
296
|
+
options[:secret_access_key] = @sqs.aws_sec_key
|
297
|
+
end
|
273
298
|
log.on_trace do
|
274
299
|
options[:http_wire_trace] = true
|
275
300
|
options[:logger] = log
|
data/lib/fluent/plugin/out_s3.rb
CHANGED
@@ -147,6 +147,8 @@ module Fluent::Plugin
|
|
147
147
|
config_param :signature_version, :string, default: nil # use nil to follow SDK default configuration
|
148
148
|
desc "Given a threshold to treat events as delay, output warning logs if delayed events were put into s3"
|
149
149
|
config_param :warn_for_delay, :time, default: nil
|
150
|
+
desc "Arbitrary S3 tag-set for the object"
|
151
|
+
config_param :tagging, :string, default: nil
|
150
152
|
desc "Arbitrary S3 metadata headers to set for the object"
|
151
153
|
config_param :s3_metadata, :hash, default: nil
|
152
154
|
config_section :bucket_lifecycle_rule, param_name: :bucket_lifecycle_rules, multi: true do
|
@@ -173,6 +175,11 @@ module Fluent::Plugin
|
|
173
175
|
|
174
176
|
MAX_HEX_RANDOM_LENGTH = 16
|
175
177
|
|
178
|
+
def reject_s3_endpoint?
|
179
|
+
@s3_endpoint && !@s3_endpoint.end_with?('vpce.amazonaws.com') &&
|
180
|
+
@s3_endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @s3_endpoint.include?(e) }
|
181
|
+
end
|
182
|
+
|
176
183
|
def configure(conf)
|
177
184
|
compat_parameters_convert(conf, :buffer, :formatter, :inject)
|
178
185
|
|
@@ -180,7 +187,7 @@ module Fluent::Plugin
|
|
180
187
|
|
181
188
|
Aws.use_bundled_cert! if @use_bundled_cert
|
182
189
|
|
183
|
-
if
|
190
|
+
if reject_s3_endpoint?
|
184
191
|
raise Fluent::ConfigError, "s3_endpoint parameter is not supported for S3, use s3_region instead. This parameter is for S3 compatible services"
|
185
192
|
end
|
186
193
|
|
@@ -355,6 +362,7 @@ module Fluent::Plugin
|
|
355
362
|
put_options[:grant_read] = @grant_read if @grant_read
|
356
363
|
put_options[:grant_read_acp] = @grant_read_acp if @grant_read_acp
|
357
364
|
put_options[:grant_write_acp] = @grant_write_acp if @grant_write_acp
|
365
|
+
put_options[:tagging] = @tagging if @tagging
|
358
366
|
|
359
367
|
if @s3_metadata
|
360
368
|
put_options[:metadata] = {}
|
@@ -456,8 +464,9 @@ module Fluent::Plugin
|
|
456
464
|
log.warn "The default value of s3_object_key_format will use ${chunk_id} instead of %{index} to avoid object conflict in v2"
|
457
465
|
end
|
458
466
|
|
459
|
-
|
460
|
-
|
467
|
+
is_working_on_parallel = @buffer_config.flush_thread_count > 1 || system_config.workers > 1
|
468
|
+
if is_working_on_parallel && ['${chunk_id}', '%{uuid_flush}'].none? { |key| @s3_object_key_format.include?(key) }
|
469
|
+
log.warn "No ${chunk_id} or %{uuid_flush} in s3_object_key_format with multiple flush threads or multiple workers. Recommend to set ${chunk_id} or %{uuid_flush} to avoid data lost by object conflict"
|
461
470
|
end
|
462
471
|
end
|
463
472
|
|