fluent-plugin-s3 1.5.1 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +4 -0
- data/README.md +11 -781
- data/VERSION +1 -1
- data/docs/credentials.md +171 -0
- data/docs/howto.md +92 -0
- data/docs/input.md +90 -0
- data/docs/output.md +445 -0
- data/docs/v0.12.md +52 -0
- data/lib/fluent/plugin/s3_compressor_parquet.rb +83 -0
- metadata +10 -7
- data/.travis.yml +0 -24
data/docs/v0.12.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Configuration: Output (v0.12 style)
|
2
|
+
|
3
|
+
Here is a sample configuration for old fluentd v0.12. It also works with fluentd v1, but using it there is not recommended.
|
4
|
+
|
5
|
+
<match pattern>
|
6
|
+
@type s3
|
7
|
+
|
8
|
+
aws_key_id YOUR_AWS_KEY_ID
|
9
|
+
aws_sec_key YOUR_AWS_SECRET_KEY
|
10
|
+
s3_bucket YOUR_S3_BUCKET_NAME
|
11
|
+
s3_region ap-northeast-1
|
12
|
+
|
13
|
+
path logs/
|
14
|
+
s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
|
15
|
+
buffer_path /var/log/fluent/s3
|
16
|
+
time_slice_format %Y%m%d-%H
|
17
|
+
time_slice_wait 10m
|
18
|
+
utc
|
19
|
+
format json
|
20
|
+
</match>
|
21
|
+
|
22
|
+
If you want to embed tag in [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format), you need to use `fluent-plugin-forest` plugin.
|
23
|
+
|
24
|
+
The following explanations are about the differences with v1. Other parameters are same with v1, see [Configuration: Output](output.md) for them.
|
25
|
+
|
26
|
+
## format (for v0.12)
|
27
|
+
|
28
|
+
@format json
|
29
|
+
include_time_key true
|
30
|
+
time_key log_time # default is time
|
31
|
+
|
32
|
+
This parameter is for v0.12. Use [`<format>`](https://docs.fluentd.org/configuration/format-section) and [`<inject>`](https://docs.fluentd.org/configuration/inject-section) for v1.
|
33
|
+
|
34
|
+
## buffer_path (for v0.12)
|
35
|
+
|
36
|
+
Path prefix of the files used to buffer logs.
|
37
|
+
|
38
|
+
This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `path` in v1.
|
39
|
+
|
40
|
+
## time_slice_format (for v0.12)
|
41
|
+
|
42
|
+
Format of the time used as the file name. Default is '%Y%m%d'. Use
|
43
|
+
'%Y%m%d%H' to split files hourly.
|
44
|
+
|
45
|
+
This parameter is for v0.12. Use [buffer placeholder](https://docs.fluentd.org/configuration/buffer-section#placeholders) for [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format) in v1.
|
46
|
+
|
47
|
+
## time_slice_wait (for v0.12)
|
48
|
+
|
49
|
+
The time to wait for delayed logs. Default is 10 minutes. Specify a larger value if
|
50
|
+
old logs may arrive late.
|
51
|
+
|
52
|
+
This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `timekey_wait` in v1.
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require "open3"
# FIX: Tempfile is instantiated in #compress below but was never required in
# this file — it only worked when another already-loaded file pulled it in.
require "tempfile"

module Fluent::Plugin
  class S3Output
    # Compressor that converts a buffer chunk into an Apache Parquet file by
    # shelling out to the external `columnify` command.
    # NOTE(review): assumes the `columnify` binary is on PATH — verified at
    # configure time via check_command.
    class ParquetCompressor < Compressor
      S3Output.register_compressor("parquet", self)

      config_section :compress, multi: false do
        desc "parquet compression codec"
        config_param :parquet_compression_codec, :enum, list: [:uncompressed, :snappy, :gzip, :lzo, :brotli, :lz4, :zstd], default: :snappy
        desc "parquet file page size"
        config_param :parquet_page_size, :size, default: 8192
        desc "parquet file row group size"
        config_param :parquet_row_group_size, :size, default: 128 * 1024 * 1024
        desc "record data format type"
        config_param :record_type, :enum, list: [:avro, :csv, :jsonl, :msgpack, :tsv, :json], default: :msgpack
        desc "schema type"
        config_param :schema_type, :enum, list: [:avro, :bigquery], default: :avro
        desc "path to schema file"
        config_param :schema_file, :string
      end

      # Validates the configuration and pre-computes the CLI argument forms.
      # Raises Fluent::ConfigError for codecs columnify cannot produce.
      def configure(conf)
        super
        check_command("columnify", "-h")

        # :lzo, :brotli and :lz4 are accepted by the enum (kept for interface
        # stability) but rejected here because columnify does not support them.
        if [:lzo, :brotli, :lz4].include?(@compress.parquet_compression_codec)
          raise Fluent::ConfigError, "unsupported compression codec: #{@compress.parquet_compression_codec}"
        end

        # columnify expects the codec name upper-cased (e.g. "SNAPPY").
        @parquet_compression_codec = @compress.parquet_compression_codec.to_s.upcase
        # columnify has no "json" record type; newline-delimited JSON maps to :jsonl.
        if @compress.record_type == :json
          @record_type = :jsonl
        else
          @record_type = @compress.record_type
        end
      end

      # File extension used for the uploaded S3 object.
      def ext
        "parquet".freeze
      end

      # Content-Type of the uploaded S3 object.
      def content_type
        "application/octet-stream".freeze
      end

      # Converts +chunk+ into a parquet file written to +tmp+ (a File-like
      # object whose #path is the destination). For non-file buffers the chunk
      # is first spooled to a Tempfile, which is always removed in the ensure.
      # Raises Fluent::UnrecoverableError when columnify exits non-zero.
      def compress(chunk, tmp)
        chunk_is_file = @buffer_type == "file"
        path = if chunk_is_file
                 # File buffer: columnify can read the chunk file directly.
                 chunk.path
               else
                 w = Tempfile.new("chunk-parquet-tmp")
                 w.binmode
                 chunk.write_to(w)
                 w.close
                 w.path
               end
        stdout, stderr, status = columnify(path, tmp.path)
        unless status.success?
          raise Fluent::UnrecoverableError, "failed to execute columnify command. stdout=#{stdout} stderr=#{stderr} status=#{status.inspect}"
        end
      ensure
        unless chunk_is_file
          # close(true) also unlinks the tempfile; `rescue nil` keeps cleanup
          # failures (including w being nil if Tempfile.new raised) from
          # masking the primary error.
          w.close(true) rescue nil
        end
      end

      private

      # Runs the columnify CLI, returning [stdout, stderr, Process::Status].
      def columnify(src_path, dst_path)
        Open3.capture3("columnify",
                       "-parquetCompressionCodec", @parquet_compression_codec,
                       "-parquetPageSize", @compress.parquet_page_size.to_s,
                       "-parquetRowGroupSize", @compress.parquet_row_group_size.to_s,
                       "-recordType", @record_type.to_s,
                       "-schemaType", @compress.schema_type.to_s,
                       "-schemaFile", @compress.schema_file,
                       "-output", dst_path,
                       src_path)
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-04-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|
@@ -123,7 +123,6 @@ extra_rdoc_files: []
|
|
123
123
|
files:
|
124
124
|
- ".github/workflows/linux.yml"
|
125
125
|
- ".gitignore"
|
126
|
-
- ".travis.yml"
|
127
126
|
- AUTHORS
|
128
127
|
- ChangeLog
|
129
128
|
- Gemfile
|
@@ -132,6 +131,11 @@ files:
|
|
132
131
|
- Rakefile
|
133
132
|
- VERSION
|
134
133
|
- appveyor.yml
|
134
|
+
- docs/credentials.md
|
135
|
+
- docs/howto.md
|
136
|
+
- docs/input.md
|
137
|
+
- docs/output.md
|
138
|
+
- docs/v0.12.md
|
135
139
|
- fluent-plugin-s3.gemspec
|
136
140
|
- lib/fluent/log-ext.rb
|
137
141
|
- lib/fluent/plugin/in_s3.rb
|
@@ -139,6 +143,7 @@ files:
|
|
139
143
|
- lib/fluent/plugin/s3_compressor_gzip_command.rb
|
140
144
|
- lib/fluent/plugin/s3_compressor_lzma2.rb
|
141
145
|
- lib/fluent/plugin/s3_compressor_lzo.rb
|
146
|
+
- lib/fluent/plugin/s3_compressor_parquet.rb
|
142
147
|
- lib/fluent/plugin/s3_extractor_gzip_command.rb
|
143
148
|
- lib/fluent/plugin/s3_extractor_lzma2.rb
|
144
149
|
- lib/fluent/plugin/s3_extractor_lzo.rb
|
@@ -163,10 +168,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
168
|
- !ruby/object:Gem::Version
|
164
169
|
version: '0'
|
165
170
|
requirements: []
|
166
|
-
rubygems_version: 3.1.
|
171
|
+
rubygems_version: 3.1.2
|
167
172
|
signing_key:
|
168
173
|
specification_version: 4
|
169
174
|
summary: Amazon S3 output plugin for Fluentd event collector
|
170
|
-
test_files:
|
171
|
-
- test/test_in_s3.rb
|
172
|
-
- test/test_out_s3.rb
|
175
|
+
test_files: []
|
data/.travis.yml
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
language: ruby
|
2
|
-
|
3
|
-
rvm:
|
4
|
-
- 2.4
|
5
|
-
- 2.5
|
6
|
-
- 2.6
|
7
|
-
- 2.7
|
8
|
-
- ruby-head
|
9
|
-
|
10
|
-
gemfile:
|
11
|
-
- Gemfile
|
12
|
-
|
13
|
-
branches:
|
14
|
-
only:
|
15
|
-
- master
|
16
|
-
|
17
|
-
before_install: gem update bundler
|
18
|
-
script: bundle exec rake test
|
19
|
-
|
20
|
-
sudo: false
|
21
|
-
|
22
|
-
matrix:
|
23
|
-
allow_failures:
|
24
|
-
- rvm: ruby-head
|