fluent-plugin-s3 1.5.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +4 -0
- data/README.md +11 -781
- data/VERSION +1 -1
- data/docs/credentials.md +171 -0
- data/docs/howto.md +92 -0
- data/docs/input.md +90 -0
- data/docs/output.md +445 -0
- data/docs/v0.12.md +52 -0
- data/lib/fluent/plugin/s3_compressor_parquet.rb +83 -0
- metadata +10 -7
- data/.travis.yml +0 -24
data/docs/v0.12.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Configuration: Output (v0.12 style)
|
2
|
+
|
3
|
+
Here is a sample configuration for the old fluentd v0.12. It also works with fluentd v1, but it is not recommended there.
|
4
|
+
|
5
|
+
<match pattern>
|
6
|
+
@type s3
|
7
|
+
|
8
|
+
aws_key_id YOUR_AWS_KEY_ID
|
9
|
+
aws_sec_key YOUR_AWS_SECRET_KEY
|
10
|
+
s3_bucket YOUR_S3_BUCKET_NAME
|
11
|
+
s3_region ap-northeast-1
|
12
|
+
|
13
|
+
path logs/
|
14
|
+
s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
|
15
|
+
buffer_path /var/log/fluent/s3
|
16
|
+
time_slice_format %Y%m%d-%H
|
17
|
+
time_slice_wait 10m
|
18
|
+
utc
|
19
|
+
format json
|
20
|
+
</match>
|
21
|
+
|
22
|
+
If you want to embed the tag in [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format), you need to use the `fluent-plugin-forest` plugin.
|
23
|
+
|
24
|
+
The following sections explain the differences from v1. Other parameters are the same as in v1; see [Configuration: Output](output.md) for them.
|
25
|
+
|
26
|
+
## format (for v0.12)
|
27
|
+
|
28
|
+
@format json
|
29
|
+
include_time_key true
|
30
|
+
time_key log_time # default is time
|
31
|
+
|
32
|
+
This parameter is for v0.12. Use [`<format>`](https://docs.fluentd.org/configuration/format-section) and [`<inject>`](https://docs.fluentd.org/configuration/inject-section) for v1.
|
33
|
+
|
34
|
+
## buffer_path (for v0.12)
|
35
|
+
|
36
|
+
Path prefix of the files used to buffer logs.
|
37
|
+
|
38
|
+
This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `path` in v1.
|
39
|
+
|
40
|
+
## time_slice_format (for v0.12)
|
41
|
+
|
42
|
+
Format of the time used as the file name. Default is '%Y%m%d'. Use
|
43
|
+
'%Y%m%d%H' to split files hourly.
|
44
|
+
|
45
|
+
This parameter is for v0.12. Use [buffer placeholder](https://docs.fluentd.org/configuration/buffer-section#placeholders) for [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format) in v1.
|
46
|
+
|
47
|
+
## time_slice_wait (for v0.12)
|
48
|
+
|
49
|
+
The time to wait for old logs. Default is 10 minutes. Specify a larger value if
|
50
|
+
old logs may arrive late.
|
51
|
+
|
52
|
+
This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `timekey_wait` in v1.
|
data/lib/fluent/plugin/s3_compressor_parquet.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
require "open3"
|
2
|
+
|
3
|
+
module Fluent::Plugin
  class S3Output
    # Compressor registered under the name "parquet". It turns a buffer chunk
    # into a Parquet file by running the external `columnify` command and
    # letting it write into the temporary file supplied by the caller.
    class ParquetCompressor < Compressor
      S3Output.register_compressor("parquet", self)

      # Parameters read from the <compress> section. All of them except
      # `schema_file` have defaults; they are forwarded to `columnify` as
      # command-line flags (see #columnify).
      config_section :compress, multi: false do
        desc "parquet compression codec"
        config_param :parquet_compression_codec, :enum,
                     list: %i[uncompressed snappy gzip lzo brotli lz4 zstd],
                     default: :snappy
        desc "parquet file page size"
        config_param :parquet_page_size, :size, default: 8192
        desc "parquet file row group size"
        config_param :parquet_row_group_size, :size, default: 128 * 1024 * 1024
        desc "record data format type"
        config_param :record_type, :enum,
                     list: %i[avro csv jsonl msgpack tsv json],
                     default: :msgpack
        desc "schema type"
        config_param :schema_type, :enum, list: %i[avro bigquery], default: :avro
        desc "path to schema file"
        config_param :schema_file, :string
      end

      # Validates the <compress> settings and derives the values later passed
      # to `columnify`.
      #
      # Raises Fluent::ConfigError when the codec is :lzo, :brotli or :lz4;
      # those are accepted by the enum above but rejected here.
      def configure(conf)
        super
        # `check_command` is defined outside this file; presumably it verifies
        # that `columnify -h` can be executed -- confirm against the base class.
        check_command("columnify", "-h")

        codec = @compress.parquet_compression_codec
        case codec
        when :lzo, :brotli, :lz4
          raise Fluent::ConfigError, "unsupported compression codec: #{codec}"
        end

        # The codec is handed to columnify in upper case (e.g. "SNAPPY").
        @parquet_compression_codec = codec.to_s.upcase

        # :json is accepted as an alias and mapped to :jsonl.
        requested_type = @compress.record_type
        @record_type = requested_type == :json ? :jsonl : requested_type
      end

      # File extension used for the uploaded object.
      def ext
        "parquet".freeze
      end

      # Content type used for the uploaded object.
      def content_type
        "application/octet-stream".freeze
      end

      # Converts +chunk+ to Parquet, writing the result to +tmp.path+.
      #
      # When @buffer_type is "file" the chunk's own path is given to
      # columnify; otherwise the chunk is first spooled into a Tempfile,
      # which is removed again in the ensure clause.
      #
      # Raises Fluent::UnrecoverableError when columnify exits unsuccessfully.
      def compress(chunk, tmp)
        chunk_is_file = @buffer_type == "file"

        if chunk_is_file
          source_path = chunk.path
        else
          spool = Tempfile.new("chunk-parquet-tmp")
          spool.binmode
          chunk.write_to(spool)
          # Close so the data is flushed before columnify reads the file.
          spool.close
          source_path = spool.path
        end

        stdout, stderr, status = columnify(source_path, tmp.path)
        return if status.success?

        raise Fluent::UnrecoverableError, "failed to execute columnify command. stdout=#{stdout} stderr=#{stderr} status=#{status.inspect}"
      ensure
        # Best-effort removal of the spool file; any StandardError raised
        # here (including a nil spool) is deliberately ignored.
        begin
          spool.close(true) unless chunk_is_file
        rescue StandardError
          nil
        end
      end

      private

      # Runs `columnify` on +src_path+, writing Parquet output to +dst_path+.
      # Returns the [stdout, stderr, status] triple from Open3.capture3.
      def columnify(src_path, dst_path)
        flags = [
          "-parquetCompressionCodec", @parquet_compression_codec,
          "-parquetPageSize", @compress.parquet_page_size.to_s,
          "-parquetRowGroupSize", @compress.parquet_row_group_size.to_s,
          "-recordType", @record_type.to_s,
          "-schemaType", @compress.schema_type.to_s,
          "-schemaFile", @compress.schema_file,
          "-output", dst_path
        ]
        Open3.capture3("columnify", *flags, src_path)
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.1
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-04-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|
@@ -123,7 +123,6 @@ extra_rdoc_files: []
|
|
123
123
|
files:
|
124
124
|
- ".github/workflows/linux.yml"
|
125
125
|
- ".gitignore"
|
126
|
-
- ".travis.yml"
|
127
126
|
- AUTHORS
|
128
127
|
- ChangeLog
|
129
128
|
- Gemfile
|
@@ -132,6 +131,11 @@ files:
|
|
132
131
|
- Rakefile
|
133
132
|
- VERSION
|
134
133
|
- appveyor.yml
|
134
|
+
- docs/credentials.md
|
135
|
+
- docs/howto.md
|
136
|
+
- docs/input.md
|
137
|
+
- docs/output.md
|
138
|
+
- docs/v0.12.md
|
135
139
|
- fluent-plugin-s3.gemspec
|
136
140
|
- lib/fluent/log-ext.rb
|
137
141
|
- lib/fluent/plugin/in_s3.rb
|
@@ -139,6 +143,7 @@ files:
|
|
139
143
|
- lib/fluent/plugin/s3_compressor_gzip_command.rb
|
140
144
|
- lib/fluent/plugin/s3_compressor_lzma2.rb
|
141
145
|
- lib/fluent/plugin/s3_compressor_lzo.rb
|
146
|
+
- lib/fluent/plugin/s3_compressor_parquet.rb
|
142
147
|
- lib/fluent/plugin/s3_extractor_gzip_command.rb
|
143
148
|
- lib/fluent/plugin/s3_extractor_lzma2.rb
|
144
149
|
- lib/fluent/plugin/s3_extractor_lzo.rb
|
@@ -163,10 +168,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
168
|
- !ruby/object:Gem::Version
|
164
169
|
version: '0'
|
165
170
|
requirements: []
|
166
|
-
rubygems_version: 3.1.
|
171
|
+
rubygems_version: 3.1.2
|
167
172
|
signing_key:
|
168
173
|
specification_version: 4
|
169
174
|
summary: Amazon S3 output plugin for Fluentd event collector
|
170
|
-
test_files:
|
171
|
-
- test/test_in_s3.rb
|
172
|
-
- test/test_out_s3.rb
|
175
|
+
test_files: []
|
data/.travis.yml
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
language: ruby
|
2
|
-
|
3
|
-
rvm:
|
4
|
-
- 2.4
|
5
|
-
- 2.5
|
6
|
-
- 2.6
|
7
|
-
- 2.7
|
8
|
-
- ruby-head
|
9
|
-
|
10
|
-
gemfile:
|
11
|
-
- Gemfile
|
12
|
-
|
13
|
-
branches:
|
14
|
-
only:
|
15
|
-
- master
|
16
|
-
|
17
|
-
before_install: gem update bundler
|
18
|
-
script: bundle exec rake test
|
19
|
-
|
20
|
-
sudo: false
|
21
|
-
|
22
|
-
matrix:
|
23
|
-
allow_failures:
|
24
|
-
- rvm: ruby-head
|