fluent-plugin-s3 1.5.1 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/docs/v0.12.md ADDED
@@ -0,0 +1,52 @@
1
+ # Configuration: Output (v0.12 style)
2
+
3
+ Here is a sample configuration for the old fluentd v0.12. It also works with fluentd v1, but it is not recommended there.
4
+
5
+ <match pattern>
6
+ @type s3
7
+
8
+ aws_key_id YOUR_AWS_KEY_ID
9
+ aws_sec_key YOUR_AWS_SECRET_KEY
10
+ s3_bucket YOUR_S3_BUCKET_NAME
11
+ s3_region ap-northeast-1
12
+
13
+ path logs/
14
+ s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
15
+ buffer_path /var/log/fluent/s3
16
+ time_slice_format %Y%m%d-%H
17
+ time_slice_wait 10m
18
+ utc
19
+ format json
20
+ </match>
21
+
22
+ If you want to embed the tag in [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format), you need to use the `fluent-plugin-forest` plugin.
23
+
24
+ The following sections describe only the differences from v1. All other parameters are the same as in v1; see [Configuration: Output](output.md) for them.
25
+
26
+ ## format (for v0.12)
27
+
28
+ @format json
29
+ include_time_key true
30
+ time_key log_time # default is time
31
+
32
+ This parameter is for v0.12. Use [`<format>`](https://docs.fluentd.org/configuration/format-section) and [`<inject>`](https://docs.fluentd.org/configuration/inject-section) for v1.
33
+
34
+ ## buffer_path (for v0.12)
35
+
36
+ Path prefix of the files used to buffer logs.
37
+
38
+ This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `path` in v1.
39
+
40
+ ## time_slice_format (for v0.12)
41
+
42
+ Format of the time used as the file name. Default is '%Y%m%d'. Use
43
+ '%Y%m%d%H' to split files hourly.
44
+
45
+ This parameter is for v0.12. Use [buffer placeholder](https://docs.fluentd.org/configuration/buffer-section#placeholders) for [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format) in v1.
46
+
47
+ ## time_slice_wait (for v0.12)
48
+
49
+ The time to wait for old logs. Default is 10 minutes. Specify a larger value if
50
+ old logs may arrive late.
51
+
52
+ This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `timekey_wait` in v1.
@@ -0,0 +1,83 @@
1
+ require "open3"
2
+
3
+ module Fluent::Plugin
4
+ class S3Output
5
+ class ParquetCompressor < Compressor # S3Output compressor that converts a buffered chunk to Parquet by invoking the external `columnify` command.
6
+ S3Output.register_compressor("parquet", self) # Registers this class with S3Output under the name "parquet".
7
+
8
+ config_section :compress, multi: false do # Parquet-specific parameters, declared inside the :compress section.
9
+ desc "parquet compression codec"
10
+ config_param :parquet_compression_codec, :enum, list: [:uncompressed, :snappy, :gzip, :lzo, :brotli, :lz4, :zstd], default: :snappy # :lzo, :brotli and :lz4 are listed here but rejected in #configure.
11
+ desc "parquet file page size"
12
+ config_param :parquet_page_size, :size, default: 8192 # Passed to columnify as -parquetPageSize.
13
+ desc "parquet file row group size"
14
+ config_param :parquet_row_group_size, :size, default: 128 * 1024 * 1024 # Passed to columnify as -parquetRowGroupSize.
15
+ desc "record data format type"
16
+ config_param :record_type, :enum, list: [:avro, :csv, :jsonl, :msgpack, :tsv, :json], default: :msgpack # :json is accepted here and mapped to :jsonl in #configure.
17
+ desc "schema type"
18
+ config_param :schema_type, :enum, list: [:avro, :bigquery], default: :avro # Passed to columnify as -schemaType.
19
+ desc "path to schema file"
20
+ config_param :schema_file, :string # No default is declared for this parameter.
21
+ end
22
+
23
+ def configure(conf) # Validates the :compress settings and precomputes the codec name and record type later passed to columnify.
24
+ super
25
+ check_command("columnify", "-h") # check_command is defined outside this file; presumably it verifies that columnify can be executed -- TODO confirm.
26
+
27
+ if [:lzo, :brotli, :lz4].include?(@compress.parquet_compression_codec) # These codecs pass the enum check above but are refused here.
28
+ raise Fluent::ConfigError, "unsupported compression codec: #{@compress.parquet_compression_codec}"
29
+ end
30
+
31
+ @parquet_compression_codec = @compress.parquet_compression_codec.to_s.upcase # The codec is handed to columnify in upper case, e.g. :snappy becomes "SNAPPY".
32
+ if @compress.record_type == :json # :json is treated as an alias of :jsonl.
33
+ @record_type = :jsonl
34
+ else
35
+ @record_type = @compress.record_type
36
+ end
37
+ end
38
+
39
+ def ext # Returns "parquet"; presumably used as the file extension of the uploaded object -- confirm against the caller.
40
+ "parquet".freeze
41
+ end
42
+
43
+ def content_type # Returns "application/octet-stream"; presumably used as the content type of the uploaded object -- confirm against the caller.
44
+ "application/octet-stream".freeze
45
+ end
46
+
47
+ def compress(chunk, tmp) # Converts `chunk` to Parquet at tmp.path via columnify; raises Fluent::UnrecoverableError when the command does not exit successfully.
48
+ chunk_is_file = @buffer_type == "file" # @buffer_type is set outside this file.
49
+ path = if chunk_is_file
50
+ chunk.path # The chunk already has a path, so it is handed to columnify directly.
51
+ else
52
+ w = Tempfile.new("chunk-parquet-tmp") # Otherwise the chunk is copied to a temporary file first. NOTE(review): Tempfile is not required in this file -- confirm it is loaded elsewhere.
53
+ w.binmode
54
+ chunk.write_to(w)
55
+ w.close # Closed without unlinking, so the written data is flushed before columnify reads the path.
56
+ w.path
57
+ end
58
+ stdout, stderr, status = columnify(path, tmp.path)
59
+ unless status.success?
60
+ raise Fluent::UnrecoverableError, "failed to execute columnify command. stdout=#{stdout} stderr=#{stderr} status=#{status.inspect}"
61
+ end
62
+ ensure
63
+ unless chunk_is_file
64
+ w.close(true) rescue nil # close(true) also unlinks the temp file; `rescue nil` swallows any error here, including w being nil when Tempfile.new itself failed.
65
+ end
66
+ end
67
+
68
+ private
69
+
70
+ def columnify(src_path, dst_path) # Runs columnify on src_path with -output dst_path and returns Open3.capture3's [stdout, stderr, status].
71
+ Open3.capture3("columnify", # Arguments are passed as separate strings, so no shell is involved.
72
+ "-parquetCompressionCodec", @parquet_compression_codec,
73
+ "-parquetPageSize", @compress.parquet_page_size.to_s,
74
+ "-parquetRowGroupSize", @compress.parquet_row_group_size.to_s,
75
+ "-recordType", @record_type.to_s,
76
+ "-schemaType", @compress.schema_type.to_s,
77
+ "-schemaFile", @compress.schema_file,
78
+ "-output", dst_path,
79
+ src_path)
80
+ end
81
+ end
82
+ end
83
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.1
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-02-16 00:00:00.000000000 Z
12
+ date: 2021-04-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -123,7 +123,6 @@ extra_rdoc_files: []
123
123
  files:
124
124
  - ".github/workflows/linux.yml"
125
125
  - ".gitignore"
126
- - ".travis.yml"
127
126
  - AUTHORS
128
127
  - ChangeLog
129
128
  - Gemfile
@@ -132,6 +131,11 @@ files:
132
131
  - Rakefile
133
132
  - VERSION
134
133
  - appveyor.yml
134
+ - docs/credentials.md
135
+ - docs/howto.md
136
+ - docs/input.md
137
+ - docs/output.md
138
+ - docs/v0.12.md
135
139
  - fluent-plugin-s3.gemspec
136
140
  - lib/fluent/log-ext.rb
137
141
  - lib/fluent/plugin/in_s3.rb
@@ -139,6 +143,7 @@ files:
139
143
  - lib/fluent/plugin/s3_compressor_gzip_command.rb
140
144
  - lib/fluent/plugin/s3_compressor_lzma2.rb
141
145
  - lib/fluent/plugin/s3_compressor_lzo.rb
146
+ - lib/fluent/plugin/s3_compressor_parquet.rb
142
147
  - lib/fluent/plugin/s3_extractor_gzip_command.rb
143
148
  - lib/fluent/plugin/s3_extractor_lzma2.rb
144
149
  - lib/fluent/plugin/s3_extractor_lzo.rb
@@ -163,10 +168,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
168
  - !ruby/object:Gem::Version
164
169
  version: '0'
165
170
  requirements: []
166
- rubygems_version: 3.1.4
171
+ rubygems_version: 3.1.2
167
172
  signing_key:
168
173
  specification_version: 4
169
174
  summary: Amazon S3 output plugin for Fluentd event collector
170
- test_files:
171
- - test/test_in_s3.rb
172
- - test/test_out_s3.rb
175
+ test_files: []
data/.travis.yml DELETED
@@ -1,24 +0,0 @@
1
- language: ruby
2
-
3
- rvm:
4
- - 2.4
5
- - 2.5
6
- - 2.6
7
- - 2.7
8
- - ruby-head
9
-
10
- gemfile:
11
- - Gemfile
12
-
13
- branches:
14
- only:
15
- - master
16
-
17
- before_install: gem update bundler
18
- script: bundle exec rake test
19
-
20
- sudo: false
21
-
22
- matrix:
23
- allow_failures:
24
- - rvm: ruby-head