fluent-plugin-s3 1.5.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/docs/v0.12.md ADDED
@@ -0,0 +1,52 @@
1
+ # Configuration: Output (v0.12 style)
2
+
3
+ Here is a sample configuration for the old fluentd v0.12. It also works with fluentd v1, but is not recommended for it.
4
+
5
+ <match pattern>
6
+ @type s3
7
+
8
+ aws_key_id YOUR_AWS_KEY_ID
9
+ aws_sec_key YOUR_AWS_SECRET_KEY
10
+ s3_bucket YOUR_S3_BUCKET_NAME
11
+ s3_region ap-northeast-1
12
+
13
+ path logs/
14
+ s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
15
+ buffer_path /var/log/fluent/s3
16
+ time_slice_format %Y%m%d-%H
17
+ time_slice_wait 10m
18
+ utc
19
+ format json
20
+ </match>
21
+
22
+ If you want to embed the tag in [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format), you need to use the `fluent-plugin-forest` plugin.
23
+
24
+ The following explanations cover the differences from v1. The other parameters are the same as in v1; see [Configuration: Output](output.md) for them.
25
+
26
+ ## format (for v0.12)
27
+
28
+ @format json
29
+ include_time_key true
30
+ time_key log_time # default is time
31
+
32
+ This parameter is for v0.12. Use [`<format>`](https://docs.fluentd.org/configuration/format-section) and [`<inject>`](https://docs.fluentd.org/configuration/inject-section) for v1.
33
+
34
+ ## buffer_path (for v0.12)
35
+
36
+ The path prefix of the files used to buffer logs.
37
+
38
+ This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `path` in v1.
39
+
40
+ ## time_slice_format (for v0.12)
41
+
42
+ Format of the time used as the file name. Default is '%Y%m%d'. Use
43
+ '%Y%m%d%H' to split files hourly.
44
+
45
+ This parameter is for v0.12. Use [buffer placeholder](https://docs.fluentd.org/configuration/buffer-section#placeholders) for [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format) in v1.
46
+
47
+ ## time_slice_wait (for v0.12)
48
+
49
+ The time to wait for late-arriving logs. Default is 10 minutes. Specify a
50
+ larger value if old logs may arrive late.
51
+
52
+ This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `timekey_wait` in v1.
@@ -0,0 +1,83 @@
1
+ require "open3"
2
+
3
+ module Fluent::Plugin
4
+ class S3Output
5
+ class ParquetCompressor < Compressor
6
+ S3Output.register_compressor("parquet", self)
7
+
8
+ config_section :compress, multi: false do
9
+ desc "parquet compression codec"
10
+ config_param :parquet_compression_codec, :enum, list: [:uncompressed, :snappy, :gzip, :lzo, :brotli, :lz4, :zstd], default: :snappy
11
+ desc "parquet file page size"
12
+ config_param :parquet_page_size, :size, default: 8192
13
+ desc "parquet file row group size"
14
+ config_param :parquet_row_group_size, :size, default: 128 * 1024 * 1024
15
+ desc "record data format type"
16
+ config_param :record_type, :enum, list: [:avro, :csv, :jsonl, :msgpack, :tsv, :json], default: :msgpack
17
+ desc "schema type"
18
+ config_param :schema_type, :enum, list: [:avro, :bigquery], default: :avro
19
+ desc "path to schema file"
20
+ config_param :schema_file, :string
21
+ end
22
+
23
+ def configure(conf)
24
+ super
25
+ check_command("columnify", "-h")
26
+
27
+ if [:lzo, :brotli, :lz4].include?(@compress.parquet_compression_codec)
28
+ raise Fluent::ConfigError, "unsupported compression codec: #{@compress.parquet_compression_codec}"
29
+ end
30
+
31
+ @parquet_compression_codec = @compress.parquet_compression_codec.to_s.upcase
32
+ if @compress.record_type == :json
33
+ @record_type = :jsonl
34
+ else
35
+ @record_type = @compress.record_type
36
+ end
37
+ end
38
+
39
+ def ext
40
+ "parquet".freeze
41
+ end
42
+
43
+ def content_type
44
+ "application/octet-stream".freeze
45
+ end
46
+
47
+ def compress(chunk, tmp)
48
+ chunk_is_file = @buffer_type == "file"
49
+ path = if chunk_is_file
50
+ chunk.path
51
+ else
52
+ w = Tempfile.new("chunk-parquet-tmp")
53
+ w.binmode
54
+ chunk.write_to(w)
55
+ w.close
56
+ w.path
57
+ end
58
+ stdout, stderr, status = columnify(path, tmp.path)
59
+ unless status.success?
60
+ raise Fluent::UnrecoverableError, "failed to execute columnify command. stdout=#{stdout} stderr=#{stderr} status=#{status.inspect}"
61
+ end
62
+ ensure
63
+ unless chunk_is_file
64
+ w.close(true) rescue nil
65
+ end
66
+ end
67
+
68
+ private
69
+
70
+ def columnify(src_path, dst_path)
71
+ Open3.capture3("columnify",
72
+ "-parquetCompressionCodec", @parquet_compression_codec,
73
+ "-parquetPageSize", @compress.parquet_page_size.to_s,
74
+ "-parquetRowGroupSize", @compress.parquet_row_group_size.to_s,
75
+ "-recordType", @record_type.to_s,
76
+ "-schemaType", @compress.schema_type.to_s,
77
+ "-schemaFile", @compress.schema_file,
78
+ "-output", dst_path,
79
+ src_path)
80
+ end
81
+ end
82
+ end
83
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.1
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-02-16 00:00:00.000000000 Z
12
+ date: 2021-04-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -123,7 +123,6 @@ extra_rdoc_files: []
123
123
  files:
124
124
  - ".github/workflows/linux.yml"
125
125
  - ".gitignore"
126
- - ".travis.yml"
127
126
  - AUTHORS
128
127
  - ChangeLog
129
128
  - Gemfile
@@ -132,6 +131,11 @@ files:
132
131
  - Rakefile
133
132
  - VERSION
134
133
  - appveyor.yml
134
+ - docs/credentials.md
135
+ - docs/howto.md
136
+ - docs/input.md
137
+ - docs/output.md
138
+ - docs/v0.12.md
135
139
  - fluent-plugin-s3.gemspec
136
140
  - lib/fluent/log-ext.rb
137
141
  - lib/fluent/plugin/in_s3.rb
@@ -139,6 +143,7 @@ files:
139
143
  - lib/fluent/plugin/s3_compressor_gzip_command.rb
140
144
  - lib/fluent/plugin/s3_compressor_lzma2.rb
141
145
  - lib/fluent/plugin/s3_compressor_lzo.rb
146
+ - lib/fluent/plugin/s3_compressor_parquet.rb
142
147
  - lib/fluent/plugin/s3_extractor_gzip_command.rb
143
148
  - lib/fluent/plugin/s3_extractor_lzma2.rb
144
149
  - lib/fluent/plugin/s3_extractor_lzo.rb
@@ -163,10 +168,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
168
  - !ruby/object:Gem::Version
164
169
  version: '0'
165
170
  requirements: []
166
- rubygems_version: 3.1.4
171
+ rubygems_version: 3.1.2
167
172
  signing_key:
168
173
  specification_version: 4
169
174
  summary: Amazon S3 output plugin for Fluentd event collector
170
- test_files:
171
- - test/test_in_s3.rb
172
- - test/test_out_s3.rb
175
+ test_files: []
data/.travis.yml DELETED
@@ -1,24 +0,0 @@
1
- language: ruby
2
-
3
- rvm:
4
- - 2.4
5
- - 2.5
6
- - 2.6
7
- - 2.7
8
- - ruby-head
9
-
10
- gemfile:
11
- - Gemfile
12
-
13
- branches:
14
- only:
15
- - master
16
-
17
- before_install: gem update bundler
18
- script: bundle exec rake test
19
-
20
- sudo: false
21
-
22
- matrix:
23
- allow_failures:
24
- - rvm: ruby-head