fluent-plugin-s3 1.5.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/docs/v0.12.md ADDED
@@ -0,0 +1,52 @@
1
+ # Configuration: Output (v0.12 style)
2
+
3
+ Here is a sample configuration for the old fluentd v0.12. It also works with fluentd v1, but is not recommended for it.
4
+
5
+ <match pattern>
6
+ @type s3
7
+
8
+ aws_key_id YOUR_AWS_KEY_ID
9
+ aws_sec_key YOUR_AWS_SECRET_KEY
10
+ s3_bucket YOUR_S3_BUCKET_NAME
11
+ s3_region ap-northeast-1
12
+
13
+ path logs/
14
+ s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
15
+ buffer_path /var/log/fluent/s3
16
+ time_slice_format %Y%m%d-%H
17
+ time_slice_wait 10m
18
+ utc
19
+ format json
20
+ </match>
21
+
22
+ If you want to embed the tag in [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format), you need to use the `fluent-plugin-forest` plugin.
23
+
24
+ The following explanations cover the differences from v1. The other parameters are the same as in v1; see [Configuration: Output](output.md) for them.
25
+
26
+ ## format (for v0.12)
27
+
28
+ @format json
29
+ include_time_key true
30
+ time_key log_time # default is time
31
+
32
+ This parameter is for v0.12. Use [`<format>`](https://docs.fluentd.org/configuration/format-section) and [`<inject>`](https://docs.fluentd.org/configuration/inject-section) for v1.
33
+
34
+ ## buffer_path (for v0.12)
35
+
36
+ The path prefix of the files used to buffer logs.
37
+
38
+ This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `path` in v1.
39
+
40
+ ## time_slice_format (for v0.12)
41
+
42
+ Format of the time used as the file name. Default is '%Y%m%d'. Use
43
+ '%Y%m%d%H' to split files hourly.
44
+
45
+ This parameter is for v0.12. Use [buffer placeholder](https://docs.fluentd.org/configuration/buffer-section#placeholders) for [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format) in v1.
46
+
47
+ ## time_slice_wait (for v0.12)
48
+
49
+ The time to wait for late-arriving logs. Default is 10 minutes. Specify a
50
+ larger value if old logs may arrive late.
51
+
52
+ This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `timekey_wait` in v1.
@@ -0,0 +1,83 @@
1
+ require "open3"
2
+
3
+ module Fluent::Plugin
4
+ class S3Output
5
+ class ParquetCompressor < Compressor
6
+ S3Output.register_compressor("parquet", self)
7
+
8
+ config_section :compress, multi: false do
9
+ desc "parquet compression codec"
10
+ config_param :parquet_compression_codec, :enum, list: [:uncompressed, :snappy, :gzip, :lzo, :brotli, :lz4, :zstd], default: :snappy
11
+ desc "parquet file page size"
12
+ config_param :parquet_page_size, :size, default: 8192
13
+ desc "parquet file row group size"
14
+ config_param :parquet_row_group_size, :size, default: 128 * 1024 * 1024
15
+ desc "record data format type"
16
+ config_param :record_type, :enum, list: [:avro, :csv, :jsonl, :msgpack, :tsv, :json], default: :msgpack
17
+ desc "schema type"
18
+ config_param :schema_type, :enum, list: [:avro, :bigquery], default: :avro
19
+ desc "path to schema file"
20
+ config_param :schema_file, :string
21
+ end
22
+
23
+ def configure(conf)
24
+ super
25
+ check_command("columnify", "-h")
26
+
27
+ if [:lzo, :brotli, :lz4].include?(@compress.parquet_compression_codec)
28
+ raise Fluent::ConfigError, "unsupported compression codec: #{@compress.parquet_compression_codec}"
29
+ end
30
+
31
+ @parquet_compression_codec = @compress.parquet_compression_codec.to_s.upcase
32
+ if @compress.record_type == :json
33
+ @record_type = :jsonl
34
+ else
35
+ @record_type = @compress.record_type
36
+ end
37
+ end
38
+
39
+ def ext
40
+ "parquet".freeze
41
+ end
42
+
43
+ def content_type
44
+ "application/octet-stream".freeze
45
+ end
46
+
47
+ def compress(chunk, tmp)
48
+ chunk_is_file = @buffer_type == "file"
49
+ path = if chunk_is_file
50
+ chunk.path
51
+ else
52
+ w = Tempfile.new("chunk-parquet-tmp")
53
+ w.binmode
54
+ chunk.write_to(w)
55
+ w.close
56
+ w.path
57
+ end
58
+ stdout, stderr, status = columnify(path, tmp.path)
59
+ unless status.success?
60
+ raise Fluent::UnrecoverableError, "failed to execute columnify command. stdout=#{stdout} stderr=#{stderr} status=#{status.inspect}"
61
+ end
62
+ ensure
63
+ unless chunk_is_file
64
+ w.close(true) rescue nil
65
+ end
66
+ end
67
+
68
+ private
69
+
70
+ def columnify(src_path, dst_path)
71
+ Open3.capture3("columnify",
72
+ "-parquetCompressionCodec", @parquet_compression_codec,
73
+ "-parquetPageSize", @compress.parquet_page_size.to_s,
74
+ "-parquetRowGroupSize", @compress.parquet_row_group_size.to_s,
75
+ "-recordType", @record_type.to_s,
76
+ "-schemaType", @compress.schema_type.to_s,
77
+ "-schemaFile", @compress.schema_file,
78
+ "-output", dst_path,
79
+ src_path)
80
+ end
81
+ end
82
+ end
83
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.1
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-02-16 00:00:00.000000000 Z
12
+ date: 2021-04-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -123,7 +123,6 @@ extra_rdoc_files: []
123
123
  files:
124
124
  - ".github/workflows/linux.yml"
125
125
  - ".gitignore"
126
- - ".travis.yml"
127
126
  - AUTHORS
128
127
  - ChangeLog
129
128
  - Gemfile
@@ -132,6 +131,11 @@ files:
132
131
  - Rakefile
133
132
  - VERSION
134
133
  - appveyor.yml
134
+ - docs/credentials.md
135
+ - docs/howto.md
136
+ - docs/input.md
137
+ - docs/output.md
138
+ - docs/v0.12.md
135
139
  - fluent-plugin-s3.gemspec
136
140
  - lib/fluent/log-ext.rb
137
141
  - lib/fluent/plugin/in_s3.rb
@@ -139,6 +143,7 @@ files:
139
143
  - lib/fluent/plugin/s3_compressor_gzip_command.rb
140
144
  - lib/fluent/plugin/s3_compressor_lzma2.rb
141
145
  - lib/fluent/plugin/s3_compressor_lzo.rb
146
+ - lib/fluent/plugin/s3_compressor_parquet.rb
142
147
  - lib/fluent/plugin/s3_extractor_gzip_command.rb
143
148
  - lib/fluent/plugin/s3_extractor_lzma2.rb
144
149
  - lib/fluent/plugin/s3_extractor_lzo.rb
@@ -163,10 +168,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
168
  - !ruby/object:Gem::Version
164
169
  version: '0'
165
170
  requirements: []
166
- rubygems_version: 3.1.4
171
+ rubygems_version: 3.1.2
167
172
  signing_key:
168
173
  specification_version: 4
169
174
  summary: Amazon S3 output plugin for Fluentd event collector
170
- test_files:
171
- - test/test_in_s3.rb
172
- - test/test_out_s3.rb
175
+ test_files: []
data/.travis.yml DELETED
@@ -1,24 +0,0 @@
1
- language: ruby
2
-
3
- rvm:
4
- - 2.4
5
- - 2.5
6
- - 2.6
7
- - 2.7
8
- - ruby-head
9
-
10
- gemfile:
11
- - Gemfile
12
-
13
- branches:
14
- only:
15
- - master
16
-
17
- before_install: gem update bundler
18
- script: bundle exec rake test
19
-
20
- sudo: false
21
-
22
- matrix:
23
- allow_failures:
24
- - rvm: ruby-head