fluent-plugin-webhdfs 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -2
- data/fluent-plugin-webhdfs.gemspec +1 -1
- data/lib/fluent/plugin/out_webhdfs.rb +5 -3
- data/lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb +32 -0
- data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +8 -2
- data/test/plugin/test_out_webhdfs.rb +18 -0
- data/test/plugin/{test_compressor.rb → test_snappy_compressors.rb} +26 -12
- metadata +5 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2d96f9304470f4d3409a1209b96c56722a62acfefc55633d81525e21103ec7e9
|
|
4
|
+
data.tar.gz: 8c6982670e30e112815a3abec28a2865609e4ca2cccbe097a42b6abb9080af21
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a56a3b8ac2e7bf279ddb23d5a4fafb187289883442b01355f86cf3d626332aff35a78f4fee38c32767a7162741b550143fbfc50f9d953db0ce8a8220d022d35f
|
|
7
|
+
data.tar.gz: 7110d25391fc90d0e0aa8042014596994b3fe700737a08febd1f1d69485fd9eef212f0882e51832e2a3f95d64a7e21e0091f8f8e37444151404104230645029d
|
data/README.md
CHANGED
|
@@ -157,18 +157,30 @@ If you want to compress data before storing it:
|
|
|
157
157
|
host namenode.your.cluster.local
|
|
158
158
|
port 50070
|
|
159
159
|
path /path/on/hdfs/access.log.%Y%m%d_%H
|
|
160
|
-
compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
|
|
160
|
+
compress gzip # or 'bzip2', 'snappy', 'hadoop_snappy', 'lzo_command', 'zstd'
|
|
161
161
|
</match>
|
|
162
162
|
|
|
163
|
-
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.lzo`, `.zst`).
|
|
163
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.snappy`, `.lzo`, `.zst`).
|
|
164
164
|
Note that you have to install additional gem for several compress algorithms:
|
|
165
165
|
|
|
166
166
|
- snappy: install snappy gem
|
|
167
|
+
- hadoop_snappy: install snappy gem
|
|
167
168
|
- bzip2: install bzip2-ffi gem
|
|
168
169
|
- zstd: install zstandard gem
|
|
169
170
|
|
|
170
171
|
Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
|
|
171
172
|
|
|
173
|
+
You can also specify compression block size (currently supported only for Snappy codecs):
|
|
174
|
+
|
|
175
|
+
<match access.**>
|
|
176
|
+
@type webhdfs
|
|
177
|
+
host namenode.your.cluster.local
|
|
178
|
+
port 50070
|
|
179
|
+
path /path/on/hdfs/access.log.%Y%m%d_%H
|
|
180
|
+
compress hadoop_snappy
|
|
181
|
+
block_size 32768
|
|
182
|
+
</match>
|
|
183
|
+
|
|
172
184
|
If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
|
|
173
185
|
|
|
174
186
|
<match access.**>
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |gem|
|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
|
5
|
-
gem.version = "1.3.2"
|
|
5
|
+
gem.version = "1.4.0"
|
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
|
@@ -67,8 +67,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
|
67
67
|
desc 'kerberos keytab file'
|
|
68
68
|
config_param :kerberos_keytab, :string, default: nil
|
|
69
69
|
|
|
70
|
-
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
|
|
71
|
-
desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
|
|
70
|
+
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :hadoop_snappy, :lzo_command, :zstd, :text]
|
|
71
|
+
desc "Compression method (#{SUPPORTED_COMPRESS.join(',')})"
|
|
72
72
|
config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
|
|
73
73
|
|
|
74
74
|
desc 'HDFS file extensions (overrides default compressor extensions)'
|
|
@@ -156,6 +156,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
|
156
156
|
end
|
|
157
157
|
|
|
158
158
|
@compressor = COMPRESSOR_REGISTRY.lookup(@compress.to_s).new
|
|
159
|
+
@compressor.configure(conf)
|
|
159
160
|
|
|
160
161
|
if @host
|
|
161
162
|
@namenode_host = @host
|
|
@@ -511,7 +512,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
|
511
512
|
begin
|
|
512
513
|
Open3.capture3("#{command} -V")
|
|
513
514
|
rescue Errno::ENOENT
|
|
514
|
-
raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
|
|
515
|
+
raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
|
|
515
516
|
end
|
|
516
517
|
end
|
|
517
518
|
end
|
|
@@ -527,5 +528,6 @@ require 'fluent/plugin/webhdfs_compressor_text'
|
|
|
527
528
|
require 'fluent/plugin/webhdfs_compressor_gzip'
|
|
528
529
|
require 'fluent/plugin/webhdfs_compressor_bzip2'
|
|
529
530
|
require 'fluent/plugin/webhdfs_compressor_snappy'
|
|
531
|
+
require 'fluent/plugin/webhdfs_compressor_hadoop_snappy'
|
|
530
532
|
require 'fluent/plugin/webhdfs_compressor_lzo_command'
|
|
531
533
|
require 'fluent/plugin/webhdfs_compressor_zstd'
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
module Fluent::Plugin
|
|
2
|
+
class WebHDFSOutput < Output
|
|
3
|
+
class HadoopSnappyCompressor < Compressor
|
|
4
|
+
WebHDFSOutput.register_compressor('hadoop_snappy', self)
|
|
5
|
+
|
|
6
|
+
DEFAULT_BLOCK_SIZE = 256 * 1024
|
|
7
|
+
|
|
8
|
+
desc 'Block size for compression algorithm'
|
|
9
|
+
config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
|
|
10
|
+
|
|
11
|
+
def initialize(options = {})
|
|
12
|
+
super()
|
|
13
|
+
begin
|
|
14
|
+
require "snappy"
|
|
15
|
+
rescue LoadError
|
|
16
|
+
raise Fluent::ConfigError, "Install snappy before using snappy compressor"
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def ext
|
|
21
|
+
".snappy"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def compress(chunk, tmp)
|
|
25
|
+
Snappy::Hadoop::Writer.new(tmp, @block_size) do |w|
|
|
26
|
+
w << chunk.read
|
|
27
|
+
w.flush
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -3,11 +3,17 @@ module Fluent::Plugin
|
|
|
3
3
|
class SnappyCompressor < Compressor
|
|
4
4
|
WebHDFSOutput.register_compressor('snappy', self)
|
|
5
5
|
|
|
6
|
+
DEFAULT_BLOCK_SIZE = 32 * 1024
|
|
7
|
+
|
|
8
|
+
desc 'Block size for compression algorithm'
|
|
9
|
+
config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
|
|
10
|
+
|
|
6
11
|
def initialize(options = {})
|
|
12
|
+
super()
|
|
7
13
|
begin
|
|
8
14
|
require "snappy"
|
|
9
15
|
rescue LoadError
|
|
10
|
-
raise Fluent::ConfigError, "Install snappy before using snappy compressor"
|
|
16
|
+
raise Fluent::ConfigError, "Install snappy before using snappy compressor"
|
|
11
17
|
end
|
|
12
18
|
end
|
|
13
19
|
|
|
@@ -16,7 +22,7 @@ module Fluent::Plugin
|
|
|
16
22
|
end
|
|
17
23
|
|
|
18
24
|
def compress(chunk, tmp)
|
|
19
|
-
Snappy::Writer.new(tmp) do |w|
|
|
25
|
+
Snappy::Writer.new(tmp, @block_size) do |w|
|
|
20
26
|
w << chunk.read
|
|
21
27
|
w.flush
|
|
22
28
|
end
|
|
@@ -107,6 +107,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
|
107
107
|
data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
|
|
108
108
|
bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
|
|
109
109
|
snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
|
|
110
|
+
hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor],
|
|
110
111
|
lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
|
|
111
112
|
def test_compress(data)
|
|
112
113
|
compress_type, compressor_class = data
|
|
@@ -148,6 +149,23 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
|
148
149
|
assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
|
|
149
150
|
end
|
|
150
151
|
|
|
152
|
+
data(snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
|
|
153
|
+
hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor])
|
|
154
|
+
def test_compression_block_size(data)
|
|
155
|
+
compress_type, compressor_class = data
|
|
156
|
+
conf = config_element(
|
|
157
|
+
"ROOT", "", {
|
|
158
|
+
"host" => "namenode.local",
|
|
159
|
+
"path" => "/hdfs/path/file.%Y%m%d.log",
|
|
160
|
+
"compress" => compress_type,
|
|
161
|
+
"block_size" => 16384
|
|
162
|
+
})
|
|
163
|
+
d = create_driver(conf)
|
|
164
|
+
|
|
165
|
+
assert_equal compress_type, d.instance.compress
|
|
166
|
+
assert_equal 16384, d.instance.compressor.block_size
|
|
167
|
+
end
|
|
168
|
+
|
|
151
169
|
def test_placeholders_old_style
|
|
152
170
|
conf = config_element(
|
|
153
171
|
"ROOT", "", {
|
|
@@ -5,7 +5,7 @@ begin
|
|
|
5
5
|
rescue LoadError
|
|
6
6
|
end
|
|
7
7
|
|
|
8
|
-
class CompressorTest < Test::Unit::TestCase
|
|
8
|
+
class SnappyCompressorsTest < Test::Unit::TestCase
|
|
9
9
|
class Snappy < self
|
|
10
10
|
|
|
11
11
|
CONFIG = %[
|
|
@@ -16,7 +16,17 @@ class CompressorTest < Test::Unit::TestCase
|
|
|
16
16
|
def setup
|
|
17
17
|
omit unless Object.const_defined?(:Snappy)
|
|
18
18
|
Fluent::Test.setup
|
|
19
|
-
|
|
19
|
+
|
|
20
|
+
@compressors_size = 2
|
|
21
|
+
@compressors = [
|
|
22
|
+
Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new,
|
|
23
|
+
Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor.new
|
|
24
|
+
]
|
|
25
|
+
@readers = [
|
|
26
|
+
::Snappy::Reader,
|
|
27
|
+
::Snappy::Hadoop::Reader
|
|
28
|
+
]
|
|
29
|
+
@exts = [".sz", ".snappy"]
|
|
20
30
|
end
|
|
21
31
|
|
|
22
32
|
def create_driver(conf = CONFIG)
|
|
@@ -24,7 +34,9 @@ class CompressorTest < Test::Unit::TestCase
|
|
|
24
34
|
end
|
|
25
35
|
|
|
26
36
|
def test_ext
|
|
27
|
-
|
|
37
|
+
for i in 0...@compressors_size do
|
|
38
|
+
assert_equal(@exts[i], @compressors[i].ext)
|
|
39
|
+
end
|
|
28
40
|
end
|
|
29
41
|
|
|
30
42
|
def test_compress
|
|
@@ -43,15 +55,17 @@ class CompressorTest < Test::Unit::TestCase
|
|
|
43
55
|
chunk << "hello snappy\n" * 32 * 1024
|
|
44
56
|
end
|
|
45
57
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
58
|
+
for i in 0...@compressors_size do
|
|
59
|
+
io = Tempfile.new("snappy-")
|
|
60
|
+
@compressors[i].compress(chunk, io)
|
|
61
|
+
io.open
|
|
62
|
+
chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
|
|
63
|
+
assert(chunk_bytesize > io.read.bytesize)
|
|
64
|
+
io.rewind
|
|
65
|
+
reader = @readers[i].new(io)
|
|
66
|
+
assert_equal(chunk.read, reader.read)
|
|
67
|
+
io.close
|
|
68
|
+
end
|
|
55
69
|
end
|
|
56
70
|
end
|
|
57
71
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fluent-plugin-webhdfs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.3.2
|
|
4
|
+
version: 1.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- TAGOMORI Satoshi
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-12-16 00:00:00.000000000 Z
|
|
11
|
+
date: 2020-12-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -154,14 +154,15 @@ files:
|
|
|
154
154
|
- lib/fluent/plugin/out_webhdfs.rb
|
|
155
155
|
- lib/fluent/plugin/webhdfs_compressor_bzip2.rb
|
|
156
156
|
- lib/fluent/plugin/webhdfs_compressor_gzip.rb
|
|
157
|
+
- lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb
|
|
157
158
|
- lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
|
|
158
159
|
- lib/fluent/plugin/webhdfs_compressor_snappy.rb
|
|
159
160
|
- lib/fluent/plugin/webhdfs_compressor_text.rb
|
|
160
161
|
- lib/fluent/plugin/webhdfs_compressor_zstd.rb
|
|
161
162
|
- test/helper.rb
|
|
162
|
-
- test/plugin/test_compressor.rb
|
|
163
163
|
- test/plugin/test_gzip_compressor.rb
|
|
164
164
|
- test/plugin/test_out_webhdfs.rb
|
|
165
|
+
- test/plugin/test_snappy_compressors.rb
|
|
165
166
|
- test/plugin/test_zstd_compressor.rb
|
|
166
167
|
homepage: https://github.com/fluent/fluent-plugin-webhdfs
|
|
167
168
|
licenses:
|
|
@@ -188,7 +189,7 @@ specification_version: 4
|
|
|
188
189
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|
|
189
190
|
test_files:
|
|
190
191
|
- test/helper.rb
|
|
191
|
-
- test/plugin/test_compressor.rb
|
|
192
192
|
- test/plugin/test_gzip_compressor.rb
|
|
193
193
|
- test/plugin/test_out_webhdfs.rb
|
|
194
|
+
- test/plugin/test_snappy_compressors.rb
|
|
194
195
|
- test/plugin/test_zstd_compressor.rb
|