fluent-plugin-webhdfs 1.3.2 → 1.4.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ea946e8bbbf059043af07bb698aba729d7e5b2ecc7699b3275c34cfde49b3412
-  data.tar.gz: 31a42272f10bb2e16d60a93a923c3de1882cef45f05e76a5c38d86620d927795
+  metadata.gz: 2d96f9304470f4d3409a1209b96c56722a62acfefc55633d81525e21103ec7e9
+  data.tar.gz: 8c6982670e30e112815a3abec28a2865609e4ca2cccbe097a42b6abb9080af21
 SHA512:
-  metadata.gz: 8d0527a147d497f309ef9c4d965ad87348e01e96eabe073810e2c613ba8473988866604a57dd7524d8a66ec4f23bba07d40d4f03d16b28847697a8b7717f9c36
-  data.tar.gz: d03c99a56a7a0e34424c923f0b3179bf038e46dee4a765142adbb9944dfe9c72d41c27a80c6757fdfb63f071a00f390dac1e74275a884406a3aa65b065481723
+  metadata.gz: a56a3b8ac2e7bf279ddb23d5a4fafb187289883442b01355f86cf3d626332aff35a78f4fee38c32767a7162741b550143fbfc50f9d953db0ce8a8220d022d35f
+  data.tar.gz: 7110d25391fc90d0e0aa8042014596994b3fe700737a08febd1f1d69485fd9eef212f0882e51832e2a3f95d64a7e21e0091f8f8e37444151404104230645029d
data/README.md CHANGED
@@ -157,18 +157,30 @@ If you want to compress data before storing it:
       host namenode.your.cluster.local
       port 50070
       path /path/on/hdfs/access.log.%Y%m%d_%H
-      compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
+      compress gzip # or 'bzip2', 'snappy', 'hadoop_snappy', 'lzo_command', 'zstd'
     </match>
 
-Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`, `.zst`).
+Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.snappy`, `.lzo`, `.zst`).
 Note that you have to install an additional gem for several compression algorithms:
 
 - snappy: install snappy gem
+- hadoop_snappy: install snappy gem
 - bzip2: install bzip2-ffi gem
 - zstd: install zstandard gem
 
 Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
 
+You can also specify compression block size (currently supported only for Snappy codecs):
+
+    <match access.**>
+      @type webhdfs
+      host namenode.your.cluster.local
+      port 50070
+      path /path/on/hdfs/access.log.%Y%m%d_%H
+      compress hadoop_snappy
+      block_size 32768
+    </match>
+
 If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
 
     <match access.**>
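To make the new suffix rule concrete, a small illustration (hostname and timestamp are hypothetical): with the `path` above and `compress hadoop_snappy`, a chunk flushed for 2020-12-22 13:00 is written to a path like:

    /path/on/hdfs/access.log.20201222_13.snappy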
fluent-plugin-webhdfs.gemspec CHANGED
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-webhdfs"
-  gem.version = "1.3.2"
+  gem.version = "1.4.0"
   gem.authors = ["TAGOMORI Satoshi"]
   gem.email = ["tagomoris@gmail.com"]
   gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
lib/fluent/plugin/out_webhdfs.rb CHANGED
@@ -67,8 +67,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   desc 'kerberos keytab file'
   config_param :kerberos_keytab, :string, default: nil
 
-  SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
-  desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
+  SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :hadoop_snappy, :lzo_command, :zstd, :text]
+  desc "Compression method (#{SUPPORTED_COMPRESS.join(',')})"
   config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
 
   desc 'HDFS file extensions (overrides default compressor extensions)'
@@ -156,6 +156,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     end
 
     @compressor = COMPRESSOR_REGISTRY.lookup(@compress.to_s).new
+    @compressor.configure(conf)
 
     if @host
       @namenode_host = @host
@@ -511,7 +512,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     begin
       Open3.capture3("#{command} -V")
     rescue Errno::ENOENT
-      raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
+      raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
     end
   end
 end
@@ -527,5 +528,6 @@ require 'fluent/plugin/webhdfs_compressor_text'
 require 'fluent/plugin/webhdfs_compressor_gzip'
 require 'fluent/plugin/webhdfs_compressor_bzip2'
 require 'fluent/plugin/webhdfs_compressor_snappy'
+require 'fluent/plugin/webhdfs_compressor_hadoop_snappy'
 require 'fluent/plugin/webhdfs_compressor_lzo_command'
 require 'fluent/plugin/webhdfs_compressor_zstd'
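To make the new `@compressor.configure(conf)` line concrete: compressors are registered by name and instantiated via registry lookup, and the added configure call is what forwards plugin configuration (such as `block_size`) to that instance. Below is a minimal, self-contained Ruby sketch of the pattern; `Registry` and the hash-based `configure` are simplified stand-ins, not the plugin's actual Fluentd machinery (which uses `Fluent::Registry` and `config_param`):

    # Simplified sketch of the register/lookup/configure pattern.
    class Registry
      def initialize
        @map = {}
      end

      def register(name, klass)
        @map[name] = klass
      end

      def lookup(name)
        @map.fetch(name) { raise ArgumentError, "unknown compressor: #{name}" }
      end
    end

    REGISTRY = Registry.new

    class HadoopSnappyCompressor
      attr_reader :block_size

      # Stand-in for Fluentd's Configurable#configure: without this call,
      # block_size would never reach the instance created by lookup(...).new.
      def configure(conf)
        @block_size = Integer(conf.fetch("block_size", 256 * 1024))
      end
    end

    REGISTRY.register("hadoop_snappy", HadoopSnappyCompressor)

    compressor = REGISTRY.lookup("hadoop_snappy").new
    compressor.configure("block_size" => 32768)
    compressor.block_size  # => 32768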
lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb ADDED
@@ -0,0 +1,32 @@
+module Fluent::Plugin
+  class WebHDFSOutput < Output
+    class HadoopSnappyCompressor < Compressor
+      WebHDFSOutput.register_compressor('hadoop_snappy', self)
+
+      DEFAULT_BLOCK_SIZE = 256 * 1024
+
+      desc 'Block size for compression algorithm'
+      config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
+
+      def initialize(options = {})
+        super()
+        begin
+          require "snappy"
+        rescue LoadError
+          raise Fluent::ConfigError, "Install snappy before using snappy compressor"
+        end
+      end
+
+      def ext
+        ".snappy"
+      end
+
+      def compress(chunk, tmp)
+        Snappy::Hadoop::Writer.new(tmp, @block_size) do |w|
+          w << chunk.read
+          w.flush
+        end
+      end
+    end
+  end
+end
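A quick round trip of the new codec against the snappy gem, sketched with `StringIO` in place of the plugin's chunk and Tempfile plumbing (payload and block size are arbitrary):

    require "snappy"
    require "stringio"

    data = "hello hadoop snappy\n" * 1024

    io = StringIO.new
    # Same writer call the compressor uses: Snappy::Hadoop::Writer.new(tmp, @block_size)
    Snappy::Hadoop::Writer.new(io, 256 * 1024) do |w|
      w << data
      w.flush
    end

    io.rewind
    restored = Snappy::Hadoop::Reader.new(io).read  # reader used by the test suite
    restored == data          # => true
    io.size < data.bytesize   # => true for repetitive input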
lib/fluent/plugin/webhdfs_compressor_lzo_command.rb CHANGED
@@ -5,8 +5,8 @@ module Fluent::Plugin
 
     config_param :command_parameter, :string, default: '-qf1'
 
-    def configure(conf)
-      super
+    def initialize(options = {})
+      super()
       check_command('lzop', 'LZO')
     end
 
lib/fluent/plugin/webhdfs_compressor_snappy.rb CHANGED
@@ -3,11 +3,17 @@ module Fluent::Plugin
   class SnappyCompressor < Compressor
     WebHDFSOutput.register_compressor('snappy', self)
 
+    DEFAULT_BLOCK_SIZE = 32 * 1024
+
+    desc 'Block size for compression algorithm'
+    config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
+
     def initialize(options = {})
+      super()
       begin
         require "snappy"
       rescue LoadError
-        raise Fluent::ConfigError, "Install snappy before use snappy compressor"
+        raise Fluent::ConfigError, "Install snappy before using snappy compressor"
       end
     end
 
@@ -16,7 +22,7 @@ module Fluent::Plugin
     end
 
     def compress(chunk, tmp)
-      Snappy::Writer.new(tmp) do |w|
+      Snappy::Writer.new(tmp, @block_size) do |w|
         w << chunk.read
         w.flush
       end
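The framed `.sz` variant now takes the same block-size argument; a round-trip sketch under the same assumptions (arbitrary payload, `StringIO` standing in for the plugin's buffers):

    require "snappy"
    require "stringio"

    data = "hello snappy\n" * 1024

    io = StringIO.new
    # Matches the updated compressor: Snappy::Writer.new(tmp, @block_size)
    Snappy::Writer.new(io, 32 * 1024) do |w|
      w << data
      w.flush
    end

    io.rewind
    Snappy::Reader.new(io).read == data  # => true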
test/plugin/test_out_webhdfs.rb CHANGED
@@ -107,6 +107,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
   data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
        bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
        snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
+       hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor],
        lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
   def test_compress(data)
     compress_type, compressor_class = data
@@ -148,6 +149,23 @@ class WebHDFSOutputTest < Test::Unit::TestCase
     assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
   end
 
+  data(snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
+       hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor])
+  def test_compression_block_size(data)
+    compress_type, compressor_class = data
+    conf = config_element(
+      "ROOT", "", {
+        "host" => "namenode.local",
+        "path" => "/hdfs/path/file.%Y%m%d.log",
+        "compress" => compress_type,
+        "block_size" => 16384
+      })
+    d = create_driver(conf)
+
+    assert_equal compress_type, d.instance.compress
+    assert_equal 16384, d.instance.compressor.block_size
+  end
+
   def test_placeholders_old_style
     conf = config_element(
       "ROOT", "", {
test/plugin/test_compressor.rb → test/plugin/test_snappy_compressors.rb RENAMED
@@ -5,7 +5,7 @@ begin
 rescue LoadError
 end
 
-class CompressorTest < Test::Unit::TestCase
+class SnappyCompressorsTest < Test::Unit::TestCase
   class Snappy < self
 
     CONFIG = %[
@@ -16,7 +16,17 @@ class CompressorTest < Test::Unit::TestCase
     def setup
      omit unless Object.const_defined?(:Snappy)
      Fluent::Test.setup
-      @compressor = Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new
+
+      @compressors_size = 2
+      @compressors = [
+        Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new,
+        Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor.new
+      ]
+      @readers = [
+        ::Snappy::Reader,
+        ::Snappy::Hadoop::Reader
+      ]
+      @exts = [".sz", ".snappy"]
     end
 
     def create_driver(conf = CONFIG)
@@ -24,7 +34,9 @@ class CompressorTest < Test::Unit::TestCase
     end
 
     def test_ext
-      assert_equal(".sz", @compressor.ext)
+      for i in 0...@compressors_size do
+        assert_equal(@exts[i], @compressors[i].ext)
+      end
     end
 
     def test_compress
@@ -43,15 +55,17 @@ class CompressorTest < Test::Unit::TestCase
        chunk << "hello snappy\n" * 32 * 1024
      end
 
-      io = Tempfile.new("snappy-")
-      @compressor.compress(chunk, io)
-      io.open
-      chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
-      assert(chunk_bytesize > io.read.bytesize)
-      io.rewind
-      reader = ::Snappy::Reader.new(io)
-      assert_equal(chunk.read, reader.read)
-      io.close
+      for i in 0...@compressors_size do
+        io = Tempfile.new("snappy-")
+        @compressors[i].compress(chunk, io)
+        io.open
+        chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
+        assert(chunk_bytesize > io.read.bytesize)
+        io.rewind
+        reader = @readers[i].new(io)
+        assert_equal(chunk.read, reader.read)
+        io.close
+      end
     end
   end
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-webhdfs
 version: !ruby/object:Gem::Version
-  version: 1.3.2
+  version: 1.4.0
 platform: ruby
 authors:
 - TAGOMORI Satoshi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-12-02 00:00:00.000000000 Z
+date: 2020-12-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -154,14 +154,15 @@ files:
 - lib/fluent/plugin/out_webhdfs.rb
 - lib/fluent/plugin/webhdfs_compressor_bzip2.rb
 - lib/fluent/plugin/webhdfs_compressor_gzip.rb
+- lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb
 - lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
 - lib/fluent/plugin/webhdfs_compressor_snappy.rb
 - lib/fluent/plugin/webhdfs_compressor_text.rb
 - lib/fluent/plugin/webhdfs_compressor_zstd.rb
 - test/helper.rb
-- test/plugin/test_compressor.rb
 - test/plugin/test_gzip_compressor.rb
 - test/plugin/test_out_webhdfs.rb
+- test/plugin/test_snappy_compressors.rb
 - test/plugin/test_zstd_compressor.rb
@@ -188,7 +189,7 @@ specification_version: 4
 summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
 test_files:
 - test/helper.rb
-- test/plugin/test_compressor.rb
 - test/plugin/test_gzip_compressor.rb
 - test/plugin/test_out_webhdfs.rb
+- test/plugin/test_snappy_compressors.rb
 - test/plugin/test_zstd_compressor.rb