fluent-plugin-webhdfs 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea946e8bbbf059043af07bb698aba729d7e5b2ecc7699b3275c34cfde49b3412
4
- data.tar.gz: 31a42272f10bb2e16d60a93a923c3de1882cef45f05e76a5c38d86620d927795
3
+ metadata.gz: 2d96f9304470f4d3409a1209b96c56722a62acfefc55633d81525e21103ec7e9
4
+ data.tar.gz: 8c6982670e30e112815a3abec28a2865609e4ca2cccbe097a42b6abb9080af21
5
5
  SHA512:
6
- metadata.gz: 8d0527a147d497f309ef9c4d965ad87348e01e96eabe073810e2c613ba8473988866604a57dd7524d8a66ec4f23bba07d40d4f03d16b28847697a8b7717f9c36
7
- data.tar.gz: d03c99a56a7a0e34424c923f0b3179bf038e46dee4a765142adbb9944dfe9c72d41c27a80c6757fdfb63f071a00f390dac1e74275a884406a3aa65b065481723
6
+ metadata.gz: a56a3b8ac2e7bf279ddb23d5a4fafb187289883442b01355f86cf3d626332aff35a78f4fee38c32767a7162741b550143fbfc50f9d953db0ce8a8220d022d35f
7
+ data.tar.gz: 7110d25391fc90d0e0aa8042014596994b3fe700737a08febd1f1d69485fd9eef212f0882e51832e2a3f95d64a7e21e0091f8f8e37444151404104230645029d
data/README.md CHANGED
@@ -157,18 +157,30 @@ If you want to compress data before storing it:
157
157
  host namenode.your.cluster.local
158
158
  port 50070
159
159
  path /path/on/hdfs/access.log.%Y%m%d_%H
160
- compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
160
+ compress gzip # or 'bzip2', 'snappy', 'hadoop_snappy', 'lzo_command', 'zstd'
161
161
  </match>
162
162
 
163
- Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`, `.zst`).
163
+ Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.snappy`, `.lzo`, `.zst`).
164
164
  Note that you have to install additional gem for several compress algorithms:
165
165
 
166
166
  - snappy: install snappy gem
167
+ - hadoop_snappy: install snappy gem
167
168
  - bzip2: install bzip2-ffi gem
168
169
  - zstd: install zstandard gem
169
170
 
170
171
  Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
171
172
 
173
+ You can also specify compression block size (currently supported only for Snappy codecs):
174
+
175
+ <match access.**>
176
+ @type webhdfs
177
+ host namenode.your.cluster.local
178
+ port 50070
179
+ path /path/on/hdfs/access.log.%Y%m%d_%H
180
+ compress hadoop_snappy
181
+ block_size 32768
182
+ </match>
183
+
172
184
  If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
173
185
 
174
186
  <match access.**>
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-webhdfs"
5
- gem.version = "1.3.2"
5
+ gem.version = "1.4.0"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -67,8 +67,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
67
67
  desc 'kerberos keytab file'
68
68
  config_param :kerberos_keytab, :string, default: nil
69
69
 
70
- SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
71
- desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
70
+ SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :hadoop_snappy, :lzo_command, :zstd, :text]
71
+ desc "Compression method (#{SUPPORTED_COMPRESS.join(',')})"
72
72
  config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
73
73
 
74
74
  desc 'HDFS file extensions (overrides default compressor extensions)'
@@ -156,6 +156,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
156
156
  end
157
157
 
158
158
  @compressor = COMPRESSOR_REGISTRY.lookup(@compress.to_s).new
159
+ @compressor.configure(conf)
159
160
 
160
161
  if @host
161
162
  @namenode_host = @host
@@ -511,7 +512,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
511
512
  begin
512
513
  Open3.capture3("#{command} -V")
513
514
  rescue Errno::ENOENT
514
- raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
515
+ raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
515
516
  end
516
517
  end
517
518
  end
@@ -527,5 +528,6 @@ require 'fluent/plugin/webhdfs_compressor_text'
527
528
  require 'fluent/plugin/webhdfs_compressor_gzip'
528
529
  require 'fluent/plugin/webhdfs_compressor_bzip2'
529
530
  require 'fluent/plugin/webhdfs_compressor_snappy'
531
+ require 'fluent/plugin/webhdfs_compressor_hadoop_snappy'
530
532
  require 'fluent/plugin/webhdfs_compressor_lzo_command'
531
533
  require 'fluent/plugin/webhdfs_compressor_zstd'
@@ -0,0 +1,32 @@
1
+ module Fluent::Plugin
2
+ class WebHDFSOutput < Output
3
+ class HadoopSnappyCompressor < Compressor
4
+ WebHDFSOutput.register_compressor('hadoop_snappy', self)
5
+
6
+ DEFAULT_BLOCK_SIZE = 256 * 1024
7
+
8
+ desc 'Block size for compression algorithm'
9
+ config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
10
+
11
+ def initialize(options = {})
12
+ super()
13
+ begin
14
+ require "snappy"
15
+ rescue LoadError
16
+ raise Fluent::ConfigError, "Install snappy before using snappy compressor"
17
+ end
18
+ end
19
+
20
+ def ext
21
+ ".snappy"
22
+ end
23
+
24
+ def compress(chunk, tmp)
25
+ Snappy::Hadoop::Writer.new(tmp, @block_size) do |w|
26
+ w << chunk.read
27
+ w.flush
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -5,8 +5,8 @@ module Fluent::Plugin
5
5
 
6
6
  config_param :command_parameter, :string, default: '-qf1'
7
7
 
8
- def configure(conf)
9
- super
8
+ def initialize(options = {})
9
+ super()
10
10
  check_command('lzop', 'LZO')
11
11
  end
12
12
 
@@ -3,11 +3,17 @@ module Fluent::Plugin
3
3
  class SnappyCompressor < Compressor
4
4
  WebHDFSOutput.register_compressor('snappy', self)
5
5
 
6
+ DEFAULT_BLOCK_SIZE = 32 * 1024
7
+
8
+ desc 'Block size for compression algorithm'
9
+ config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
10
+
6
11
  def initialize(options = {})
12
+ super()
7
13
  begin
8
14
  require "snappy"
9
15
  rescue LoadError
10
- raise Fluent::ConfigError, "Install snappy before use snappy compressor"
16
+ raise Fluent::ConfigError, "Install snappy before using snappy compressor"
11
17
  end
12
18
  end
13
19
 
@@ -16,7 +22,7 @@ module Fluent::Plugin
16
22
  end
17
23
 
18
24
  def compress(chunk, tmp)
19
- Snappy::Writer.new(tmp) do |w|
25
+ Snappy::Writer.new(tmp, @block_size) do |w|
20
26
  w << chunk.read
21
27
  w.flush
22
28
  end
@@ -107,6 +107,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
107
107
  data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
108
108
  bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
109
109
  snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
110
+ hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor],
110
111
  lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
111
112
  def test_compress(data)
112
113
  compress_type, compressor_class = data
@@ -148,6 +149,23 @@ class WebHDFSOutputTest < Test::Unit::TestCase
148
149
  assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
149
150
  end
150
151
 
152
+ data(snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
153
+ hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor])
154
+ def test_compression_block_size(data)
155
+ compress_type, compressor_class = data
156
+ conf = config_element(
157
+ "ROOT", "", {
158
+ "host" => "namenode.local",
159
+ "path" => "/hdfs/path/file.%Y%m%d.log",
160
+ "compress" => compress_type,
161
+ "block_size" => 16384
162
+ })
163
+ d = create_driver(conf)
164
+
165
+ assert_equal compress_type, d.instance.compress
166
+ assert_equal 16384, d.instance.compressor.block_size
167
+ end
168
+
151
169
  def test_placeholders_old_style
152
170
  conf = config_element(
153
171
  "ROOT", "", {
@@ -5,7 +5,7 @@ begin
5
5
  rescue LoadError
6
6
  end
7
7
 
8
- class CompressorTest < Test::Unit::TestCase
8
+ class SnappyCompressorsTest < Test::Unit::TestCase
9
9
  class Snappy < self
10
10
 
11
11
  CONFIG = %[
@@ -16,7 +16,17 @@ class CompressorTest < Test::Unit::TestCase
16
16
  def setup
17
17
  omit unless Object.const_defined?(:Snappy)
18
18
  Fluent::Test.setup
19
- @compressor = Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new
19
+
20
+ @compressors_size = 2
21
+ @compressors = [
22
+ Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new,
23
+ Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor.new
24
+ ]
25
+ @readers = [
26
+ ::Snappy::Reader,
27
+ ::Snappy::Hadoop::Reader
28
+ ]
29
+ @exts = [".sz", ".snappy"]
20
30
  end
21
31
 
22
32
  def create_driver(conf = CONFIG)
@@ -24,7 +34,9 @@ class CompressorTest < Test::Unit::TestCase
24
34
  end
25
35
 
26
36
  def test_ext
27
- assert_equal(".sz", @compressor.ext)
37
+ for i in 0...@compressors_size do
38
+ assert_equal(@exts[i], @compressors[i].ext)
39
+ end
28
40
  end
29
41
 
30
42
  def test_compress
@@ -43,15 +55,17 @@ class CompressorTest < Test::Unit::TestCase
43
55
  chunk << "hello snappy\n" * 32 * 1024
44
56
  end
45
57
 
46
- io = Tempfile.new("snappy-")
47
- @compressor.compress(chunk, io)
48
- io.open
49
- chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
50
- assert(chunk_bytesize > io.read.bytesize)
51
- io.rewind
52
- reader = ::Snappy::Reader.new(io)
53
- assert_equal(chunk.read, reader.read)
54
- io.close
58
+ for i in 0...@compressors_size do
59
+ io = Tempfile.new("snappy-")
60
+ @compressors[i].compress(chunk, io)
61
+ io.open
62
+ chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
63
+ assert(chunk_bytesize > io.read.bytesize)
64
+ io.rewind
65
+ reader = @readers[i].new(io)
66
+ assert_equal(chunk.read, reader.read)
67
+ io.close
68
+ end
55
69
  end
56
70
  end
57
71
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.2
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-02 00:00:00.000000000 Z
11
+ date: 2020-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -154,14 +154,15 @@ files:
154
154
  - lib/fluent/plugin/out_webhdfs.rb
155
155
  - lib/fluent/plugin/webhdfs_compressor_bzip2.rb
156
156
  - lib/fluent/plugin/webhdfs_compressor_gzip.rb
157
+ - lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb
157
158
  - lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
158
159
  - lib/fluent/plugin/webhdfs_compressor_snappy.rb
159
160
  - lib/fluent/plugin/webhdfs_compressor_text.rb
160
161
  - lib/fluent/plugin/webhdfs_compressor_zstd.rb
161
162
  - test/helper.rb
162
- - test/plugin/test_compressor.rb
163
163
  - test/plugin/test_gzip_compressor.rb
164
164
  - test/plugin/test_out_webhdfs.rb
165
+ - test/plugin/test_snappy_compressors.rb
165
166
  - test/plugin/test_zstd_compressor.rb
166
167
  homepage: https://github.com/fluent/fluent-plugin-webhdfs
167
168
  licenses:
@@ -188,7 +189,7 @@ specification_version: 4
188
189
  summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
189
190
  test_files:
190
191
  - test/helper.rb
191
- - test/plugin/test_compressor.rb
192
192
  - test/plugin/test_gzip_compressor.rb
193
193
  - test/plugin/test_out_webhdfs.rb
194
+ - test/plugin/test_snappy_compressors.rb
194
195
  - test/plugin/test_zstd_compressor.rb