fluent-plugin-webhdfs 1.3.2 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +14 -2
- data/fluent-plugin-webhdfs.gemspec +1 -1
- data/lib/fluent/plugin/out_webhdfs.rb +5 -3
- data/lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb +32 -0
- data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +8 -2
- data/test/plugin/test_out_webhdfs.rb +18 -0
- data/test/plugin/{test_compressor.rb → test_snappy_compressors.rb} +26 -12
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2d96f9304470f4d3409a1209b96c56722a62acfefc55633d81525e21103ec7e9
|
4
|
+
data.tar.gz: 8c6982670e30e112815a3abec28a2865609e4ca2cccbe097a42b6abb9080af21
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a56a3b8ac2e7bf279ddb23d5a4fafb187289883442b01355f86cf3d626332aff35a78f4fee38c32767a7162741b550143fbfc50f9d953db0ce8a8220d022d35f
|
7
|
+
data.tar.gz: 7110d25391fc90d0e0aa8042014596994b3fe700737a08febd1f1d69485fd9eef212f0882e51832e2a3f95d64a7e21e0091f8f8e37444151404104230645029d
|
data/README.md
CHANGED
@@ -157,18 +157,30 @@ If you want to compress data before storing it:
|
|
157
157
|
host namenode.your.cluster.local
|
158
158
|
port 50070
|
159
159
|
path /path/on/hdfs/access.log.%Y%m%d_%H
|
160
|
-
compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
|
160
|
+
compress gzip # or 'bzip2', 'snappy', 'hadoop_snappy', 'lzo_command', 'zstd'
|
161
161
|
</match>
|
162
162
|
|
163
|
-
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.lzo`, `.zst`).
|
163
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.snappy`, `.lzo`, `.zst`).
|
164
164
|
Note that you have to install additional gem for several compress algorithms:
|
165
165
|
|
166
166
|
- snappy: install snappy gem
|
167
|
+
- hadoop_snappy: install snappy gem
|
167
168
|
- bzip2: install bzip2-ffi gem
|
168
169
|
- zstd: install zstandard gem
|
169
170
|
|
170
171
|
Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
|
171
172
|
|
173
|
+
You can also specify compression block size (currently supported only for Snappy codecs):
|
174
|
+
|
175
|
+
<match access.**>
|
176
|
+
@type webhdfs
|
177
|
+
host namenode.your.cluster.local
|
178
|
+
port 50070
|
179
|
+
path /path/on/hdfs/access.log.%Y%m%d_%H
|
180
|
+
compress hadoop_snappy
|
181
|
+
block_size 32768
|
182
|
+
</match>
|
183
|
+
|
172
184
|
If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
|
173
185
|
|
174
186
|
<match access.**>
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
5
|
-
gem.version = "1.3.2"
|
5
|
+
gem.version = "1.4.0"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -67,8 +67,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
67
67
|
desc 'kerberos keytab file'
|
68
68
|
config_param :kerberos_keytab, :string, default: nil
|
69
69
|
|
70
|
-
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
|
71
|
-
desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
|
70
|
+
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :hadoop_snappy, :lzo_command, :zstd, :text]
|
71
|
+
desc "Compression method (#{SUPPORTED_COMPRESS.join(',')})"
|
72
72
|
config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
|
73
73
|
|
74
74
|
desc 'HDFS file extensions (overrides default compressor extensions)'
|
@@ -156,6 +156,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
156
156
|
end
|
157
157
|
|
158
158
|
@compressor = COMPRESSOR_REGISTRY.lookup(@compress.to_s).new
|
159
|
+
@compressor.configure(conf)
|
159
160
|
|
160
161
|
if @host
|
161
162
|
@namenode_host = @host
|
@@ -511,7 +512,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
511
512
|
begin
|
512
513
|
Open3.capture3("#{command} -V")
|
513
514
|
rescue Errno::ENOENT
|
514
|
-
raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
|
515
|
+
raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
|
515
516
|
end
|
516
517
|
end
|
517
518
|
end
|
@@ -527,5 +528,6 @@ require 'fluent/plugin/webhdfs_compressor_text'
|
|
527
528
|
require 'fluent/plugin/webhdfs_compressor_gzip'
|
528
529
|
require 'fluent/plugin/webhdfs_compressor_bzip2'
|
529
530
|
require 'fluent/plugin/webhdfs_compressor_snappy'
|
531
|
+
require 'fluent/plugin/webhdfs_compressor_hadoop_snappy'
|
530
532
|
require 'fluent/plugin/webhdfs_compressor_lzo_command'
|
531
533
|
require 'fluent/plugin/webhdfs_compressor_zstd'
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Fluent::Plugin
|
2
|
+
class WebHDFSOutput < Output
|
3
|
+
class HadoopSnappyCompressor < Compressor
|
4
|
+
WebHDFSOutput.register_compressor('hadoop_snappy', self)
|
5
|
+
|
6
|
+
DEFAULT_BLOCK_SIZE = 256 * 1024
|
7
|
+
|
8
|
+
desc 'Block size for compression algorithm'
|
9
|
+
config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
|
10
|
+
|
11
|
+
def initialize(options = {})
|
12
|
+
super()
|
13
|
+
begin
|
14
|
+
require "snappy"
|
15
|
+
rescue LoadError
|
16
|
+
raise Fluent::ConfigError, "Install snappy before using snappy compressor"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def ext
|
21
|
+
".snappy"
|
22
|
+
end
|
23
|
+
|
24
|
+
def compress(chunk, tmp)
|
25
|
+
Snappy::Hadoop::Writer.new(tmp, @block_size) do |w|
|
26
|
+
w << chunk.read
|
27
|
+
w.flush
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -3,11 +3,17 @@ module Fluent::Plugin
|
|
3
3
|
class SnappyCompressor < Compressor
|
4
4
|
WebHDFSOutput.register_compressor('snappy', self)
|
5
5
|
|
6
|
+
DEFAULT_BLOCK_SIZE = 32 * 1024
|
7
|
+
|
8
|
+
desc 'Block size for compression algorithm'
|
9
|
+
config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
|
10
|
+
|
6
11
|
def initialize(options = {})
|
12
|
+
super()
|
7
13
|
begin
|
8
14
|
require "snappy"
|
9
15
|
rescue LoadError
|
10
|
-
raise Fluent::ConfigError, "Install snappy before use snappy compressor"
|
16
|
+
raise Fluent::ConfigError, "Install snappy before using snappy compressor"
|
11
17
|
end
|
12
18
|
end
|
13
19
|
|
@@ -16,7 +22,7 @@ module Fluent::Plugin
|
|
16
22
|
end
|
17
23
|
|
18
24
|
def compress(chunk, tmp)
|
19
|
-
Snappy::Writer.new(tmp) do |w|
|
25
|
+
Snappy::Writer.new(tmp, @block_size) do |w|
|
20
26
|
w << chunk.read
|
21
27
|
w.flush
|
22
28
|
end
|
@@ -107,6 +107,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
107
107
|
data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
|
108
108
|
bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
|
109
109
|
snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
|
110
|
+
hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor],
|
110
111
|
lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
|
111
112
|
def test_compress(data)
|
112
113
|
compress_type, compressor_class = data
|
@@ -148,6 +149,23 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
148
149
|
assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
|
149
150
|
end
|
150
151
|
|
152
|
+
data(snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
|
153
|
+
hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor])
|
154
|
+
def test_compression_block_size(data)
|
155
|
+
compress_type, compressor_class = data
|
156
|
+
conf = config_element(
|
157
|
+
"ROOT", "", {
|
158
|
+
"host" => "namenode.local",
|
159
|
+
"path" => "/hdfs/path/file.%Y%m%d.log",
|
160
|
+
"compress" => compress_type,
|
161
|
+
"block_size" => 16384
|
162
|
+
})
|
163
|
+
d = create_driver(conf)
|
164
|
+
|
165
|
+
assert_equal compress_type, d.instance.compress
|
166
|
+
assert_equal 16384, d.instance.compressor.block_size
|
167
|
+
end
|
168
|
+
|
151
169
|
def test_placeholders_old_style
|
152
170
|
conf = config_element(
|
153
171
|
"ROOT", "", {
|
@@ -5,7 +5,7 @@ begin
|
|
5
5
|
rescue LoadError
|
6
6
|
end
|
7
7
|
|
8
|
-
class CompressorTest < Test::Unit::TestCase
|
8
|
+
class SnappyCompressorsTest < Test::Unit::TestCase
|
9
9
|
class Snappy < self
|
10
10
|
|
11
11
|
CONFIG = %[
|
@@ -16,7 +16,17 @@ class CompressorTest < Test::Unit::TestCase
|
|
16
16
|
def setup
|
17
17
|
omit unless Object.const_defined?(:Snappy)
|
18
18
|
Fluent::Test.setup
|
19
|
-
|
19
|
+
|
20
|
+
@compressors_size = 2
|
21
|
+
@compressors = [
|
22
|
+
Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new,
|
23
|
+
Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor.new
|
24
|
+
]
|
25
|
+
@readers = [
|
26
|
+
::Snappy::Reader,
|
27
|
+
::Snappy::Hadoop::Reader
|
28
|
+
]
|
29
|
+
@exts = [".sz", ".snappy"]
|
20
30
|
end
|
21
31
|
|
22
32
|
def create_driver(conf = CONFIG)
|
@@ -24,7 +34,9 @@ class CompressorTest < Test::Unit::TestCase
|
|
24
34
|
end
|
25
35
|
|
26
36
|
def test_ext
|
27
|
-
|
37
|
+
for i in 0...@compressors_size do
|
38
|
+
assert_equal(@exts[i], @compressors[i].ext)
|
39
|
+
end
|
28
40
|
end
|
29
41
|
|
30
42
|
def test_compress
|
@@ -43,15 +55,17 @@ class CompressorTest < Test::Unit::TestCase
|
|
43
55
|
chunk << "hello snappy\n" * 32 * 1024
|
44
56
|
end
|
45
57
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
58
|
+
for i in 0...@compressors_size do
|
59
|
+
io = Tempfile.new("snappy-")
|
60
|
+
@compressors[i].compress(chunk, io)
|
61
|
+
io.open
|
62
|
+
chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
|
63
|
+
assert(chunk_bytesize > io.read.bytesize)
|
64
|
+
io.rewind
|
65
|
+
reader = @readers[i].new(io)
|
66
|
+
assert_equal(chunk.read, reader.read)
|
67
|
+
io.close
|
68
|
+
end
|
55
69
|
end
|
56
70
|
end
|
57
71
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.2
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-12-
|
11
|
+
date: 2020-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -154,14 +154,15 @@ files:
|
|
154
154
|
- lib/fluent/plugin/out_webhdfs.rb
|
155
155
|
- lib/fluent/plugin/webhdfs_compressor_bzip2.rb
|
156
156
|
- lib/fluent/plugin/webhdfs_compressor_gzip.rb
|
157
|
+
- lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb
|
157
158
|
- lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
|
158
159
|
- lib/fluent/plugin/webhdfs_compressor_snappy.rb
|
159
160
|
- lib/fluent/plugin/webhdfs_compressor_text.rb
|
160
161
|
- lib/fluent/plugin/webhdfs_compressor_zstd.rb
|
161
162
|
- test/helper.rb
|
162
|
-
- test/plugin/test_compressor.rb
|
163
163
|
- test/plugin/test_gzip_compressor.rb
|
164
164
|
- test/plugin/test_out_webhdfs.rb
|
165
|
+
- test/plugin/test_snappy_compressors.rb
|
165
166
|
- test/plugin/test_zstd_compressor.rb
|
166
167
|
homepage: https://github.com/fluent/fluent-plugin-webhdfs
|
167
168
|
licenses:
|
@@ -188,7 +189,7 @@ specification_version: 4
|
|
188
189
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|
189
190
|
test_files:
|
190
191
|
- test/helper.rb
|
191
|
-
- test/plugin/test_compressor.rb
|
192
192
|
- test/plugin/test_gzip_compressor.rb
|
193
193
|
- test/plugin/test_out_webhdfs.rb
|
194
|
+
- test/plugin/test_snappy_compressors.rb
|
194
195
|
- test/plugin/test_zstd_compressor.rb
|