fluent-plugin-webhdfs 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -2
- data/fluent-plugin-webhdfs.gemspec +1 -1
- data/lib/fluent/plugin/out_webhdfs.rb +5 -3
- data/lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb +32 -0
- data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +8 -2
- data/test/plugin/test_out_webhdfs.rb +18 -0
- data/test/plugin/{test_compressor.rb → test_snappy_compressors.rb} +26 -12
- metadata +5 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2d96f9304470f4d3409a1209b96c56722a62acfefc55633d81525e21103ec7e9
|
|
4
|
+
data.tar.gz: 8c6982670e30e112815a3abec28a2865609e4ca2cccbe097a42b6abb9080af21
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a56a3b8ac2e7bf279ddb23d5a4fafb187289883442b01355f86cf3d626332aff35a78f4fee38c32767a7162741b550143fbfc50f9d953db0ce8a8220d022d35f
|
|
7
|
+
data.tar.gz: 7110d25391fc90d0e0aa8042014596994b3fe700737a08febd1f1d69485fd9eef212f0882e51832e2a3f95d64a7e21e0091f8f8e37444151404104230645029d
|
data/README.md
CHANGED
|
@@ -157,18 +157,30 @@ If you want to compress data before storing it:
|
|
|
157
157
|
host namenode.your.cluster.local
|
|
158
158
|
port 50070
|
|
159
159
|
path /path/on/hdfs/access.log.%Y%m%d_%H
|
|
160
|
-
compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
|
|
160
|
+
compress gzip # or 'bzip2', 'snappy', 'hadoop_snappy', 'lzo_command', 'zstd'
|
|
161
161
|
</match>
|
|
162
162
|
|
|
163
|
-
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.lzo`, `.zst`).
|
|
163
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.snappy`, `.lzo`, `.zst`).
|
|
164
164
|
Note that you have to install additional gem for several compress algorithms:
|
|
165
165
|
|
|
166
166
|
- snappy: install snappy gem
|
|
167
|
+
- hadoop_snappy: install snappy gem
|
|
167
168
|
- bzip2: install bzip2-ffi gem
|
|
168
169
|
- zstd: install zstandard gem
|
|
169
170
|
|
|
170
171
|
Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
|
|
171
172
|
|
|
173
|
+
You can also specify compression block size (currently supported only for Snappy codecs):
|
|
174
|
+
|
|
175
|
+
<match access.**>
|
|
176
|
+
@type webhdfs
|
|
177
|
+
host namenode.your.cluster.local
|
|
178
|
+
port 50070
|
|
179
|
+
path /path/on/hdfs/access.log.%Y%m%d_%H
|
|
180
|
+
compress hadoop_snappy
|
|
181
|
+
block_size 32768
|
|
182
|
+
</match>
|
|
183
|
+
|
|
172
184
|
If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
|
|
173
185
|
|
|
174
186
|
<match access.**>
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |gem|
|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
|
5
|
-
gem.version = "1.3.2"
|
|
5
|
+
gem.version = "1.4.0"
|
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
|
@@ -67,8 +67,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
|
67
67
|
desc 'kerberos keytab file'
|
|
68
68
|
config_param :kerberos_keytab, :string, default: nil
|
|
69
69
|
|
|
70
|
-
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
|
|
71
|
-
desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
|
|
70
|
+
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :hadoop_snappy, :lzo_command, :zstd, :text]
|
|
71
|
+
desc "Compression method (#{SUPPORTED_COMPRESS.join(',')})"
|
|
72
72
|
config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
|
|
73
73
|
|
|
74
74
|
desc 'HDFS file extensions (overrides default compressor extensions)'
|
|
@@ -156,6 +156,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
|
156
156
|
end
|
|
157
157
|
|
|
158
158
|
@compressor = COMPRESSOR_REGISTRY.lookup(@compress.to_s).new
|
|
159
|
+
@compressor.configure(conf)
|
|
159
160
|
|
|
160
161
|
if @host
|
|
161
162
|
@namenode_host = @host
|
|
@@ -511,7 +512,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
|
511
512
|
begin
|
|
512
513
|
Open3.capture3("#{command} -V")
|
|
513
514
|
rescue Errno::ENOENT
|
|
514
|
-
raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
|
|
515
|
+
raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
|
|
515
516
|
end
|
|
516
517
|
end
|
|
517
518
|
end
|
|
@@ -527,5 +528,6 @@ require 'fluent/plugin/webhdfs_compressor_text'
|
|
|
527
528
|
require 'fluent/plugin/webhdfs_compressor_gzip'
|
|
528
529
|
require 'fluent/plugin/webhdfs_compressor_bzip2'
|
|
529
530
|
require 'fluent/plugin/webhdfs_compressor_snappy'
|
|
531
|
+
require 'fluent/plugin/webhdfs_compressor_hadoop_snappy'
|
|
530
532
|
require 'fluent/plugin/webhdfs_compressor_lzo_command'
|
|
531
533
|
require 'fluent/plugin/webhdfs_compressor_zstd'
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
module Fluent::Plugin
|
|
2
|
+
class WebHDFSOutput < Output
|
|
3
|
+
class HadoopSnappyCompressor < Compressor
|
|
4
|
+
WebHDFSOutput.register_compressor('hadoop_snappy', self)
|
|
5
|
+
|
|
6
|
+
DEFAULT_BLOCK_SIZE = 256 * 1024
|
|
7
|
+
|
|
8
|
+
desc 'Block size for compression algorithm'
|
|
9
|
+
config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
|
|
10
|
+
|
|
11
|
+
def initialize(options = {})
|
|
12
|
+
super()
|
|
13
|
+
begin
|
|
14
|
+
require "snappy"
|
|
15
|
+
rescue LoadError
|
|
16
|
+
raise Fluent::ConfigError, "Install snappy before using snappy compressor"
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def ext
|
|
21
|
+
".snappy"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def compress(chunk, tmp)
|
|
25
|
+
Snappy::Hadoop::Writer.new(tmp, @block_size) do |w|
|
|
26
|
+
w << chunk.read
|
|
27
|
+
w.flush
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -3,11 +3,17 @@ module Fluent::Plugin
|
|
|
3
3
|
class SnappyCompressor < Compressor
|
|
4
4
|
WebHDFSOutput.register_compressor('snappy', self)
|
|
5
5
|
|
|
6
|
+
DEFAULT_BLOCK_SIZE = 32 * 1024
|
|
7
|
+
|
|
8
|
+
desc 'Block size for compression algorithm'
|
|
9
|
+
config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
|
|
10
|
+
|
|
6
11
|
def initialize(options = {})
|
|
12
|
+
super()
|
|
7
13
|
begin
|
|
8
14
|
require "snappy"
|
|
9
15
|
rescue LoadError
|
|
10
|
-
raise Fluent::ConfigError, "Install snappy before using snappy compressor"
|
|
16
|
+
raise Fluent::ConfigError, "Install snappy before using snappy compressor"
|
|
11
17
|
end
|
|
12
18
|
end
|
|
13
19
|
|
|
@@ -16,7 +22,7 @@ module Fluent::Plugin
|
|
|
16
22
|
end
|
|
17
23
|
|
|
18
24
|
def compress(chunk, tmp)
|
|
19
|
-
Snappy::Writer.new(tmp) do |w|
|
|
25
|
+
Snappy::Writer.new(tmp, @block_size) do |w|
|
|
20
26
|
w << chunk.read
|
|
21
27
|
w.flush
|
|
22
28
|
end
|
|
@@ -107,6 +107,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
|
107
107
|
data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
|
|
108
108
|
bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
|
|
109
109
|
snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
|
|
110
|
+
hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor],
|
|
110
111
|
lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
|
|
111
112
|
def test_compress(data)
|
|
112
113
|
compress_type, compressor_class = data
|
|
@@ -148,6 +149,23 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
|
148
149
|
assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
|
|
149
150
|
end
|
|
150
151
|
|
|
152
|
+
data(snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
|
|
153
|
+
hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor])
|
|
154
|
+
def test_compression_block_size(data)
|
|
155
|
+
compress_type, compressor_class = data
|
|
156
|
+
conf = config_element(
|
|
157
|
+
"ROOT", "", {
|
|
158
|
+
"host" => "namenode.local",
|
|
159
|
+
"path" => "/hdfs/path/file.%Y%m%d.log",
|
|
160
|
+
"compress" => compress_type,
|
|
161
|
+
"block_size" => 16384
|
|
162
|
+
})
|
|
163
|
+
d = create_driver(conf)
|
|
164
|
+
|
|
165
|
+
assert_equal compress_type, d.instance.compress
|
|
166
|
+
assert_equal 16384, d.instance.compressor.block_size
|
|
167
|
+
end
|
|
168
|
+
|
|
151
169
|
def test_placeholders_old_style
|
|
152
170
|
conf = config_element(
|
|
153
171
|
"ROOT", "", {
|
|
@@ -5,7 +5,7 @@ begin
|
|
|
5
5
|
rescue LoadError
|
|
6
6
|
end
|
|
7
7
|
|
|
8
|
-
class CompressorTest < Test::Unit::TestCase
|
|
8
|
+
class SnappyCompressorsTest < Test::Unit::TestCase
|
|
9
9
|
class Snappy < self
|
|
10
10
|
|
|
11
11
|
CONFIG = %[
|
|
@@ -16,7 +16,17 @@ class CompressorTest < Test::Unit::TestCase
|
|
|
16
16
|
def setup
|
|
17
17
|
omit unless Object.const_defined?(:Snappy)
|
|
18
18
|
Fluent::Test.setup
|
|
19
|
-
|
|
19
|
+
|
|
20
|
+
@compressors_size = 2
|
|
21
|
+
@compressors = [
|
|
22
|
+
Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new,
|
|
23
|
+
Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor.new
|
|
24
|
+
]
|
|
25
|
+
@readers = [
|
|
26
|
+
::Snappy::Reader,
|
|
27
|
+
::Snappy::Hadoop::Reader
|
|
28
|
+
]
|
|
29
|
+
@exts = [".sz", ".snappy"]
|
|
20
30
|
end
|
|
21
31
|
|
|
22
32
|
def create_driver(conf = CONFIG)
|
|
@@ -24,7 +34,9 @@ class CompressorTest < Test::Unit::TestCase
|
|
|
24
34
|
end
|
|
25
35
|
|
|
26
36
|
def test_ext
|
|
27
|
-
|
|
37
|
+
for i in 0...@compressors_size do
|
|
38
|
+
assert_equal(@exts[i], @compressors[i].ext)
|
|
39
|
+
end
|
|
28
40
|
end
|
|
29
41
|
|
|
30
42
|
def test_compress
|
|
@@ -43,15 +55,17 @@ class CompressorTest < Test::Unit::TestCase
|
|
|
43
55
|
chunk << "hello snappy\n" * 32 * 1024
|
|
44
56
|
end
|
|
45
57
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
58
|
+
for i in 0...@compressors_size do
|
|
59
|
+
io = Tempfile.new("snappy-")
|
|
60
|
+
@compressors[i].compress(chunk, io)
|
|
61
|
+
io.open
|
|
62
|
+
chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
|
|
63
|
+
assert(chunk_bytesize > io.read.bytesize)
|
|
64
|
+
io.rewind
|
|
65
|
+
reader = @readers[i].new(io)
|
|
66
|
+
assert_equal(chunk.read, reader.read)
|
|
67
|
+
io.close
|
|
68
|
+
end
|
|
55
69
|
end
|
|
56
70
|
end
|
|
57
71
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fluent-plugin-webhdfs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.3.2
|
|
4
|
+
version: 1.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- TAGOMORI Satoshi
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-12-16 00:00:00.000000000 Z
|
|
11
|
+
date: 2020-12-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -154,14 +154,15 @@ files:
|
|
|
154
154
|
- lib/fluent/plugin/out_webhdfs.rb
|
|
155
155
|
- lib/fluent/plugin/webhdfs_compressor_bzip2.rb
|
|
156
156
|
- lib/fluent/plugin/webhdfs_compressor_gzip.rb
|
|
157
|
+
- lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb
|
|
157
158
|
- lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
|
|
158
159
|
- lib/fluent/plugin/webhdfs_compressor_snappy.rb
|
|
159
160
|
- lib/fluent/plugin/webhdfs_compressor_text.rb
|
|
160
161
|
- lib/fluent/plugin/webhdfs_compressor_zstd.rb
|
|
161
162
|
- test/helper.rb
|
|
162
|
-
- test/plugin/test_compressor.rb
|
|
163
163
|
- test/plugin/test_gzip_compressor.rb
|
|
164
164
|
- test/plugin/test_out_webhdfs.rb
|
|
165
|
+
- test/plugin/test_snappy_compressors.rb
|
|
165
166
|
- test/plugin/test_zstd_compressor.rb
|
|
166
167
|
homepage: https://github.com/fluent/fluent-plugin-webhdfs
|
|
167
168
|
licenses:
|
|
@@ -188,7 +189,7 @@ specification_version: 4
|
|
|
188
189
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|
|
189
190
|
test_files:
|
|
190
191
|
- test/helper.rb
|
|
191
|
-
- test/plugin/test_compressor.rb
|
|
192
192
|
- test/plugin/test_gzip_compressor.rb
|
|
193
193
|
- test/plugin/test_out_webhdfs.rb
|
|
194
|
+
- test/plugin/test_snappy_compressors.rb
|
|
194
195
|
- test/plugin/test_zstd_compressor.rb
|