fluent-plugin-webhdfs 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/README.md +8 -3
- data/fluent-plugin-webhdfs.gemspec +3 -2
- data/lib/fluent/plugin/out_webhdfs.rb +14 -6
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +4 -3
- data/lib/fluent/plugin/webhdfs_compressor_zstd.rb +24 -0
- data/test/plugin/test_out_webhdfs.rb +16 -0
- data/test/plugin/test_zstd_compressor.rb +55 -0
- metadata +22 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 6318fcc2f54eb64be7d99978a04b1cad30e740734ed0dafdf1441cf65988f1ae
|
4
|
+
data.tar.gz: 1d49b9c8b009a9a6078c4f9e9fe08b79832510cc780e50344ea72e8493c8e5c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16f6c451fe30d09b33255dcac7ed8714beb0c157fee3bdce8e041b1de9c0aa032910b8cf21ac1effd8dddbea4240a44c5e68e3a83275e386fd881dbb22c75b1d
|
7
|
+
data.tar.gz: c59533a82d5c46e9ea178402ede6852aa4d8fa7375981433b7f5b16311a55bb4e58234a0e28b657e318b690946c28622bfbda5da6b4318549342a394d6b8c15a
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# fluent-plugin-webhdfs
|
2
2
|
|
3
|
+
[![Build Status](https://travis-ci.org/fluent/fluent-plugin-webhdfs.svg?branch=master)](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
|
4
|
+
|
3
5
|
[Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
|
4
6
|
|
5
7
|
"webhdfs" output plugin formats data into plain text, and stores it as files on HDFS. This plugin supports:
|
@@ -155,14 +157,17 @@ If you want to compress data before storing it:
|
|
155
157
|
host namenode.your.cluster.local
|
156
158
|
port 50070
|
157
159
|
path /path/on/hdfs/access.log.%Y%m%d_%H
|
158
|
-
compress gzip # or 'bzip2', 'snappy', 'lzo_command'
|
160
|
+
compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
|
159
161
|
</match>
|
160
162
|
|
161
|
-
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`).
|
163
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.lzo`, `.zst`).
|
162
164
|
Note that you have to install an additional gem for several compression algorithms:
|
163
165
|
|
164
166
|
- snappy: install snappy gem
|
165
167
|
- bzip2: install bzip2-ffi gem
|
168
|
+
- zstd: install zstandard gem
|
169
|
+
|
170
|
+
Note that zstd requires the libzstd native library to be installed. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
|
166
171
|
|
167
172
|
### Namenode HA / Auto retry for WebHDFS known errors
|
168
173
|
|
@@ -225,7 +230,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
|
|
225
230
|
|
226
231
|
With the default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raises an error for inactive NameNodes.
|
227
232
|
|
228
|
-
If you were
|
233
|
+
If you are using unstable NameNodes and want to ignore NameNode errors on startup of fluentd, enable the `ignore_start_check_error` option as shown below:
|
229
234
|
|
230
235
|
<match access.**>
|
231
236
|
@type webhdfs
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
5
|
-
gem.version = "1.
|
5
|
+
gem.version = "1.3.0"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
|
|
21
21
|
gem.add_development_dependency "appraisal"
|
22
22
|
gem.add_development_dependency "snappy", '>= 0.0.13'
|
23
23
|
gem.add_development_dependency "bzip2-ffi"
|
24
|
-
gem.
|
24
|
+
gem.add_development_dependency "zstandard"
|
25
|
+
gem.add_runtime_dependency "fluentd", '>= 0.14.22'
|
25
26
|
gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
|
26
27
|
end
|
@@ -67,7 +67,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
67
67
|
desc 'kerberos keytab file'
|
68
68
|
config_param :kerberos_keytab, :string, default: nil
|
69
69
|
|
70
|
-
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
|
70
|
+
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
|
71
71
|
desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
|
72
72
|
config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
|
73
73
|
|
@@ -98,20 +98,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
98
98
|
end
|
99
99
|
|
100
100
|
def configure(conf)
|
101
|
-
compat_parameters_convert
|
102
|
-
|
101
|
+
# #compat_parameters_convert ignore time format in conf["path"],
|
102
|
+
# so check conf["path"] and overwrite the default value later if needed
|
103
103
|
timekey = case conf["path"]
|
104
104
|
when /%S/ then 1
|
105
105
|
when /%M/ then 60
|
106
106
|
when /%H/ then 3600
|
107
107
|
else 86400
|
108
108
|
end
|
109
|
+
if buffer_config = conf.elements(name: "buffer").first
|
110
|
+
timekey = buffer_config["timekey"] || timekey
|
111
|
+
end
|
112
|
+
|
113
|
+
compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
|
114
|
+
|
109
115
|
if conf.elements(name: "buffer").empty?
|
110
116
|
e = Fluent::Config::Element.new("buffer", "time", {}, [])
|
111
117
|
conf.elements << e
|
112
118
|
end
|
113
119
|
buffer_config = conf.elements(name: "buffer").first
|
114
|
-
|
120
|
+
# explicitly set timekey
|
121
|
+
buffer_config["timekey"] = timekey
|
115
122
|
|
116
123
|
compat_parameters_convert_plaintextformatter(conf)
|
117
124
|
verify_config_placeholders_in_path!(conf)
|
@@ -308,9 +315,9 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
308
315
|
|
309
316
|
def generate_path(chunk)
|
310
317
|
hdfs_path = if @append
|
311
|
-
extract_placeholders(@path, chunk
|
318
|
+
extract_placeholders(@path, chunk)
|
312
319
|
else
|
313
|
-
extract_placeholders(@path
|
320
|
+
extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
|
314
321
|
end
|
315
322
|
hdfs_path = "#{hdfs_path}#{@compressor.ext}"
|
316
323
|
if @replace_random_uuid
|
@@ -512,3 +519,4 @@ require 'fluent/plugin/webhdfs_compressor_gzip'
|
|
512
519
|
require 'fluent/plugin/webhdfs_compressor_bzip2'
|
513
520
|
require 'fluent/plugin/webhdfs_compressor_snappy'
|
514
521
|
require 'fluent/plugin/webhdfs_compressor_lzo_command'
|
522
|
+
require 'fluent/plugin/webhdfs_compressor_zstd'
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Fluent::Plugin
|
2
|
+
class WebHDFSOutput < Output
|
3
|
+
class ZstdCompressor < Compressor
|
4
|
+
WebHDFSOutput.register_compressor('zstd', self)
|
5
|
+
|
6
|
+
def initialize(options = {})
|
7
|
+
begin
|
8
|
+
require "zstandard"
|
9
|
+
rescue LoadError
|
10
|
+
raise Fluent::ConfigError, "Install zstandard gem before use of zstd compressor"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def ext
|
15
|
+
".zst"
|
16
|
+
end
|
17
|
+
|
18
|
+
def compress(chunk, tmp)
|
19
|
+
tmp.binmode
|
20
|
+
tmp.write Zstandard.deflate(chunk.read)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -168,6 +168,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
168
168
|
metadata = d.instance.metadata("test", nil, {})
|
169
169
|
chunk = d.instance.buffer.generate_chunk(metadata)
|
170
170
|
assert_equal "/hdfs/path/file.#{dump_unique_id_hex(chunk.unique_id)}.log", d.instance.generate_path(chunk)
|
171
|
+
assert_empty d.instance.log.out.logs
|
171
172
|
end
|
172
173
|
|
173
174
|
data(path: { "append" => false },
|
@@ -206,6 +207,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
206
207
|
assert_equal 1, d.instance.buffer_config.timekey
|
207
208
|
assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
|
208
209
|
end
|
210
|
+
|
211
|
+
def test_time_key_without_buffer_section
|
212
|
+
conf = config_element(
|
213
|
+
"ROOT", "", {
|
214
|
+
"host" => "namenode.local",
|
215
|
+
"path" => "/hdfs/path/file.%Y%m%d-%M.log"
|
216
|
+
}
|
217
|
+
)
|
218
|
+
d = create_driver(conf)
|
219
|
+
time = event_time("2012-07-18 15:03:00 +0900")
|
220
|
+
metadata = d.instance.metadata("test", time, {})
|
221
|
+
chunk = d.instance.buffer.generate_chunk(metadata)
|
222
|
+
assert_equal 60, d.instance.buffer_config.timekey
|
223
|
+
assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
|
224
|
+
end
|
209
225
|
end
|
210
226
|
|
211
227
|
sub_test_case "using format subsection" do
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require "helper"
|
2
|
+
require "fluent/plugin/buf_memory"
|
3
|
+
require 'zstandard'
|
4
|
+
|
5
|
+
class ZstdCompressorTest < Test::Unit::TestCase
|
6
|
+
class Zstd < self
|
7
|
+
|
8
|
+
CONFIG = %[
|
9
|
+
host namenode.local
|
10
|
+
path /hdfs/path/file.%Y%m%d.log
|
11
|
+
]
|
12
|
+
|
13
|
+
def setup
|
14
|
+
omit unless Object.const_defined?(:Zstandard)
|
15
|
+
Fluent::Test.setup
|
16
|
+
@compressor = Fluent::Plugin::WebHDFSOutput::ZstdCompressor.new
|
17
|
+
end
|
18
|
+
|
19
|
+
def create_driver(conf = CONFIG)
|
20
|
+
Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_ext
|
24
|
+
assert_equal(".zst", @compressor.ext)
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_compress
|
28
|
+
d = create_driver
|
29
|
+
if d.instance.respond_to?(:buffer)
|
30
|
+
buffer = d.instance.buffer
|
31
|
+
else
|
32
|
+
buffer = d.instance.instance_variable_get(:@buffer)
|
33
|
+
end
|
34
|
+
|
35
|
+
if buffer.respond_to?(:generate_chunk)
|
36
|
+
chunk = buffer.generate_chunk("test")
|
37
|
+
chunk.concat("hello gzip\n" * 32 * 1024, 1)
|
38
|
+
else
|
39
|
+
chunk = buffer.new_chunk("test")
|
40
|
+
chunk << "hello gzip\n" * 32 * 1024
|
41
|
+
end
|
42
|
+
|
43
|
+
io = Tempfile.new("zstd-")
|
44
|
+
@compressor.compress(chunk, io)
|
45
|
+
assert !io.closed?
|
46
|
+
chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
|
47
|
+
assert(chunk_bytesize > io.read.bytesize)
|
48
|
+
io.rewind
|
49
|
+
reader = Zstandard.inflate(io.read)
|
50
|
+
io.rewind
|
51
|
+
assert_equal(chunk.read, reader)
|
52
|
+
io.close
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -94,20 +94,34 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: zstandard
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: fluentd
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
100
114
|
requirements:
|
101
115
|
- - ">="
|
102
116
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.14.
|
117
|
+
version: 0.14.22
|
104
118
|
type: :runtime
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
107
121
|
requirements:
|
108
122
|
- - ">="
|
109
123
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0.14.
|
124
|
+
version: 0.14.22
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: webhdfs
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -143,10 +157,12 @@ files:
|
|
143
157
|
- lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
|
144
158
|
- lib/fluent/plugin/webhdfs_compressor_snappy.rb
|
145
159
|
- lib/fluent/plugin/webhdfs_compressor_text.rb
|
160
|
+
- lib/fluent/plugin/webhdfs_compressor_zstd.rb
|
146
161
|
- test/helper.rb
|
147
162
|
- test/plugin/test_compressor.rb
|
148
163
|
- test/plugin/test_gzip_compressor.rb
|
149
164
|
- test/plugin/test_out_webhdfs.rb
|
165
|
+
- test/plugin/test_zstd_compressor.rb
|
150
166
|
homepage: https://github.com/fluent/fluent-plugin-webhdfs
|
151
167
|
licenses:
|
152
168
|
- Apache-2.0
|
@@ -166,8 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
166
182
|
- !ruby/object:Gem::Version
|
167
183
|
version: '0'
|
168
184
|
requirements: []
|
169
|
-
|
170
|
-
rubygems_version: 2.6.11
|
185
|
+
rubygems_version: 3.0.3
|
171
186
|
signing_key:
|
172
187
|
specification_version: 4
|
173
188
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|
@@ -176,3 +191,4 @@ test_files:
|
|
176
191
|
- test/plugin/test_compressor.rb
|
177
192
|
- test/plugin/test_gzip_compressor.rb
|
178
193
|
- test/plugin/test_out_webhdfs.rb
|
194
|
+
- test/plugin/test_zstd_compressor.rb
|