fluent-plugin-webhdfs 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/README.md +8 -3
- data/fluent-plugin-webhdfs.gemspec +3 -2
- data/lib/fluent/plugin/out_webhdfs.rb +14 -6
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +4 -3
- data/lib/fluent/plugin/webhdfs_compressor_zstd.rb +24 -0
- data/test/plugin/test_out_webhdfs.rb +16 -0
- data/test/plugin/test_zstd_compressor.rb +55 -0
- metadata +22 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 6318fcc2f54eb64be7d99978a04b1cad30e740734ed0dafdf1441cf65988f1ae
|
4
|
+
data.tar.gz: 1d49b9c8b009a9a6078c4f9e9fe08b79832510cc780e50344ea72e8493c8e5c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16f6c451fe30d09b33255dcac7ed8714beb0c157fee3bdce8e041b1de9c0aa032910b8cf21ac1effd8dddbea4240a44c5e68e3a83275e386fd881dbb22c75b1d
|
7
|
+
data.tar.gz: c59533a82d5c46e9ea178402ede6852aa4d8fa7375981433b7f5b16311a55bb4e58234a0e28b657e318b690946c28622bfbda5da6b4318549342a394d6b8c15a
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# fluent-plugin-webhdfs
|
2
2
|
|
3
|
+
[](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
|
4
|
+
|
3
5
|
[Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
|
4
6
|
|
5
7
|
"webhdfs" output plugin formats data into plain text, and store it as files on HDFS. This plugin supports:
|
@@ -155,14 +157,17 @@ If you want to compress data before storing it:
|
|
155
157
|
host namenode.your.cluster.local
|
156
158
|
port 50070
|
157
159
|
path /path/on/hdfs/access.log.%Y%m%d_%H
|
158
|
-
compress gzip # or 'bzip2', 'snappy', 'lzo_command'
|
160
|
+
compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
|
159
161
|
</match>
|
160
162
|
|
161
|
-
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`).
|
163
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`, `.zst`).
|
162
164
|
Note that you have to install additional gem for several compress algorithms:
|
163
165
|
|
164
166
|
- snappy: install snappy gem
|
165
167
|
- bzip2: install bzip2-ffi gem
|
168
|
+
- zstd: install zstandard gem
|
169
|
+
|
170
|
+
Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
|
166
171
|
|
167
172
|
### Namenode HA / Auto retry for WebHDFS known errors
|
168
173
|
|
@@ -225,7 +230,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
|
|
225
230
|
|
226
231
|
With the default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raises an error for inactive NameNodes.
|
227
232
|
|
228
|
-
If you were
|
233
|
+
If you are using unstable NameNodes and want to ignore NameNode errors on startup of fluentd, enable the `ignore_start_check_error` option like below:
|
229
234
|
|
230
235
|
<match access.**>
|
231
236
|
@type webhdfs
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
5
|
-
gem.version = "1.
|
5
|
+
gem.version = "1.3.0"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
|
|
21
21
|
gem.add_development_dependency "appraisal"
|
22
22
|
gem.add_development_dependency "snappy", '>= 0.0.13'
|
23
23
|
gem.add_development_dependency "bzip2-ffi"
|
24
|
-
gem.
|
24
|
+
gem.add_development_dependency "zstandard"
|
25
|
+
gem.add_runtime_dependency "fluentd", '>= 0.14.22'
|
25
26
|
gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
|
26
27
|
end
|
@@ -67,7 +67,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
67
67
|
desc 'kerberos keytab file'
|
68
68
|
config_param :kerberos_keytab, :string, default: nil
|
69
69
|
|
70
|
-
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
|
70
|
+
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
|
71
71
|
desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
|
72
72
|
config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
|
73
73
|
|
@@ -98,20 +98,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
98
98
|
end
|
99
99
|
|
100
100
|
def configure(conf)
|
101
|
-
compat_parameters_convert
|
102
|
-
|
101
|
+
# #compat_parameters_convert ignore time format in conf["path"],
|
102
|
+
# so check conf["path"] and overwrite the default value later if needed
|
103
103
|
timekey = case conf["path"]
|
104
104
|
when /%S/ then 1
|
105
105
|
when /%M/ then 60
|
106
106
|
when /%H/ then 3600
|
107
107
|
else 86400
|
108
108
|
end
|
109
|
+
if buffer_config = conf.elements(name: "buffer").first
|
110
|
+
timekey = buffer_config["timekey"] || timekey
|
111
|
+
end
|
112
|
+
|
113
|
+
compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
|
114
|
+
|
109
115
|
if conf.elements(name: "buffer").empty?
|
110
116
|
e = Fluent::Config::Element.new("buffer", "time", {}, [])
|
111
117
|
conf.elements << e
|
112
118
|
end
|
113
119
|
buffer_config = conf.elements(name: "buffer").first
|
114
|
-
|
120
|
+
# explicitly set timekey
|
121
|
+
buffer_config["timekey"] = timekey
|
115
122
|
|
116
123
|
compat_parameters_convert_plaintextformatter(conf)
|
117
124
|
verify_config_placeholders_in_path!(conf)
|
@@ -308,9 +315,9 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
308
315
|
|
309
316
|
def generate_path(chunk)
|
310
317
|
hdfs_path = if @append
|
311
|
-
extract_placeholders(@path, chunk
|
318
|
+
extract_placeholders(@path, chunk)
|
312
319
|
else
|
313
|
-
extract_placeholders(@path
|
320
|
+
extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
|
314
321
|
end
|
315
322
|
hdfs_path = "#{hdfs_path}#{@compressor.ext}"
|
316
323
|
if @replace_random_uuid
|
@@ -512,3 +519,4 @@ require 'fluent/plugin/webhdfs_compressor_gzip'
|
|
512
519
|
require 'fluent/plugin/webhdfs_compressor_bzip2'
|
513
520
|
require 'fluent/plugin/webhdfs_compressor_snappy'
|
514
521
|
require 'fluent/plugin/webhdfs_compressor_lzo_command'
|
522
|
+
require 'fluent/plugin/webhdfs_compressor_zstd'
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Fluent::Plugin
|
2
|
+
class WebHDFSOutput < Output
|
3
|
+
class ZstdCompressor < Compressor
|
4
|
+
WebHDFSOutput.register_compressor('zstd', self)
|
5
|
+
|
6
|
+
def initialize(options = {})
|
7
|
+
begin
|
8
|
+
require "zstandard"
|
9
|
+
rescue LoadError
|
10
|
+
raise Fluent::ConfigError, "Install zstandard gem before use of zstd compressor"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def ext
|
15
|
+
".zst"
|
16
|
+
end
|
17
|
+
|
18
|
+
def compress(chunk, tmp)
|
19
|
+
tmp.binmode
|
20
|
+
tmp.write Zstandard.deflate(chunk.read)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -168,6 +168,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
168
168
|
metadata = d.instance.metadata("test", nil, {})
|
169
169
|
chunk = d.instance.buffer.generate_chunk(metadata)
|
170
170
|
assert_equal "/hdfs/path/file.#{dump_unique_id_hex(chunk.unique_id)}.log", d.instance.generate_path(chunk)
|
171
|
+
assert_empty d.instance.log.out.logs
|
171
172
|
end
|
172
173
|
|
173
174
|
data(path: { "append" => false },
|
@@ -206,6 +207,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
206
207
|
assert_equal 1, d.instance.buffer_config.timekey
|
207
208
|
assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
|
208
209
|
end
|
210
|
+
|
211
|
+
def test_time_key_without_buffer_section
|
212
|
+
conf = config_element(
|
213
|
+
"ROOT", "", {
|
214
|
+
"host" => "namenode.local",
|
215
|
+
"path" => "/hdfs/path/file.%Y%m%d-%M.log"
|
216
|
+
}
|
217
|
+
)
|
218
|
+
d = create_driver(conf)
|
219
|
+
time = event_time("2012-07-18 15:03:00 +0900")
|
220
|
+
metadata = d.instance.metadata("test", time, {})
|
221
|
+
chunk = d.instance.buffer.generate_chunk(metadata)
|
222
|
+
assert_equal 60, d.instance.buffer_config.timekey
|
223
|
+
assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
|
224
|
+
end
|
209
225
|
end
|
210
226
|
|
211
227
|
sub_test_case "using format subsection" do
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require "helper"
|
2
|
+
require "fluent/plugin/buf_memory"
|
3
|
+
require 'zstandard'
|
4
|
+
|
5
|
+
class ZstdCompressorTest < Test::Unit::TestCase
|
6
|
+
class Zstd < self
|
7
|
+
|
8
|
+
CONFIG = %[
|
9
|
+
host namenode.local
|
10
|
+
path /hdfs/path/file.%Y%m%d.log
|
11
|
+
]
|
12
|
+
|
13
|
+
def setup
|
14
|
+
omit unless Object.const_defined?(:Zstandard)
|
15
|
+
Fluent::Test.setup
|
16
|
+
@compressor = Fluent::Plugin::WebHDFSOutput::ZstdCompressor.new
|
17
|
+
end
|
18
|
+
|
19
|
+
def create_driver(conf = CONFIG)
|
20
|
+
Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_ext
|
24
|
+
assert_equal(".zst", @compressor.ext)
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_compress
|
28
|
+
d = create_driver
|
29
|
+
if d.instance.respond_to?(:buffer)
|
30
|
+
buffer = d.instance.buffer
|
31
|
+
else
|
32
|
+
buffer = d.instance.instance_variable_get(:@buffer)
|
33
|
+
end
|
34
|
+
|
35
|
+
if buffer.respond_to?(:generate_chunk)
|
36
|
+
chunk = buffer.generate_chunk("test")
|
37
|
+
chunk.concat("hello gzip\n" * 32 * 1024, 1)
|
38
|
+
else
|
39
|
+
chunk = buffer.new_chunk("test")
|
40
|
+
chunk << "hello gzip\n" * 32 * 1024
|
41
|
+
end
|
42
|
+
|
43
|
+
io = Tempfile.new("zstd-")
|
44
|
+
@compressor.compress(chunk, io)
|
45
|
+
assert !io.closed?
|
46
|
+
chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
|
47
|
+
assert(chunk_bytesize > io.read.bytesize)
|
48
|
+
io.rewind
|
49
|
+
reader = Zstandard.inflate(io.read)
|
50
|
+
io.rewind
|
51
|
+
assert_equal(chunk.read, reader)
|
52
|
+
io.close
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -94,20 +94,34 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: zstandard
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: fluentd
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
100
114
|
requirements:
|
101
115
|
- - ">="
|
102
116
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.14.
|
117
|
+
version: 0.14.22
|
104
118
|
type: :runtime
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
107
121
|
requirements:
|
108
122
|
- - ">="
|
109
123
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0.14.
|
124
|
+
version: 0.14.22
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: webhdfs
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -143,10 +157,12 @@ files:
|
|
143
157
|
- lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
|
144
158
|
- lib/fluent/plugin/webhdfs_compressor_snappy.rb
|
145
159
|
- lib/fluent/plugin/webhdfs_compressor_text.rb
|
160
|
+
- lib/fluent/plugin/webhdfs_compressor_zstd.rb
|
146
161
|
- test/helper.rb
|
147
162
|
- test/plugin/test_compressor.rb
|
148
163
|
- test/plugin/test_gzip_compressor.rb
|
149
164
|
- test/plugin/test_out_webhdfs.rb
|
165
|
+
- test/plugin/test_zstd_compressor.rb
|
150
166
|
homepage: https://github.com/fluent/fluent-plugin-webhdfs
|
151
167
|
licenses:
|
152
168
|
- Apache-2.0
|
@@ -166,8 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
166
182
|
- !ruby/object:Gem::Version
|
167
183
|
version: '0'
|
168
184
|
requirements: []
|
169
|
-
|
170
|
-
rubygems_version: 2.6.11
|
185
|
+
rubygems_version: 3.0.3
|
171
186
|
signing_key:
|
172
187
|
specification_version: 4
|
173
188
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|
@@ -176,3 +191,4 @@ test_files:
|
|
176
191
|
- test/plugin/test_compressor.rb
|
177
192
|
- test/plugin/test_gzip_compressor.rb
|
178
193
|
- test/plugin/test_out_webhdfs.rb
|
194
|
+
- test/plugin/test_zstd_compressor.rb
|