fluent-plugin-webhdfs 1.2.2 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/README.md +21 -2
- data/fluent-plugin-webhdfs.gemspec +3 -2
- data/lib/fluent/plugin/out_webhdfs.rb +19 -7
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +4 -3
- data/lib/fluent/plugin/webhdfs_compressor_zstd.rb +24 -0
- data/test/plugin/test_out_webhdfs.rb +35 -0
- data/test/plugin/test_zstd_compressor.rb +55 -0
- metadata +22 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 64bdfcd21e62ec4f83f7807b7b7a60e51f0ed68f2d015f39424dd47b76473be3
|
4
|
+
data.tar.gz: cbf248448c4eaa9b35689076ac0010753af7eb138d24cd377c43bcf5dfd46595
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d76806177dc1ff5e33dbf4fcc0e1fd0b3a47bd1a4fa1e51287a033ba4ccb94c750a540f735e1144be0bd502629c05ee19d74462ab2fe6831979d378b35686d20
|
7
|
+
data.tar.gz: e7d195870652a72c6d3edea039336742e3564a01ac9871036ddc4dcb3d0eb58bb4b9c73ac6598ea2712ec1ec17870e9ee6d091a2a89204688fb3935aa5ccc34b
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# fluent-plugin-webhdfs
|
2
2
|
|
3
|
+
[![Build Status](https://travis-ci.org/fluent/fluent-plugin-webhdfs.svg?branch=master)](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
|
4
|
+
|
3
5
|
[Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
|
4
6
|
|
5
7
|
"webhdfs" output plugin formats data into plain text, and store it as files on HDFS. This plugin supports:
|
@@ -155,14 +157,31 @@ If you want to compress data before storing it:
|
|
155
157
|
host namenode.your.cluster.local
|
156
158
|
port 50070
|
157
159
|
path /path/on/hdfs/access.log.%Y%m%d_%H
|
158
|
-
compress gzip # or 'bzip2', 'snappy', 'lzo_command'
|
160
|
+
compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
|
159
161
|
</match>
|
160
162
|
|
161
|
-
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`).
|
163
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`, `.zst`).
|
162
164
|
Note that you have to install additional gem for several compress algorithms:
|
163
165
|
|
164
166
|
- snappy: install snappy gem
|
165
167
|
- bzip2: install bzip2-ffi gem
|
168
|
+
- zstd: install zstandard gem
|
169
|
+
|
170
|
+
Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
|
171
|
+
|
172
|
+
If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
|
173
|
+
|
174
|
+
<match access.**>
|
175
|
+
@type webhdfs
|
176
|
+
host namenode.your.cluster.local
|
177
|
+
port 50070
|
178
|
+
path /path/on/hdfs/access.log.%Y%m%d_%H
|
179
|
+
compress snappy
|
180
|
+
extension ".snappy"
|
181
|
+
</match>
|
182
|
+
|
183
|
+
With this configuration paths in HDFS will be like `/path/on/hdfs/access.log.20201003_12.snappy`.
|
184
|
+
This one may be useful when (for example) you need to use snappy codec but `.sz` files are not recognized as snappy files in HDFS.
|
166
185
|
|
167
186
|
### Namenode HA / Auto retry for WebHDFS known errors
|
168
187
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
5
|
-
gem.version = "1.
|
5
|
+
gem.version = "1.3.1"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
|
|
21
21
|
gem.add_development_dependency "appraisal"
|
22
22
|
gem.add_development_dependency "snappy", '>= 0.0.13'
|
23
23
|
gem.add_development_dependency "bzip2-ffi"
|
24
|
-
gem.
|
24
|
+
gem.add_development_dependency "zstandard"
|
25
|
+
gem.add_runtime_dependency "fluentd", '>= 0.14.22'
|
25
26
|
gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
|
26
27
|
end
|
@@ -67,10 +67,13 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
67
67
|
desc 'kerberos keytab file'
|
68
68
|
config_param :kerberos_keytab, :string, default: nil
|
69
69
|
|
70
|
-
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
|
70
|
+
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
|
71
71
|
desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
|
72
72
|
config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
|
73
73
|
|
74
|
+
desc 'HDFS file extensions (overrides default compressor extensions)'
|
75
|
+
config_param :extension, :string, default: nil
|
76
|
+
|
74
77
|
config_param :remove_prefix, :string, default: nil, deprecated: "use @label for routing"
|
75
78
|
config_param :default_tag, :string, default: nil, deprecated: "use @label for routing"
|
76
79
|
config_param :null_value, :string, default: nil, deprecated: "use filter plugins to convert null values into any specified string"
|
@@ -98,20 +101,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
98
101
|
end
|
99
102
|
|
100
103
|
def configure(conf)
|
101
|
-
compat_parameters_convert
|
102
|
-
|
104
|
+
# #compat_parameters_convert ignore time format in conf["path"],
|
105
|
+
# so check conf["path"] and overwrite the default value later if needed
|
103
106
|
timekey = case conf["path"]
|
104
107
|
when /%S/ then 1
|
105
108
|
when /%M/ then 60
|
106
109
|
when /%H/ then 3600
|
107
110
|
else 86400
|
108
111
|
end
|
112
|
+
if buffer_config = conf.elements(name: "buffer").first
|
113
|
+
timekey = buffer_config["timekey"] || timekey
|
114
|
+
end
|
115
|
+
|
116
|
+
compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
|
117
|
+
|
109
118
|
if conf.elements(name: "buffer").empty?
|
110
119
|
e = Fluent::Config::Element.new("buffer", "time", {}, [])
|
111
120
|
conf.elements << e
|
112
121
|
end
|
113
122
|
buffer_config = conf.elements(name: "buffer").first
|
114
|
-
|
123
|
+
# explicitly set timekey
|
124
|
+
buffer_config["timekey"] = timekey
|
115
125
|
|
116
126
|
compat_parameters_convert_plaintextformatter(conf)
|
117
127
|
verify_config_placeholders_in_path!(conf)
|
@@ -308,11 +318,12 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
308
318
|
|
309
319
|
def generate_path(chunk)
|
310
320
|
hdfs_path = if @append
|
311
|
-
extract_placeholders(@path, chunk
|
321
|
+
extract_placeholders(@path, chunk)
|
312
322
|
else
|
313
|
-
extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk
|
323
|
+
extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
|
314
324
|
end
|
315
|
-
|
325
|
+
hdfs_ext = @extension || @compressor.ext
|
326
|
+
hdfs_path = "#{hdfs_path}#{hdfs_ext}"
|
316
327
|
if @replace_random_uuid
|
317
328
|
uuid_random = SecureRandom.uuid
|
318
329
|
hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
|
@@ -512,3 +523,4 @@ require 'fluent/plugin/webhdfs_compressor_gzip'
|
|
512
523
|
require 'fluent/plugin/webhdfs_compressor_bzip2'
|
513
524
|
require 'fluent/plugin/webhdfs_compressor_snappy'
|
514
525
|
require 'fluent/plugin/webhdfs_compressor_lzo_command'
|
526
|
+
require 'fluent/plugin/webhdfs_compressor_zstd'
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Fluent::Plugin
|
2
|
+
class WebHDFSOutput < Output
|
3
|
+
class ZstdCompressor < Compressor
|
4
|
+
WebHDFSOutput.register_compressor('zstd', self)
|
5
|
+
|
6
|
+
def initialize(options = {})
|
7
|
+
begin
|
8
|
+
require "zstandard"
|
9
|
+
rescue LoadError
|
10
|
+
raise Fluent::ConfigError, "Install zstandard gem before use of zstd compressor"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def ext
|
15
|
+
".zst"
|
16
|
+
end
|
17
|
+
|
18
|
+
def compress(chunk, tmp)
|
19
|
+
tmp.binmode
|
20
|
+
tmp.write Zstandard.deflate(chunk.read)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -126,6 +126,26 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
126
126
|
assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
|
127
127
|
assert_equal compress_type, d.instance.compress
|
128
128
|
assert_equal compressor_class, d.instance.compressor.class
|
129
|
+
|
130
|
+
time = event_time("2020-10-03 15:07:00 +0300")
|
131
|
+
metadata = d.instance.metadata("test", time, {})
|
132
|
+
chunk = d.instance.buffer.generate_chunk(metadata)
|
133
|
+
assert_equal "/hdfs/path/file.20201003.1507.log#{d.instance.compressor.ext}", d.instance.generate_path(chunk)
|
134
|
+
end
|
135
|
+
|
136
|
+
def test_explicit_extensions
|
137
|
+
conf = config_element(
|
138
|
+
"ROOT", "", {
|
139
|
+
"host" => "namenode.local",
|
140
|
+
"path" => "/hdfs/path/file.%Y%m%d.log",
|
141
|
+
"compress" => "snappy",
|
142
|
+
"extension" => ".snappy"
|
143
|
+
})
|
144
|
+
d = create_driver(conf)
|
145
|
+
time = event_time("2020-10-07 15:15:00 +0300")
|
146
|
+
metadata = d.instance.metadata("test", time, {})
|
147
|
+
chunk = d.instance.buffer.generate_chunk(metadata)
|
148
|
+
assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
|
129
149
|
end
|
130
150
|
|
131
151
|
def test_placeholders_old_style
|
@@ -207,6 +227,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
207
227
|
assert_equal 1, d.instance.buffer_config.timekey
|
208
228
|
assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
|
209
229
|
end
|
230
|
+
|
231
|
+
def test_time_key_without_buffer_section
|
232
|
+
conf = config_element(
|
233
|
+
"ROOT", "", {
|
234
|
+
"host" => "namenode.local",
|
235
|
+
"path" => "/hdfs/path/file.%Y%m%d-%M.log"
|
236
|
+
}
|
237
|
+
)
|
238
|
+
d = create_driver(conf)
|
239
|
+
time = event_time("2012-07-18 15:03:00 +0900")
|
240
|
+
metadata = d.instance.metadata("test", time, {})
|
241
|
+
chunk = d.instance.buffer.generate_chunk(metadata)
|
242
|
+
assert_equal 60, d.instance.buffer_config.timekey
|
243
|
+
assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
|
244
|
+
end
|
210
245
|
end
|
211
246
|
|
212
247
|
sub_test_case "using format subsection" do
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require "helper"
|
2
|
+
require "fluent/plugin/buf_memory"
|
3
|
+
require 'zstandard'
|
4
|
+
|
5
|
+
class ZstdCompressorTest < Test::Unit::TestCase
|
6
|
+
class Zstd < self
|
7
|
+
|
8
|
+
CONFIG = %[
|
9
|
+
host namenode.local
|
10
|
+
path /hdfs/path/file.%Y%m%d.log
|
11
|
+
]
|
12
|
+
|
13
|
+
def setup
|
14
|
+
omit unless Object.const_defined?(:Zstandard)
|
15
|
+
Fluent::Test.setup
|
16
|
+
@compressor = Fluent::Plugin::WebHDFSOutput::ZstdCompressor.new
|
17
|
+
end
|
18
|
+
|
19
|
+
def create_driver(conf = CONFIG)
|
20
|
+
Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_ext
|
24
|
+
assert_equal(".zst", @compressor.ext)
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_compress
|
28
|
+
d = create_driver
|
29
|
+
if d.instance.respond_to?(:buffer)
|
30
|
+
buffer = d.instance.buffer
|
31
|
+
else
|
32
|
+
buffer = d.instance.instance_variable_get(:@buffer)
|
33
|
+
end
|
34
|
+
|
35
|
+
if buffer.respond_to?(:generate_chunk)
|
36
|
+
chunk = buffer.generate_chunk("test")
|
37
|
+
chunk.concat("hello gzip\n" * 32 * 1024, 1)
|
38
|
+
else
|
39
|
+
chunk = buffer.new_chunk("test")
|
40
|
+
chunk << "hello gzip\n" * 32 * 1024
|
41
|
+
end
|
42
|
+
|
43
|
+
io = Tempfile.new("zstd-")
|
44
|
+
@compressor.compress(chunk, io)
|
45
|
+
assert !io.closed?
|
46
|
+
chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
|
47
|
+
assert(chunk_bytesize > io.read.bytesize)
|
48
|
+
io.rewind
|
49
|
+
reader = Zstandard.inflate(io.read)
|
50
|
+
io.rewind
|
51
|
+
assert_equal(chunk.read, reader)
|
52
|
+
io.close
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -94,20 +94,34 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: zstandard
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: fluentd
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
100
114
|
requirements:
|
101
115
|
- - ">="
|
102
116
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.14.
|
117
|
+
version: 0.14.22
|
104
118
|
type: :runtime
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
107
121
|
requirements:
|
108
122
|
- - ">="
|
109
123
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0.14.
|
124
|
+
version: 0.14.22
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: webhdfs
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -143,10 +157,12 @@ files:
|
|
143
157
|
- lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
|
144
158
|
- lib/fluent/plugin/webhdfs_compressor_snappy.rb
|
145
159
|
- lib/fluent/plugin/webhdfs_compressor_text.rb
|
160
|
+
- lib/fluent/plugin/webhdfs_compressor_zstd.rb
|
146
161
|
- test/helper.rb
|
147
162
|
- test/plugin/test_compressor.rb
|
148
163
|
- test/plugin/test_gzip_compressor.rb
|
149
164
|
- test/plugin/test_out_webhdfs.rb
|
165
|
+
- test/plugin/test_zstd_compressor.rb
|
150
166
|
homepage: https://github.com/fluent/fluent-plugin-webhdfs
|
151
167
|
licenses:
|
152
168
|
- Apache-2.0
|
@@ -166,8 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
166
182
|
- !ruby/object:Gem::Version
|
167
183
|
version: '0'
|
168
184
|
requirements: []
|
169
|
-
|
170
|
-
rubygems_version: 2.6.11
|
185
|
+
rubygems_version: 3.0.3
|
171
186
|
signing_key:
|
172
187
|
specification_version: 4
|
173
188
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|
@@ -176,3 +191,4 @@ test_files:
|
|
176
191
|
- test/plugin/test_compressor.rb
|
177
192
|
- test/plugin/test_gzip_compressor.rb
|
178
193
|
- test/plugin/test_out_webhdfs.rb
|
194
|
+
- test/plugin/test_zstd_compressor.rb
|