fluent-plugin-webhdfs 1.2.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/README.md +21 -2
- data/fluent-plugin-webhdfs.gemspec +3 -2
- data/lib/fluent/plugin/out_webhdfs.rb +19 -7
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +4 -3
- data/lib/fluent/plugin/webhdfs_compressor_zstd.rb +24 -0
- data/test/plugin/test_out_webhdfs.rb +35 -0
- data/test/plugin/test_zstd_compressor.rb +55 -0
- metadata +22 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 64bdfcd21e62ec4f83f7807b7b7a60e51f0ed68f2d015f39424dd47b76473be3
|
4
|
+
data.tar.gz: cbf248448c4eaa9b35689076ac0010753af7eb138d24cd377c43bcf5dfd46595
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d76806177dc1ff5e33dbf4fcc0e1fd0b3a47bd1a4fa1e51287a033ba4ccb94c750a540f735e1144be0bd502629c05ee19d74462ab2fe6831979d378b35686d20
|
7
|
+
data.tar.gz: e7d195870652a72c6d3edea039336742e3564a01ac9871036ddc4dcb3d0eb58bb4b9c73ac6598ea2712ec1ec17870e9ee6d091a2a89204688fb3935aa5ccc34b
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# fluent-plugin-webhdfs
|
2
2
|
|
3
|
+
[](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
|
4
|
+
|
3
5
|
[Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
|
4
6
|
|
5
7
|
"webhdfs" output plugin formats data into plain text, and store it as files on HDFS. This plugin supports:
|
@@ -155,14 +157,31 @@ If you want to compress data before storing it:
|
|
155
157
|
host namenode.your.cluster.local
|
156
158
|
port 50070
|
157
159
|
path /path/on/hdfs/access.log.%Y%m%d_%H
|
158
|
-
compress gzip # or 'bzip2', 'snappy', 'lzo_command'
|
160
|
+
compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
|
159
161
|
</match>
|
160
162
|
|
161
|
-
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.lzo`).
|
163
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.lzo`, `.zst`).
|
162
164
|
Note that you have to install additional gem for several compress algorithms:
|
163
165
|
|
164
166
|
- snappy: install snappy gem
|
165
167
|
- bzip2: install bzip2-ffi gem
|
168
|
+
- zstd: install zstandard gem
|
169
|
+
|
170
|
+
Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
|
171
|
+
|
172
|
+
If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
|
173
|
+
|
174
|
+
<match access.**>
|
175
|
+
@type webhdfs
|
176
|
+
host namenode.your.cluster.local
|
177
|
+
port 50070
|
178
|
+
path /path/on/hdfs/access.log.%Y%m%d_%H
|
179
|
+
compress snappy
|
180
|
+
extension ".snappy"
|
181
|
+
</match>
|
182
|
+
|
183
|
+
With this configuration paths in HDFS will be like `/path/on/hdfs/access.log.20201003_12.snappy`.
|
184
|
+
This one may be useful when (for example) you need to use snappy codec but `.sz` files are not recognized as snappy files in HDFS.
|
166
185
|
|
167
186
|
### Namenode HA / Auto retry for WebHDFS known errors
|
168
187
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
5
|
-
gem.version = "1.
|
5
|
+
gem.version = "1.3.1"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
|
|
21
21
|
gem.add_development_dependency "appraisal"
|
22
22
|
gem.add_development_dependency "snappy", '>= 0.0.13'
|
23
23
|
gem.add_development_dependency "bzip2-ffi"
|
24
|
-
gem.
|
24
|
+
gem.add_development_dependency "zstandard"
|
25
|
+
gem.add_runtime_dependency "fluentd", '>= 0.14.22'
|
25
26
|
gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
|
26
27
|
end
|
@@ -67,10 +67,13 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
67
67
|
desc 'kerberos keytab file'
|
68
68
|
config_param :kerberos_keytab, :string, default: nil
|
69
69
|
|
70
|
-
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
|
70
|
+
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
|
71
71
|
desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
|
72
72
|
config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
|
73
73
|
|
74
|
+
desc 'HDFS file extensions (overrides default compressor extensions)'
|
75
|
+
config_param :extension, :string, default: nil
|
76
|
+
|
74
77
|
config_param :remove_prefix, :string, default: nil, deprecated: "use @label for routing"
|
75
78
|
config_param :default_tag, :string, default: nil, deprecated: "use @label for routing"
|
76
79
|
config_param :null_value, :string, default: nil, deprecated: "use filter plugins to convert null values into any specified string"
|
@@ -98,20 +101,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
98
101
|
end
|
99
102
|
|
100
103
|
def configure(conf)
|
101
|
-
compat_parameters_convert
|
102
|
-
|
104
|
+
# #compat_parameters_convert ignore time format in conf["path"],
|
105
|
+
# so check conf["path"] and overwrite the default value later if needed
|
103
106
|
timekey = case conf["path"]
|
104
107
|
when /%S/ then 1
|
105
108
|
when /%M/ then 60
|
106
109
|
when /%H/ then 3600
|
107
110
|
else 86400
|
108
111
|
end
|
112
|
+
if buffer_config = conf.elements(name: "buffer").first
|
113
|
+
timekey = buffer_config["timekey"] || timekey
|
114
|
+
end
|
115
|
+
|
116
|
+
compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
|
117
|
+
|
109
118
|
if conf.elements(name: "buffer").empty?
|
110
119
|
e = Fluent::Config::Element.new("buffer", "time", {}, [])
|
111
120
|
conf.elements << e
|
112
121
|
end
|
113
122
|
buffer_config = conf.elements(name: "buffer").first
|
114
|
-
|
123
|
+
# explicitly set timekey
|
124
|
+
buffer_config["timekey"] = timekey
|
115
125
|
|
116
126
|
compat_parameters_convert_plaintextformatter(conf)
|
117
127
|
verify_config_placeholders_in_path!(conf)
|
@@ -308,11 +318,12 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
308
318
|
|
309
319
|
def generate_path(chunk)
|
310
320
|
hdfs_path = if @append
|
311
|
-
extract_placeholders(@path, chunk
|
321
|
+
extract_placeholders(@path, chunk)
|
312
322
|
else
|
313
|
-
extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk
|
323
|
+
extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
|
314
324
|
end
|
315
|
-
|
325
|
+
hdfs_ext = @extension || @compressor.ext
|
326
|
+
hdfs_path = "#{hdfs_path}#{hdfs_ext}"
|
316
327
|
if @replace_random_uuid
|
317
328
|
uuid_random = SecureRandom.uuid
|
318
329
|
hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
|
@@ -512,3 +523,4 @@ require 'fluent/plugin/webhdfs_compressor_gzip'
|
|
512
523
|
require 'fluent/plugin/webhdfs_compressor_bzip2'
|
513
524
|
require 'fluent/plugin/webhdfs_compressor_snappy'
|
514
525
|
require 'fluent/plugin/webhdfs_compressor_lzo_command'
|
526
|
+
require 'fluent/plugin/webhdfs_compressor_zstd'
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Fluent::Plugin
|
2
|
+
class WebHDFSOutput < Output
|
3
|
+
class ZstdCompressor < Compressor
|
4
|
+
WebHDFSOutput.register_compressor('zstd', self)
|
5
|
+
|
6
|
+
def initialize(options = {})
|
7
|
+
begin
|
8
|
+
require "zstandard"
|
9
|
+
rescue LoadError
|
10
|
+
raise Fluent::ConfigError, "Install zstandard gem before use of zstd compressor"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def ext
|
15
|
+
".zst"
|
16
|
+
end
|
17
|
+
|
18
|
+
def compress(chunk, tmp)
|
19
|
+
tmp.binmode
|
20
|
+
tmp.write Zstandard.deflate(chunk.read)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -126,6 +126,26 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
126
126
|
assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
|
127
127
|
assert_equal compress_type, d.instance.compress
|
128
128
|
assert_equal compressor_class, d.instance.compressor.class
|
129
|
+
|
130
|
+
time = event_time("2020-10-03 15:07:00 +0300")
|
131
|
+
metadata = d.instance.metadata("test", time, {})
|
132
|
+
chunk = d.instance.buffer.generate_chunk(metadata)
|
133
|
+
assert_equal "/hdfs/path/file.20201003.1507.log#{d.instance.compressor.ext}", d.instance.generate_path(chunk)
|
134
|
+
end
|
135
|
+
|
136
|
+
def test_explicit_extensions
|
137
|
+
conf = config_element(
|
138
|
+
"ROOT", "", {
|
139
|
+
"host" => "namenode.local",
|
140
|
+
"path" => "/hdfs/path/file.%Y%m%d.log",
|
141
|
+
"compress" => "snappy",
|
142
|
+
"extension" => ".snappy"
|
143
|
+
})
|
144
|
+
d = create_driver(conf)
|
145
|
+
time = event_time("2020-10-07 15:15:00 +0300")
|
146
|
+
metadata = d.instance.metadata("test", time, {})
|
147
|
+
chunk = d.instance.buffer.generate_chunk(metadata)
|
148
|
+
assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
|
129
149
|
end
|
130
150
|
|
131
151
|
def test_placeholders_old_style
|
@@ -207,6 +227,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
207
227
|
assert_equal 1, d.instance.buffer_config.timekey
|
208
228
|
assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
|
209
229
|
end
|
230
|
+
|
231
|
+
def test_time_key_without_buffer_section
|
232
|
+
conf = config_element(
|
233
|
+
"ROOT", "", {
|
234
|
+
"host" => "namenode.local",
|
235
|
+
"path" => "/hdfs/path/file.%Y%m%d-%M.log"
|
236
|
+
}
|
237
|
+
)
|
238
|
+
d = create_driver(conf)
|
239
|
+
time = event_time("2012-07-18 15:03:00 +0900")
|
240
|
+
metadata = d.instance.metadata("test", time, {})
|
241
|
+
chunk = d.instance.buffer.generate_chunk(metadata)
|
242
|
+
assert_equal 60, d.instance.buffer_config.timekey
|
243
|
+
assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
|
244
|
+
end
|
210
245
|
end
|
211
246
|
|
212
247
|
sub_test_case "using format subsection" do
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require "helper"
|
2
|
+
require "fluent/plugin/buf_memory"
|
3
|
+
require 'zstandard'
|
4
|
+
|
5
|
+
class ZstdCompressorTest < Test::Unit::TestCase
|
6
|
+
class Zstd < self
|
7
|
+
|
8
|
+
CONFIG = %[
|
9
|
+
host namenode.local
|
10
|
+
path /hdfs/path/file.%Y%m%d.log
|
11
|
+
]
|
12
|
+
|
13
|
+
def setup
|
14
|
+
omit unless Object.const_defined?(:Zstandard)
|
15
|
+
Fluent::Test.setup
|
16
|
+
@compressor = Fluent::Plugin::WebHDFSOutput::ZstdCompressor.new
|
17
|
+
end
|
18
|
+
|
19
|
+
def create_driver(conf = CONFIG)
|
20
|
+
Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_ext
|
24
|
+
assert_equal(".zst", @compressor.ext)
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_compress
|
28
|
+
d = create_driver
|
29
|
+
if d.instance.respond_to?(:buffer)
|
30
|
+
buffer = d.instance.buffer
|
31
|
+
else
|
32
|
+
buffer = d.instance.instance_variable_get(:@buffer)
|
33
|
+
end
|
34
|
+
|
35
|
+
if buffer.respond_to?(:generate_chunk)
|
36
|
+
chunk = buffer.generate_chunk("test")
|
37
|
+
chunk.concat("hello gzip\n" * 32 * 1024, 1)
|
38
|
+
else
|
39
|
+
chunk = buffer.new_chunk("test")
|
40
|
+
chunk << "hello gzip\n" * 32 * 1024
|
41
|
+
end
|
42
|
+
|
43
|
+
io = Tempfile.new("zstd-")
|
44
|
+
@compressor.compress(chunk, io)
|
45
|
+
assert !io.closed?
|
46
|
+
chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
|
47
|
+
assert(chunk_bytesize > io.read.bytesize)
|
48
|
+
io.rewind
|
49
|
+
reader = Zstandard.inflate(io.read)
|
50
|
+
io.rewind
|
51
|
+
assert_equal(chunk.read, reader)
|
52
|
+
io.close
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -94,20 +94,34 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: zstandard
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: fluentd
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
100
114
|
requirements:
|
101
115
|
- - ">="
|
102
116
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.14.
|
117
|
+
version: 0.14.22
|
104
118
|
type: :runtime
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
107
121
|
requirements:
|
108
122
|
- - ">="
|
109
123
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0.14.
|
124
|
+
version: 0.14.22
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: webhdfs
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -143,10 +157,12 @@ files:
|
|
143
157
|
- lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
|
144
158
|
- lib/fluent/plugin/webhdfs_compressor_snappy.rb
|
145
159
|
- lib/fluent/plugin/webhdfs_compressor_text.rb
|
160
|
+
- lib/fluent/plugin/webhdfs_compressor_zstd.rb
|
146
161
|
- test/helper.rb
|
147
162
|
- test/plugin/test_compressor.rb
|
148
163
|
- test/plugin/test_gzip_compressor.rb
|
149
164
|
- test/plugin/test_out_webhdfs.rb
|
165
|
+
- test/plugin/test_zstd_compressor.rb
|
150
166
|
homepage: https://github.com/fluent/fluent-plugin-webhdfs
|
151
167
|
licenses:
|
152
168
|
- Apache-2.0
|
@@ -166,8 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
166
182
|
- !ruby/object:Gem::Version
|
167
183
|
version: '0'
|
168
184
|
requirements: []
|
169
|
-
|
170
|
-
rubygems_version: 2.6.11
|
185
|
+
rubygems_version: 3.0.3
|
171
186
|
signing_key:
|
172
187
|
specification_version: 4
|
173
188
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|
@@ -176,3 +191,4 @@ test_files:
|
|
176
191
|
- test/plugin/test_compressor.rb
|
177
192
|
- test/plugin/test_gzip_compressor.rb
|
178
193
|
- test/plugin/test_out_webhdfs.rb
|
194
|
+
- test/plugin/test_zstd_compressor.rb
|