fluent-plugin-webhdfs 1.2.3 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.travis.yml +9 -4
- data/README.md +19 -2
- data/fluent-plugin-webhdfs.gemspec +3 -2
- data/lib/fluent/plugin/out_webhdfs.rb +21 -11
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +4 -3
- data/lib/fluent/plugin/webhdfs_compressor_zstd.rb +24 -0
- data/test/plugin/test_out_webhdfs.rb +20 -0
- data/test/plugin/test_zstd_compressor.rb +55 -0
- metadata +22 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: ea946e8bbbf059043af07bb698aba729d7e5b2ecc7699b3275c34cfde49b3412
|
|
4
|
+
data.tar.gz: 31a42272f10bb2e16d60a93a923c3de1882cef45f05e76a5c38d86620d927795
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8d0527a147d497f309ef9c4d965ad87348e01e96eabe073810e2c613ba8473988866604a57dd7524d8a66ec4f23bba07d40d4f03d16b28847697a8b7717f9c36
|
|
7
|
+
data.tar.gz: d03c99a56a7a0e34424c923f0b3179bf038e46dee4a765142adbb9944dfe9c72d41c27a80c6757fdfb63f071a00f390dac1e74275a884406a3aa65b065481723
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
|
@@ -2,10 +2,11 @@ sudo: false
|
|
|
2
2
|
language: ruby
|
|
3
3
|
|
|
4
4
|
rvm:
|
|
5
|
-
- 2.
|
|
6
|
-
- 2.
|
|
7
|
-
- 2.
|
|
8
|
-
- 2.
|
|
5
|
+
- 2.4
|
|
6
|
+
- 2.5
|
|
7
|
+
- 2.6
|
|
8
|
+
- 2.7
|
|
9
|
+
- ruby-head
|
|
9
10
|
|
|
10
11
|
branches:
|
|
11
12
|
only:
|
|
@@ -23,3 +24,7 @@ script: bundle exec rake test
|
|
|
23
24
|
|
|
24
25
|
gemfile:
|
|
25
26
|
- Gemfile
|
|
27
|
+
|
|
28
|
+
matrix:
|
|
29
|
+
allow_failures:
|
|
30
|
+
- rvm: ruby-head
|
data/README.md
CHANGED
|
@@ -157,14 +157,31 @@ If you want to compress data before storing it:
|
|
|
157
157
|
host namenode.your.cluster.local
|
|
158
158
|
port 50070
|
|
159
159
|
path /path/on/hdfs/access.log.%Y%m%d_%H
|
|
160
|
-
compress gzip # or 'bzip2', 'snappy', 'lzo_command'
|
|
160
|
+
compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
|
|
161
161
|
</match>
|
|
162
162
|
|
|
163
|
-
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`).
|
|
163
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.lzo`, `.zst`).
|
|
164
164
|
Note that you have to install additional gem for several compress algorithms:
|
|
165
165
|
|
|
166
166
|
- snappy: install snappy gem
|
|
167
167
|
- bzip2: install bzip2-ffi gem
|
|
168
|
+
- zstd: install zstandard gem
|
|
169
|
+
|
|
170
|
+
Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
|
|
171
|
+
|
|
172
|
+
If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
|
|
173
|
+
|
|
174
|
+
<match access.**>
|
|
175
|
+
@type webhdfs
|
|
176
|
+
host namenode.your.cluster.local
|
|
177
|
+
port 50070
|
|
178
|
+
path /path/on/hdfs/access.log.%Y%m%d_%H
|
|
179
|
+
compress snappy
|
|
180
|
+
extension ".snappy"
|
|
181
|
+
</match>
|
|
182
|
+
|
|
183
|
+
With this configuration paths in HDFS will be like `/path/on/hdfs/access.log.20201003_12.snappy`.
|
|
184
|
+
This one may be useful when (for example) you need to use snappy codec but `.sz` files are not recognized as snappy files in HDFS.
|
|
168
185
|
|
|
169
186
|
### Namenode HA / Auto retry for WebHDFS known errors
|
|
170
187
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |gem|
|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
|
5
|
-
gem.version = "1.2
|
|
5
|
+
gem.version = "1.3.2"
|
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
|
|
|
21
21
|
gem.add_development_dependency "appraisal"
|
|
22
22
|
gem.add_development_dependency "snappy", '>= 0.0.13'
|
|
23
23
|
gem.add_development_dependency "bzip2-ffi"
|
|
24
|
-
gem.
|
|
24
|
+
gem.add_development_dependency "zstandard"
|
|
25
|
+
gem.add_runtime_dependency "fluentd", '>= 0.14.22'
|
|
25
26
|
gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
|
|
26
27
|
end
|
|
@@ -67,10 +67,13 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
|
67
67
|
desc 'kerberos keytab file'
|
|
68
68
|
config_param :kerberos_keytab, :string, default: nil
|
|
69
69
|
|
|
70
|
-
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
|
|
70
|
+
SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
|
|
71
71
|
desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
|
|
72
72
|
config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
|
|
73
73
|
|
|
74
|
+
desc 'HDFS file extensions (overrides default compressor extensions)'
|
|
75
|
+
config_param :extension, :string, default: nil
|
|
76
|
+
|
|
74
77
|
config_param :remove_prefix, :string, default: nil, deprecated: "use @label for routing"
|
|
75
78
|
config_param :default_tag, :string, default: nil, deprecated: "use @label for routing"
|
|
76
79
|
config_param :null_value, :string, default: nil, deprecated: "use filter plugins to convert null values into any specified string"
|
|
@@ -267,17 +270,22 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
|
267
270
|
end
|
|
268
271
|
|
|
269
272
|
def send_data(path, data)
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
@client.create(path, data)
|
|
275
|
-
end
|
|
273
|
+
return @client.create(path, data, {'overwrite' => 'true'}) unless @append
|
|
274
|
+
|
|
275
|
+
if path_exists?(path)
|
|
276
|
+
@client.append(path, data)
|
|
276
277
|
else
|
|
277
|
-
@client.create(path, data
|
|
278
|
+
@client.create(path, data)
|
|
278
279
|
end
|
|
279
280
|
end
|
|
280
281
|
|
|
282
|
+
def path_exists?(path)
|
|
283
|
+
@client.stat(path)
|
|
284
|
+
true
|
|
285
|
+
rescue WebHDFS::FileNotFoundError
|
|
286
|
+
false
|
|
287
|
+
end
|
|
288
|
+
|
|
281
289
|
HOSTNAME_PLACEHOLDERS_DEPRECATED = ['${hostname}', '%{hostname}', '__HOSTNAME__']
|
|
282
290
|
UUID_RANDOM_PLACEHOLDERS_DEPRECATED = ['${uuid}', '${uuid:random}', '__UUID__', '__UUID_RANDOM__']
|
|
283
291
|
UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
|
|
@@ -315,11 +323,12 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
|
315
323
|
|
|
316
324
|
def generate_path(chunk)
|
|
317
325
|
hdfs_path = if @append
|
|
318
|
-
extract_placeholders(@path, chunk
|
|
326
|
+
extract_placeholders(@path, chunk)
|
|
319
327
|
else
|
|
320
|
-
extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk
|
|
328
|
+
extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
|
|
321
329
|
end
|
|
322
|
-
|
|
330
|
+
hdfs_ext = @extension || @compressor.ext
|
|
331
|
+
hdfs_path = "#{hdfs_path}#{hdfs_ext}"
|
|
323
332
|
if @replace_random_uuid
|
|
324
333
|
uuid_random = SecureRandom.uuid
|
|
325
334
|
hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
|
|
@@ -519,3 +528,4 @@ require 'fluent/plugin/webhdfs_compressor_gzip'
|
|
|
519
528
|
require 'fluent/plugin/webhdfs_compressor_bzip2'
|
|
520
529
|
require 'fluent/plugin/webhdfs_compressor_snappy'
|
|
521
530
|
require 'fluent/plugin/webhdfs_compressor_lzo_command'
|
|
531
|
+
require 'fluent/plugin/webhdfs_compressor_zstd'
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
module Fluent::Plugin
|
|
2
|
+
class WebHDFSOutput < Output
|
|
3
|
+
class ZstdCompressor < Compressor
|
|
4
|
+
WebHDFSOutput.register_compressor('zstd', self)
|
|
5
|
+
|
|
6
|
+
def initialize(options = {})
|
|
7
|
+
begin
|
|
8
|
+
require "zstandard"
|
|
9
|
+
rescue LoadError
|
|
10
|
+
raise Fluent::ConfigError, "Install zstandard gem before use of zstd compressor"
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def ext
|
|
15
|
+
".zst"
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def compress(chunk, tmp)
|
|
19
|
+
tmp.binmode
|
|
20
|
+
tmp.write Zstandard.deflate(chunk.read)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -126,6 +126,26 @@ class WebHDFSOutputTest < Test::Unit::TestCase
|
|
|
126
126
|
assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
|
|
127
127
|
assert_equal compress_type, d.instance.compress
|
|
128
128
|
assert_equal compressor_class, d.instance.compressor.class
|
|
129
|
+
|
|
130
|
+
time = event_time("2020-10-03 15:07:00 +0300")
|
|
131
|
+
metadata = d.instance.metadata("test", time, {})
|
|
132
|
+
chunk = d.instance.buffer.generate_chunk(metadata)
|
|
133
|
+
assert_equal "/hdfs/path/file.20201003.1507.log#{d.instance.compressor.ext}", d.instance.generate_path(chunk)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def test_explicit_extensions
|
|
137
|
+
conf = config_element(
|
|
138
|
+
"ROOT", "", {
|
|
139
|
+
"host" => "namenode.local",
|
|
140
|
+
"path" => "/hdfs/path/file.%Y%m%d.log",
|
|
141
|
+
"compress" => "snappy",
|
|
142
|
+
"extension" => ".snappy"
|
|
143
|
+
})
|
|
144
|
+
d = create_driver(conf)
|
|
145
|
+
time = event_time("2020-10-07 15:15:00 +0300")
|
|
146
|
+
metadata = d.instance.metadata("test", time, {})
|
|
147
|
+
chunk = d.instance.buffer.generate_chunk(metadata)
|
|
148
|
+
assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
|
|
129
149
|
end
|
|
130
150
|
|
|
131
151
|
def test_placeholders_old_style
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
require "helper"
|
|
2
|
+
require "fluent/plugin/buf_memory"
|
|
3
|
+
require 'zstandard'
|
|
4
|
+
|
|
5
|
+
class ZstdCompressorTest < Test::Unit::TestCase
|
|
6
|
+
class Zstd < self
|
|
7
|
+
|
|
8
|
+
CONFIG = %[
|
|
9
|
+
host namenode.local
|
|
10
|
+
path /hdfs/path/file.%Y%m%d.log
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
def setup
|
|
14
|
+
omit unless Object.const_defined?(:Zstandard)
|
|
15
|
+
Fluent::Test.setup
|
|
16
|
+
@compressor = Fluent::Plugin::WebHDFSOutput::ZstdCompressor.new
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def create_driver(conf = CONFIG)
|
|
20
|
+
Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def test_ext
|
|
24
|
+
assert_equal(".zst", @compressor.ext)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def test_compress
|
|
28
|
+
d = create_driver
|
|
29
|
+
if d.instance.respond_to?(:buffer)
|
|
30
|
+
buffer = d.instance.buffer
|
|
31
|
+
else
|
|
32
|
+
buffer = d.instance.instance_variable_get(:@buffer)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
if buffer.respond_to?(:generate_chunk)
|
|
36
|
+
chunk = buffer.generate_chunk("test")
|
|
37
|
+
chunk.concat("hello gzip\n" * 32 * 1024, 1)
|
|
38
|
+
else
|
|
39
|
+
chunk = buffer.new_chunk("test")
|
|
40
|
+
chunk << "hello gzip\n" * 32 * 1024
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
io = Tempfile.new("zstd-")
|
|
44
|
+
@compressor.compress(chunk, io)
|
|
45
|
+
assert !io.closed?
|
|
46
|
+
chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
|
|
47
|
+
assert(chunk_bytesize > io.read.bytesize)
|
|
48
|
+
io.rewind
|
|
49
|
+
reader = Zstandard.inflate(io.read)
|
|
50
|
+
io.rewind
|
|
51
|
+
assert_equal(chunk.read, reader)
|
|
52
|
+
io.close
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fluent-plugin-webhdfs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.2
|
|
4
|
+
version: 1.3.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- TAGOMORI Satoshi
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2020-12-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -94,20 +94,34 @@ dependencies:
|
|
|
94
94
|
- - ">="
|
|
95
95
|
- !ruby/object:Gem::Version
|
|
96
96
|
version: '0'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: zstandard
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - ">="
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '0'
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - ">="
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: '0'
|
|
97
111
|
- !ruby/object:Gem::Dependency
|
|
98
112
|
name: fluentd
|
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
|
100
114
|
requirements:
|
|
101
115
|
- - ">="
|
|
102
116
|
- !ruby/object:Gem::Version
|
|
103
|
-
version: 0.14.
|
|
117
|
+
version: 0.14.22
|
|
104
118
|
type: :runtime
|
|
105
119
|
prerelease: false
|
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
|
107
121
|
requirements:
|
|
108
122
|
- - ">="
|
|
109
123
|
- !ruby/object:Gem::Version
|
|
110
|
-
version: 0.14.
|
|
124
|
+
version: 0.14.22
|
|
111
125
|
- !ruby/object:Gem::Dependency
|
|
112
126
|
name: webhdfs
|
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -143,10 +157,12 @@ files:
|
|
|
143
157
|
- lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
|
|
144
158
|
- lib/fluent/plugin/webhdfs_compressor_snappy.rb
|
|
145
159
|
- lib/fluent/plugin/webhdfs_compressor_text.rb
|
|
160
|
+
- lib/fluent/plugin/webhdfs_compressor_zstd.rb
|
|
146
161
|
- test/helper.rb
|
|
147
162
|
- test/plugin/test_compressor.rb
|
|
148
163
|
- test/plugin/test_gzip_compressor.rb
|
|
149
164
|
- test/plugin/test_out_webhdfs.rb
|
|
165
|
+
- test/plugin/test_zstd_compressor.rb
|
|
150
166
|
homepage: https://github.com/fluent/fluent-plugin-webhdfs
|
|
151
167
|
licenses:
|
|
152
168
|
- Apache-2.0
|
|
@@ -166,8 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
166
182
|
- !ruby/object:Gem::Version
|
|
167
183
|
version: '0'
|
|
168
184
|
requirements: []
|
|
169
|
-
|
|
170
|
-
rubygems_version: 2.6.13
|
|
185
|
+
rubygems_version: 3.0.3
|
|
171
186
|
signing_key:
|
|
172
187
|
specification_version: 4
|
|
173
188
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|
|
@@ -176,3 +191,4 @@ test_files:
|
|
|
176
191
|
- test/plugin/test_compressor.rb
|
|
177
192
|
- test/plugin/test_gzip_compressor.rb
|
|
178
193
|
- test/plugin/test_out_webhdfs.rb
|
|
194
|
+
- test/plugin/test_zstd_compressor.rb
|