fluent-plugin-webhdfs 1.3.0 → 1.5.0
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +35 -0
- data/.travis.yml +9 -4
- data/README.md +30 -3
- data/fluent-plugin-webhdfs.gemspec +2 -2
- data/lib/fluent/plugin/out_webhdfs.rb +35 -12
- data/lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb +32 -0
- data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +8 -2
- data/test/plugin/test_out_webhdfs.rb +140 -1
- data/test/plugin/{test_compressor.rb → test_snappy_compressors.rb} +26 -12
- metadata +9 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc42357da759e1c34ec12b3994bdd96b9f56cc1b093bb890f0ec4bccf929362d
+  data.tar.gz: e63cb6a5df15e5cf2fe8228d9e0e21ff5adf6db9cc5c4e11138aaac77429dc85
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e613ca241b2624ac77c1b1651de28aad3b4e7060086067d1eefc4874241e0a60437bddf61f3775c834a94fdaf1cd374fad1cb5b60e16909db49cf9dc7663770b
+  data.tar.gz: 7eb4b39ab4763f661e1e213d736eb3544d88e1a73b24a0b0d59b6a715fa3fdbeceb62f5a47d14be6d06126747d4751f0b73fa7267591c6d2713d2dac35901f5e
data/.github/workflows/linux.yml
ADDED
@@ -0,0 +1,35 @@
+name: Testing on Ubuntu
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    continue-on-error: ${{ matrix.experimental }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby: [ '2.5', '2.6', '2.7', '3.0' ]
+        os:
+          - ubuntu-latest
+        experimental: [false]
+        include:
+          - ruby: head
+            os: ubuntu-latest
+            experimental: true
+
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install dependencies
+        run: sudo apt-get install libsnappy-dev libzstd-dev
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/.travis.yml
CHANGED
@@ -2,10 +2,11 @@ sudo: false
 language: ruby
 
 rvm:
-  - 2.
-  - 2.
-  - 2.
-  - 2.
+  - 2.4
+  - 2.5
+  - 2.6
+  - 2.7
+  - ruby-head
 
 branches:
   only:
@@ -23,3 +24,7 @@ script: bundle exec rake test
 
 gemfile:
   - Gemfile
+
+matrix:
+  allow_failures:
+    - rvm: ruby-head
data/README.md
CHANGED
@@ -146,6 +146,7 @@ With kerberos authentication:
   path /path/on/hdfs/access.log.%Y%m%d_%H.log
   kerberos true
   kerberos_keytab /path/to/keytab # if needed
+  renew_kerberos_delegation_token true # if needed
 </match>
 
 NOTE: You need to install `gssapi` gem for kerberos. See https://github.com/kzk/webhdfs#for-kerberos-authentication
@@ -157,18 +158,44 @@ If you want to compress data before storing it:
   host namenode.your.cluster.local
   port 50070
   path /path/on/hdfs/access.log.%Y%m%d_%H
-  compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
+  compress gzip # or 'bzip2', 'snappy', 'hadoop_snappy', 'lzo_command', 'zstd'
 </match>
 
-Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`,
+Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.snappy`, `.lzo`, `.zst`).
 Note that you have to install additional gem for several compress algorithms:
 
 - snappy: install snappy gem
+- hadoop_snappy: install snappy gem
 - bzip2: install bzip2-ffi gem
 - zstd: install zstandard gem
 
 Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for infomration on the required packages for your operating system.
 
+You can also specify compression block size (currently supported only for Snappy codecs):
+
+<match access.**>
+  @type webhdfs
+  host namenode.your.cluster.local
+  port 50070
+  path /path/on/hdfs/access.log.%Y%m%d_%H
+  compress hadoop_snappy
+  block_size 32768
+</match>
+
+If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
+
+<match access.**>
+  @type webhdfs
+  host namenode.your.cluster.local
+  port 50070
+  path /path/on/hdfs/access.log.%Y%m%d_%H
+  compress snappy
+  extension ".snappy"
+</match>
+
+With this configuration paths in HDFS will be like `/path/on/hdfs/access.log.20201003_12.snappy`.
+This one may be useful when (for example) you need to use snappy codec but `.sz` files are not recognized as snappy files in HDFS.
+
 ### Namenode HA / Auto retry for WebHDFS known errors
 
 `fluent-plugin-webhdfs` (v0.2.0 or later) accepts 2 namenodes for Namenode HA (active/standby). Use `standby_namenode` like this:
@@ -228,7 +255,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
 
 ### For unstable Namenodes
 
-With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raise error for
+With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raise error for inactive NameNodes.
 
 If you were using unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
 
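Taken together, the options introduced in this release can be combined in a single output section. The following is an illustrative sketch only (host, path, user and interval values are placeholders, not recommendations), combining the new `hadoop_snappy` codec, `block_size`, `extension`, and Kerberos delegation-token renewal documented in the README changes above; note that the plugin requires `username` when `renew_kerberos_delegation_token` is enabled:

<match access.**>
  @type webhdfs
  host namenode.your.cluster.local
  port 50070
  path /path/on/hdfs/access.log.%Y%m%d_%H
  username hdfs_user
  kerberos true
  kerberos_keytab /path/to/keytab
  renew_kerberos_delegation_token true
  renew_kerberos_delegation_token_interval 8h
  compress hadoop_snappy
  block_size 32768
  extension ".snappy"
</match>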
data/fluent-plugin-webhdfs.gemspec
CHANGED
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-webhdfs"
-  gem.version = "1.
+  gem.version = "1.5.0"
   gem.authors = ["TAGOMORI Satoshi"]
   gem.email = ["tagomoris@gmail.com"]
   gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -23,5 +23,5 @@ Gem::Specification.new do |gem|
   gem.add_development_dependency "bzip2-ffi"
   gem.add_development_dependency "zstandard"
   gem.add_runtime_dependency "fluentd", '>= 0.14.22'
-  gem.add_runtime_dependency "webhdfs", '>= 0.
+  gem.add_runtime_dependency "webhdfs", '>= 0.10.0'
 end
data/lib/fluent/plugin/out_webhdfs.rb
CHANGED
@@ -66,11 +66,18 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   config_param :kerberos, :bool, default: false
   desc 'kerberos keytab file'
   config_param :kerberos_keytab, :string, default: nil
+  desc 'Use delegation token while upload webhdfs or not'
+  config_param :renew_kerberos_delegation_token, :bool, default: false
+  desc 'delegation token reuse timer (default 8h)'
+  config_param :renew_kerberos_delegation_token_interval, :time, default: 8 * 60 * 60
 
-  SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
-  desc "
+  SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :hadoop_snappy, :lzo_command, :zstd, :text]
+  desc "Compression method (#{SUPPORTED_COMPRESS.join(',')})"
   config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
 
+  desc 'HDFS file extensions (overrides default compressor extensions)'
+  config_param :extension, :string, default: nil
+
   config_param :remove_prefix, :string, default: nil, deprecated: "use @label for routing"
   config_param :default_tag, :string, default: nil, deprecated: "use @label for routing"
   config_param :null_value, :string, default: nil, deprecated: "use filter plugins to convert null values into any specified string"
@@ -153,6 +160,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     end
 
     @compressor = COMPRESSOR_REGISTRY.lookup(@compress.to_s).new
+    @compressor.configure(conf)
 
     if @host
       @namenode_host = @host
@@ -180,6 +188,14 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
       raise Fluent::ConfigError, "Path on hdfs MUST starts with '/', but '#{@path}'"
     end
 
+    @renew_kerberos_delegation_token_interval_hour = nil
+    if @renew_kerberos_delegation_token
+      unless @username
+        raise Fluent::ConfigError, "username is missing. If you want to reuse delegation token, follow with kerberos accounts"
+      end
+      @renew_kerberos_delegation_token_interval_hour = @renew_kerberos_delegation_token_interval / 60 / 60
+    end
+
     @client = prepare_client(@namenode_host, @namenode_port, @username)
     if @standby_namenode_host
       @client_standby = prepare_client(@standby_namenode_host, @standby_namenode_port, @username)
@@ -199,7 +215,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   end
 
   def prepare_client(host, port, username)
-    client = WebHDFS::Client.new(host, port, username)
+    client = WebHDFS::Client.new(host, port, username, nil, nil, nil, {}, @renew_kerberos_delegation_token_interval_hour)
     if @httpfs
       client.httpfs_mode = true
     end
@@ -267,17 +283,22 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   end
 
   def send_data(path, data)
-
-
-
-
-        @client.create(path, data)
-      end
+    return @client.create(path, data, {'overwrite' => 'true'}) unless @append
+
+    if path_exists?(path)
+      @client.append(path, data)
     else
-      @client.create(path, data
+      @client.create(path, data)
     end
   end
 
+  def path_exists?(path)
+    @client.stat(path)
+    true
+  rescue WebHDFS::FileNotFoundError
+    false
+  end
+
   HOSTNAME_PLACEHOLDERS_DEPRECATED = ['${hostname}', '%{hostname}', '__HOSTNAME__']
   UUID_RANDOM_PLACEHOLDERS_DEPRECATED = ['${uuid}', '${uuid:random}', '__UUID__', '__UUID_RANDOM__']
   UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
@@ -319,7 +340,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     else
       extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
     end
-
+    hdfs_ext = @extension || @compressor.ext
+    hdfs_path = "#{hdfs_path}#{hdfs_ext}"
     if @replace_random_uuid
       uuid_random = SecureRandom.uuid
       hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
@@ -502,7 +524,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     begin
       Open3.capture3("#{command} -V")
     rescue Errno::ENOENT
-      raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
+      raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
     end
   end
 end
@@ -518,5 +540,6 @@ require 'fluent/plugin/webhdfs_compressor_text'
 require 'fluent/plugin/webhdfs_compressor_gzip'
 require 'fluent/plugin/webhdfs_compressor_bzip2'
 require 'fluent/plugin/webhdfs_compressor_snappy'
+require 'fluent/plugin/webhdfs_compressor_hadoop_snappy'
 require 'fluent/plugin/webhdfs_compressor_lzo_command'
 require 'fluent/plugin/webhdfs_compressor_zstd'
data/lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb
ADDED
@@ -0,0 +1,32 @@
+module Fluent::Plugin
+  class WebHDFSOutput < Output
+    class HadoopSnappyCompressor < Compressor
+      WebHDFSOutput.register_compressor('hadoop_snappy', self)
+
+      DEFAULT_BLOCK_SIZE = 256 * 1024
+
+      desc 'Block size for compression algorithm'
+      config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
+
+      def initialize(options = {})
+        super()
+        begin
+          require "snappy"
+        rescue LoadError
+          raise Fluent::ConfigError, "Install snappy before using snappy compressor"
+        end
+      end
+
+      def ext
+        ".snappy"
+      end
+
+      def compress(chunk, tmp)
+        Snappy::Hadoop::Writer.new(tmp, @block_size) do |w|
+          w << chunk.read
+          w.flush
+        end
+      end
+    end
+  end
+end
data/lib/fluent/plugin/webhdfs_compressor_snappy.rb
CHANGED
@@ -3,11 +3,17 @@ module Fluent::Plugin
     class SnappyCompressor < Compressor
       WebHDFSOutput.register_compressor('snappy', self)
 
+      DEFAULT_BLOCK_SIZE = 32 * 1024
+
+      desc 'Block size for compression algorithm'
+      config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
+
       def initialize(options = {})
+        super()
         begin
           require "snappy"
         rescue LoadError
-          raise Fluent::ConfigError, "Install snappy before
+          raise Fluent::ConfigError, "Install snappy before using snappy compressor"
         end
       end
 
@@ -16,7 +22,7 @@ module Fluent::Plugin
       end
 
       def compress(chunk, tmp)
-        Snappy::Writer.new(tmp) do |w|
+        Snappy::Writer.new(tmp, @block_size) do |w|
           w << chunk.read
           w.flush
         end
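As a side note for readers unfamiliar with the two framings: both compressors rely on the `snappy` gem, which provides a stream `Snappy::Writer`/`Snappy::Reader` pair and a Hadoop-framed `Snappy::Hadoop::Writer`/`Snappy::Hadoop::Reader` pair; the latter is what the new `hadoop_snappy` codec writes, and the renamed test_snappy_compressors.rb below exercises both. A minimal round-trip sketch, assuming the `snappy` gem is installed; the temp-file prefixes and sample payload are illustrative only:

    require "snappy"
    require "tempfile"

    payload = "hello snappy\n" * 1024

    # Hadoop-framed snappy, as written by HadoopSnappyCompressor (256 KiB default block size).
    io = Tempfile.new("hadoop-snappy-")
    Snappy::Hadoop::Writer.new(io, 256 * 1024) do |w|
      w << payload
      w.flush
    end
    io.rewind
    raise "round-trip mismatch" unless Snappy::Hadoop::Reader.new(io).read == payload

    # Plain snappy stream framing, as written by SnappyCompressor (32 KiB default block size).
    io = Tempfile.new("snappy-")
    Snappy::Writer.new(io, 32 * 1024) do |w|
      w << payload
      w.flush
    end
    io.rewind
    raise "round-trip mismatch" unless Snappy::Reader.new(io).read == payload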
data/test/plugin/test_out_webhdfs.rb
CHANGED
@@ -107,6 +107,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
   data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
        bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
        snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
+       hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor],
        lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
   def test_compress(data)
     compress_type, compressor_class = data
@@ -116,7 +117,13 @@ class WebHDFSOutputTest < Test::Unit::TestCase
         "namenode" => "server.local:14000",
         "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
         "compress" => compress_type
-      }
+      }, [
+        config_element("buffer", "tag, time", {
+          "@type" => "memory",
+          "timekey_zone" => "+0300",
+          "timekey" => 60
+        })
+      ])
     d = create_driver(conf)
   rescue Fluent::ConfigError => ex
     omit ex.message
@@ -126,6 +133,43 @@ class WebHDFSOutputTest < Test::Unit::TestCase
     assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
     assert_equal compress_type, d.instance.compress
     assert_equal compressor_class, d.instance.compressor.class
+
+    time = event_time("2020-10-03 15:07:00 +0300")
+    metadata = d.instance.metadata("test", time, {})
+    chunk = d.instance.buffer.generate_chunk(metadata)
+    assert_equal "/hdfs/path/file.20201003.1507.log#{d.instance.compressor.ext}", d.instance.generate_path(chunk)
+  end
+
+  def test_explicit_extensions
+    conf = config_element(
+      "ROOT", "", {
+        "host" => "namenode.local",
+        "path" => "/hdfs/path/file.%Y%m%d.log",
+        "compress" => "snappy",
+        "extension" => ".snappy"
+      })
+    d = create_driver(conf)
+    time = event_time("2020-10-07 15:15:00 +0300")
+    metadata = d.instance.metadata("test", time, {})
+    chunk = d.instance.buffer.generate_chunk(metadata)
+    assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
+  end
+
+  data(snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
+       hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor])
+  def test_compression_block_size(data)
+    compress_type, compressor_class = data
+    conf = config_element(
+      "ROOT", "", {
+        "host" => "namenode.local",
+        "path" => "/hdfs/path/file.%Y%m%d.log",
+        "compress" => compress_type,
+        "block_size" => 16384
+      })
+    d = create_driver(conf)
+
+    assert_equal compress_type, d.instance.compress
+    assert_equal 16384, d.instance.compressor.block_size
   end
 
   def test_placeholders_old_style
@@ -272,4 +316,99 @@ class WebHDFSOutputTest < Test::Unit::TestCase
       assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
     end
   end
+
+  sub_test_case "kerberos config" do
+    CONFIG_KERBEROS = config_element(
+      "ROOT", "", {
+        "namenode" => "server.local:14000",
+        "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+        "username" => "hdfs_user",
+        "kerberos" => true,
+        "kerberos_keytab" => "/path/to/kerberos.keytab",
+      })
+
+    test "renew_kerberos_delegation_token default" do
+      mock.proxy(WebHDFS::Client).new("server.local", 14000, "hdfs_user", nil, nil, nil, {}, nil).once
+
+      d = create_driver(CONFIG_KERBEROS)
+
+      assert_equal(
+        {
+          kerberos: true,
+          renew_kerberos_delegation_token: false,
+          renew_kerberos_delegation_token_interval_hour: nil,
+        },
+        {
+          kerberos: d.instance.kerberos,
+          renew_kerberos_delegation_token: d.instance.instance_eval("@renew_kerberos_delegation_token"),
+          renew_kerberos_delegation_token_interval_hour: d.instance.instance_eval("@renew_kerberos_delegation_token_interval_hour"),
+        })
+    end
+
+    test "default renew_kerberos_delegation_token_interval" do
+      expected_hour = 8
+
+      mock.proxy(WebHDFS::Client).new("server.local", 14000, "hdfs_user", nil, nil, nil, {}, expected_hour).once
+
+      d = create_driver(CONFIG_KERBEROS +
+                        config_element("", "", { "renew_kerberos_delegation_token" => true }))
+
+      assert_equal(
+        {
+          kerberos: true,
+          renew_kerberos_delegation_token: true,
+          renew_kerberos_delegation_token_interval: expected_hour * 60 * 60,
+          renew_kerberos_delegation_token_interval_hour: expected_hour,
+        },
+        {
+          kerberos: d.instance.kerberos,
+          renew_kerberos_delegation_token: d.instance.instance_eval("@renew_kerberos_delegation_token"),
+          renew_kerberos_delegation_token_interval: d.instance.instance_eval("@renew_kerberos_delegation_token_interval"),
+          renew_kerberos_delegation_token_interval_hour: d.instance.instance_eval("@renew_kerberos_delegation_token_interval_hour"),
+        })
+    end
+
+    test "renew_kerberos_delegation_token_interval" do
+      expected_hour = 10
+
+      mock.proxy(WebHDFS::Client).new("server.local", 14000, "hdfs_user", nil, nil, nil, {}, expected_hour).once
+
+      d = create_driver(
+        CONFIG_KERBEROS +
+        config_element(
+          "", "",
+          {
+            "renew_kerberos_delegation_token" => true,
+            "renew_kerberos_delegation_token_interval" => "#{expected_hour}h",
+          }))
+
+      assert_equal(
+        {
+          kerberos: true,
+          renew_kerberos_delegation_token: true,
+          renew_kerberos_delegation_token_interval: expected_hour * 60 * 60,
+          renew_kerberos_delegation_token_interval_hour: expected_hour,
+        },
+        {
+          kerberos: d.instance.kerberos,
+          renew_kerberos_delegation_token: d.instance.instance_eval("@renew_kerberos_delegation_token"),
+          renew_kerberos_delegation_token_interval: d.instance.instance_eval("@renew_kerberos_delegation_token_interval"),
+          renew_kerberos_delegation_token_interval_hour: d.instance.instance_eval("@renew_kerberos_delegation_token_interval_hour"),
+        })
+    end
+
+    test "username is required for renew_kerberos_delegation_token" do
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+          "kerberos" => true,
+          "renew_kerberos_delegation_token" => true,
+        })
+
+      assert_raise(Fluent::ConfigError) do
+        create_driver(conf)
+      end
+    end
+  end
 end
data/test/plugin/{test_compressor.rb → test_snappy_compressors.rb}
RENAMED
@@ -5,7 +5,7 @@ begin
 rescue LoadError
 end
 
-class CompressorTest < Test::Unit::TestCase
+class SnappyCompressorsTest < Test::Unit::TestCase
   class Snappy < self
 
     CONFIG = %[
@@ -16,7 +16,17 @@ class CompressorTest < Test::Unit::TestCase
     def setup
       omit unless Object.const_defined?(:Snappy)
       Fluent::Test.setup
-
+
+      @compressors_size = 2
+      @compressors = [
+        Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new,
+        Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor.new
+      ]
+      @readers = [
+        ::Snappy::Reader,
+        ::Snappy::Hadoop::Reader
+      ]
+      @exts = [".sz", ".snappy"]
     end
 
     def create_driver(conf = CONFIG)
@@ -24,7 +34,9 @@ class CompressorTest < Test::Unit::TestCase
     end
 
     def test_ext
-
+      for i in 0...@compressors_size do
+        assert_equal(@exts[i], @compressors[i].ext)
+      end
     end
 
     def test_compress
@@ -43,15 +55,17 @@ class CompressorTest < Test::Unit::TestCase
         chunk << "hello snappy\n" * 32 * 1024
       end
 
-
-
-
-
-
-
-
-
-
+      for i in 0...@compressors_size do
+        io = Tempfile.new("snappy-")
+        @compressors[i].compress(chunk, io)
+        io.open
+        chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
+        assert(chunk_bytesize > io.read.bytesize)
+        io.rewind
+        reader = @readers[i].new(io)
+        assert_equal(chunk.read, reader.read)
+        io.close
+      end
     end
   end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-webhdfs
 version: !ruby/object:Gem::Version
-  version: 1.
+  version: 1.5.0
 platform: ruby
 authors:
 - TAGOMORI Satoshi
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2021-08-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -128,14 +128,14 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.10.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.10.0
 description: For WebHDFS and HttpFs of Hadoop HDFS
 email:
 - tagomoris@gmail.com
@@ -143,6 +143,7 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- ".github/workflows/linux.yml"
 - ".gitignore"
 - ".travis.yml"
 - Appraisals
@@ -154,14 +155,15 @@ files:
 - lib/fluent/plugin/out_webhdfs.rb
 - lib/fluent/plugin/webhdfs_compressor_bzip2.rb
 - lib/fluent/plugin/webhdfs_compressor_gzip.rb
+- lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb
 - lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
 - lib/fluent/plugin/webhdfs_compressor_snappy.rb
 - lib/fluent/plugin/webhdfs_compressor_text.rb
 - lib/fluent/plugin/webhdfs_compressor_zstd.rb
 - test/helper.rb
-- test/plugin/test_compressor.rb
 - test/plugin/test_gzip_compressor.rb
 - test/plugin/test_out_webhdfs.rb
+- test/plugin/test_snappy_compressors.rb
 - test/plugin/test_zstd_compressor.rb
 homepage: https://github.com/fluent/fluent-plugin-webhdfs
 licenses:
@@ -182,13 +184,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.2.5
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
 test_files:
 - test/helper.rb
-- test/plugin/test_compressor.rb
 - test/plugin/test_gzip_compressor.rb
 - test/plugin/test_out_webhdfs.rb
+- test/plugin/test_snappy_compressors.rb
 - test/plugin/test_zstd_compressor.rb