fluent-plugin-webhdfs 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +35 -0
- data/README.md +2 -1
- data/fluent-plugin-webhdfs.gemspec +2 -2
- data/lib/fluent/plugin/out_webhdfs.rb +13 -1
- data/test/plugin/test_out_webhdfs.rb +102 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc42357da759e1c34ec12b3994bdd96b9f56cc1b093bb890f0ec4bccf929362d
+  data.tar.gz: e63cb6a5df15e5cf2fe8228d9e0e21ff5adf6db9cc5c4e11138aaac77429dc85
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e613ca241b2624ac77c1b1651de28aad3b4e7060086067d1eefc4874241e0a60437bddf61f3775c834a94fdaf1cd374fad1cb5b60e16909db49cf9dc7663770b
+  data.tar.gz: 7eb4b39ab4763f661e1e213d736eb3544d88e1a73b24a0b0d59b6a715fa3fdbeceb62f5a47d14be6d06126747d4751f0b73fa7267591c6d2713d2dac35901f5e
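These digests cover the two archives packed inside the .gem file (a .gem is a tar archive containing metadata.gz, data.tar.gz, and checksums.yaml.gz). A minimal Ruby sketch of verifying one of them, assuming you have already extracted fluent-plugin-webhdfs-1.5.0.gem with tar:

require "digest"

# Compare the published SHA256 for data.tar.gz (from checksums.yaml above)
# against a locally extracted copy; the file name assumes an extracted .gem.
expected = "e63cb6a5df15e5cf2fe8228d9e0e21ff5adf6db9cc5c4e11138aaac77429dc85"
actual = Digest::SHA256.file("data.tar.gz").hexdigest
puts(actual == expected ? "checksum OK" : "checksum mismatch")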
data/.github/workflows/linux.yml
ADDED
@@ -0,0 +1,35 @@
+name: Testing on Ubuntu
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    continue-on-error: ${{ matrix.experimental }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby: [ '2.5', '2.6', '2.7', '3.0' ]
+        os:
+          - ubuntu-latest
+        experimental: [false]
+        include:
+          - ruby: head
+            os: ubuntu-latest
+            experimental: true
+
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install dependencies
+        run: sudo apt-get install libsnappy-dev libzstd-dev
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/README.md
CHANGED
@@ -146,6 +146,7 @@ With kerberos authentication:
   path /path/on/hdfs/access.log.%Y%m%d_%H.log
   kerberos true
   kerberos_keytab /path/to/keytab # if needed
+  renew_kerberos_delegation_token true # if needed
 </match>
 
 NOTE: You need to install `gssapi` gem for kerberos. See https://github.com/kzk/webhdfs#for-kerberos-authentication
@@ -254,7 +255,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
 
 ### For unstable Namenodes
 
-With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raise error for
+With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raise error for inactive NameNodes.
 
 If you were using unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
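Assembled from the hunk above, the README's kerberos example now reads approximately as follows; the match pattern, namenode address, and `@type` line are filled in from the rest of the README and are illustrative rather than quoted from this diff:

<match access.**>
  @type webhdfs
  namenode your.namenode.local:50070
  path /path/on/hdfs/access.log.%Y%m%d_%H.log
  kerberos true
  kerberos_keytab /path/to/keytab # if needed
  renew_kerberos_delegation_token true # if needed
</match>

As the NOTE in the hunk says, kerberos support additionally requires the `gssapi` gem.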
data/fluent-plugin-webhdfs.gemspec
CHANGED
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-webhdfs"
-  gem.version = "1.4.0"
+  gem.version = "1.5.0"
   gem.authors = ["TAGOMORI Satoshi"]
   gem.email = ["tagomoris@gmail.com"]
   gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -23,5 +23,5 @@ Gem::Specification.new do |gem|
   gem.add_development_dependency "bzip2-ffi"
   gem.add_development_dependency "zstandard"
   gem.add_runtime_dependency "fluentd", '>= 0.14.22'
-  gem.add_runtime_dependency "webhdfs", '>= 0.
+  gem.add_runtime_dependency "webhdfs", '>= 0.10.0'
 end
data/lib/fluent/plugin/out_webhdfs.rb
CHANGED
@@ -66,6 +66,10 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   config_param :kerberos, :bool, default: false
   desc 'kerberos keytab file'
   config_param :kerberos_keytab, :string, default: nil
+  desc 'Use delegation token while upload webhdfs or not'
+  config_param :renew_kerberos_delegation_token, :bool, default: false
+  desc 'delegation token reuse timer (default 8h)'
+  config_param :renew_kerberos_delegation_token_interval, :time, default: 8 * 60 * 60
 
   SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :hadoop_snappy, :lzo_command, :zstd, :text]
   desc "Compression method (#{SUPPORTED_COMPRESS.join(',')})"
@@ -184,6 +188,14 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
       raise Fluent::ConfigError, "Path on hdfs MUST starts with '/', but '#{@path}'"
     end
 
+    @renew_kerberos_delegation_token_interval_hour = nil
+    if @renew_kerberos_delegation_token
+      unless @username
+        raise Fluent::ConfigError, "username is missing. If you want to reuse delegation token, follow with kerberos accounts"
+      end
+      @renew_kerberos_delegation_token_interval_hour = @renew_kerberos_delegation_token_interval / 60 / 60
+    end
+
     @client = prepare_client(@namenode_host, @namenode_port, @username)
     if @standby_namenode_host
       @client_standby = prepare_client(@standby_namenode_host, @standby_namenode_port, @username)
@@ -203,7 +215,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   end
 
   def prepare_client(host, port, username)
-    client = WebHDFS::Client.new(host, port, username)
+    client = WebHDFS::Client.new(host, port, username, nil, nil, nil, {}, @renew_kerberos_delegation_token_interval_hour)
     if @httpfs
       client.httpfs_mode = true
     end
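Two details of this change are easy to miss. First, fluentd's :time type parses the interval setting into seconds ("8h" becomes 28800), while the webhdfs client's renewal timer is expressed in hours, which is what the / 60 / 60 conversion in configure handles. Second, the client now receives five extra positional arguments. A minimal sketch of the mapping, assuming the webhdfs 0.10.0 constructor takes (host, port, username, doas, proxy_address, proxy_port, http_headers, renew_kerberos_delegation_token_interval_hour); the names of the nil placeholders are my reading, not confirmed by this diff:

require "webhdfs"  # webhdfs >= 0.10.0, which adds the renewal-interval argument

# fluentd :time value for the default "8h" setting, converted to the hours
# the client expects.
interval_seconds = 8 * 60 * 60
interval_hour    = interval_seconds / 60 / 60  # => 8

# The three nils and {} are presumably doas, proxy_address, proxy_port, and
# http_headers defaults (assumption; only their positions are visible here).
client = WebHDFS::Client.new("namenode.local", 50070, "hdfs_user",
                             nil, nil, nil, {}, interval_hour)

The test mocks below pin exactly this eight-argument call, which is why the gemspec bumps the webhdfs dependency to >= 0.10.0.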
data/test/plugin/test_out_webhdfs.rb
CHANGED
@@ -117,7 +117,13 @@ class WebHDFSOutputTest < Test::Unit::TestCase
         "namenode" => "server.local:14000",
         "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
         "compress" => compress_type
-      })
+      }, [
+        config_element("buffer", "tag, time", {
+          "@type" => "memory",
+          "timekey_zone" => "+0300",
+          "timekey" => 60
+        })
+      ])
       d = create_driver(conf)
     rescue Fluent::ConfigError => ex
       omit ex.message
@@ -310,4 +316,99 @@ class WebHDFSOutputTest < Test::Unit::TestCase
       assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
     end
   end
+
+  sub_test_case "kerberos config" do
+    CONFIG_KERBEROS = config_element(
+      "ROOT", "", {
+        "namenode" => "server.local:14000",
+        "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+        "username" => "hdfs_user",
+        "kerberos" => true,
+        "kerberos_keytab" => "/path/to/kerberos.keytab",
+      })
+
+    test "renew_kerberos_delegation_token default" do
+      mock.proxy(WebHDFS::Client).new("server.local", 14000, "hdfs_user", nil, nil, nil, {}, nil).once
+
+      d = create_driver(CONFIG_KERBEROS)
+
+      assert_equal(
+        {
+          kerberos: true,
+          renew_kerberos_delegation_token: false,
+          renew_kerberos_delegation_token_interval_hour: nil,
+        },
+        {
+          kerberos: d.instance.kerberos,
+          renew_kerberos_delegation_token: d.instance.instance_eval("@renew_kerberos_delegation_token"),
+          renew_kerberos_delegation_token_interval_hour: d.instance.instance_eval("@renew_kerberos_delegation_token_interval_hour"),
+        })
+    end
+
+    test "default renew_kerberos_delegation_token_interval" do
+      expected_hour = 8
+
+      mock.proxy(WebHDFS::Client).new("server.local", 14000, "hdfs_user", nil, nil, nil, {}, expected_hour).once
+
+      d = create_driver(CONFIG_KERBEROS +
+                        config_element("", "", { "renew_kerberos_delegation_token" => true }))
+
+      assert_equal(
+        {
+          kerberos: true,
+          renew_kerberos_delegation_token: true,
+          renew_kerberos_delegation_token_interval: expected_hour * 60 * 60,
+          renew_kerberos_delegation_token_interval_hour: expected_hour,
+        },
+        {
+          kerberos: d.instance.kerberos,
+          renew_kerberos_delegation_token: d.instance.instance_eval("@renew_kerberos_delegation_token"),
+          renew_kerberos_delegation_token_interval: d.instance.instance_eval("@renew_kerberos_delegation_token_interval"),
+          renew_kerberos_delegation_token_interval_hour: d.instance.instance_eval("@renew_kerberos_delegation_token_interval_hour"),
+        })
+    end
+
+    test "renew_kerberos_delegation_token_interval" do
+      expected_hour = 10
+
+      mock.proxy(WebHDFS::Client).new("server.local", 14000, "hdfs_user", nil, nil, nil, {}, expected_hour).once
+
+      d = create_driver(
+        CONFIG_KERBEROS +
+        config_element(
+          "", "",
+          {
+            "renew_kerberos_delegation_token" => true,
+            "renew_kerberos_delegation_token_interval" => "#{expected_hour}h",
+          }))
+
+      assert_equal(
+        {
+          kerberos: true,
+          renew_kerberos_delegation_token: true,
+          renew_kerberos_delegation_token_interval: expected_hour * 60 * 60,
+          renew_kerberos_delegation_token_interval_hour: expected_hour,
+        },
+        {
+          kerberos: d.instance.kerberos,
+          renew_kerberos_delegation_token: d.instance.instance_eval("@renew_kerberos_delegation_token"),
+          renew_kerberos_delegation_token_interval: d.instance.instance_eval("@renew_kerberos_delegation_token_interval"),
+          renew_kerberos_delegation_token_interval_hour: d.instance.instance_eval("@renew_kerberos_delegation_token_interval_hour"),
+        })
+    end
+
+    test "username is required for renew_kerberos_delegation_token" do
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+          "kerberos" => true,
+          "renew_kerberos_delegation_token" => true,
+        })
+
+      assert_raise(Fluent::ConfigError) do
+        create_driver(conf)
+      end
+    end
+  end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-webhdfs
 version: !ruby/object:Gem::Version
-  version: 1.4.0
+  version: 1.5.0
 platform: ruby
 authors:
 - TAGOMORI Satoshi
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2021-08-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -128,14 +128,14 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.10.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.10.0
 description: For WebHDFS and HttpFs of Hadoop HDFS
 email:
 - tagomoris@gmail.com
@@ -143,6 +143,7 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- ".github/workflows/linux.yml"
 - ".gitignore"
 - ".travis.yml"
 - Appraisals
@@ -183,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.2.5
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting