fluent-plugin-webhdfs 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +35 -0
- data/.travis.yml +9 -4
- data/README.md +30 -3
- data/fluent-plugin-webhdfs.gemspec +2 -2
- data/lib/fluent/plugin/out_webhdfs.rb +35 -12
- data/lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb +32 -0
- data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +8 -2
- data/test/plugin/test_out_webhdfs.rb +140 -1
- data/test/plugin/{test_compressor.rb → test_snappy_compressors.rb} +26 -12
- metadata +9 -7
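
For reference, picking up the new release in a Bundler-managed Fluentd deployment is a one-line change; the following Gemfile entry is illustrative only (the `fluentd` constraint simply mirrors the runtime dependency declared in the gemspec below):

```ruby
# Gemfile (illustrative)
source "https://rubygems.org"

gem "fluentd", ">= 0.14.22"           # runtime dependency declared by the gemspec
gem "fluent-plugin-webhdfs", "1.5.0"  # the version introduced in this diff
```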
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc42357da759e1c34ec12b3994bdd96b9f56cc1b093bb890f0ec4bccf929362d
+  data.tar.gz: e63cb6a5df15e5cf2fe8228d9e0e21ff5adf6db9cc5c4e11138aaac77429dc85
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e613ca241b2624ac77c1b1651de28aad3b4e7060086067d1eefc4874241e0a60437bddf61f3775c834a94fdaf1cd374fad1cb5b60e16909db49cf9dc7663770b
+  data.tar.gz: 7eb4b39ab4763f661e1e213d736eb3544d88e1a73b24a0b0d59b6a715fa3fdbeceb62f5a47d14be6d06126747d4751f0b73fa7267591c6d2713d2dac35901f5e
data/.github/workflows/linux.yml
ADDED
@@ -0,0 +1,35 @@
+name: Testing on Ubuntu
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    continue-on-error: ${{ matrix.experimental }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby: [ '2.5', '2.6', '2.7', '3.0' ]
+        os:
+          - ubuntu-latest
+        experimental: [false]
+        include:
+          - ruby: head
+            os: ubuntu-latest
+            experimental: true
+
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install dependencies
+        run: sudo apt-get install libsnappy-dev libzstd-dev
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/.travis.yml
CHANGED
@@ -2,10 +2,11 @@ sudo: false
 language: ruby
 
 rvm:
-  - 2.
-  - 2.
-  - 2.
-  - 2.
+  - 2.4
+  - 2.5
+  - 2.6
+  - 2.7
+  - ruby-head
 
 branches:
   only:
@@ -23,3 +24,7 @@ script: bundle exec rake test
 
 gemfile:
   - Gemfile
+
+matrix:
+  allow_failures:
+    - rvm: ruby-head
data/README.md
CHANGED
@@ -146,6 +146,7 @@ With kerberos authentication:
       path /path/on/hdfs/access.log.%Y%m%d_%H.log
       kerberos true
       kerberos_keytab /path/to/keytab # if needed
+      renew_kerberos_delegation_token true # if needed
     </match>
 
 NOTE: You need to install `gssapi` gem for kerberos. See https://github.com/kzk/webhdfs#for-kerberos-authentication
@@ -157,18 +158,44 @@ If you want to compress data before storing it:
       host namenode.your.cluster.local
       port 50070
       path /path/on/hdfs/access.log.%Y%m%d_%H
-      compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
+      compress gzip # or 'bzip2', 'snappy', 'hadoop_snappy', 'lzo_command', 'zstd'
     </match>
 
-Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`,
+Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.snappy`, `.lzo`, `.zst`).
 Note that you have to install additional gem for several compress algorithms:
 
 - snappy: install snappy gem
+- hadoop_snappy: install snappy gem
 - bzip2: install bzip2-ffi gem
 - zstd: install zstandard gem
 
 Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
 
+You can also specify compression block size (currently supported only for Snappy codecs):
+
+    <match access.**>
+      @type webhdfs
+      host namenode.your.cluster.local
+      port 50070
+      path /path/on/hdfs/access.log.%Y%m%d_%H
+      compress hadoop_snappy
+      block_size 32768
+    </match>
+
+If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
+
+    <match access.**>
+      @type webhdfs
+      host namenode.your.cluster.local
+      port 50070
+      path /path/on/hdfs/access.log.%Y%m%d_%H
+      compress snappy
+      extension ".snappy"
+    </match>
+
+With this configuration paths in HDFS will be like `/path/on/hdfs/access.log.20201003_12.snappy`.
+This one may be useful when (for example) you need to use snappy codec but `.sz` files are not recognized as snappy files in HDFS.
+
 ### Namenode HA / Auto retry for WebHDFS known errors
 
 `fluent-plugin-webhdfs` (v0.2.0 or later) accepts 2 namenodes for Namenode HA (active/standby). Use `standby_namenode` like this:
@@ -228,7 +255,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
 
 ### For unstable Namenodes
 
-With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raise error for
+With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raise error for inactive NameNodes.
 
 If you were using unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
 
data/fluent-plugin-webhdfs.gemspec
CHANGED
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-webhdfs"
-  gem.version = "1.3.0"
+  gem.version = "1.5.0"
   gem.authors = ["TAGOMORI Satoshi"]
   gem.email = ["tagomoris@gmail.com"]
   gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -23,5 +23,5 @@ Gem::Specification.new do |gem|
   gem.add_development_dependency "bzip2-ffi"
   gem.add_development_dependency "zstandard"
   gem.add_runtime_dependency "fluentd", '>= 0.14.22'
-  gem.add_runtime_dependency "webhdfs", '>= 0.
+  gem.add_runtime_dependency "webhdfs", '>= 0.10.0'
 end
data/lib/fluent/plugin/out_webhdfs.rb
CHANGED
@@ -66,11 +66,18 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   config_param :kerberos, :bool, default: false
   desc 'kerberos keytab file'
   config_param :kerberos_keytab, :string, default: nil
+  desc 'Use delegation token while upload webhdfs or not'
+  config_param :renew_kerberos_delegation_token, :bool, default: false
+  desc 'delegation token reuse timer (default 8h)'
+  config_param :renew_kerberos_delegation_token_interval, :time, default: 8 * 60 * 60
 
-  SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
-  desc "
+  SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :hadoop_snappy, :lzo_command, :zstd, :text]
+  desc "Compression method (#{SUPPORTED_COMPRESS.join(',')})"
   config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
 
+  desc 'HDFS file extensions (overrides default compressor extensions)'
+  config_param :extension, :string, default: nil
+
   config_param :remove_prefix, :string, default: nil, deprecated: "use @label for routing"
   config_param :default_tag, :string, default: nil, deprecated: "use @label for routing"
   config_param :null_value, :string, default: nil, deprecated: "use filter plugins to convert null values into any specified string"
@@ -153,6 +160,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     end
 
     @compressor = COMPRESSOR_REGISTRY.lookup(@compress.to_s).new
+    @compressor.configure(conf)
 
     if @host
       @namenode_host = @host
@@ -180,6 +188,14 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
       raise Fluent::ConfigError, "Path on hdfs MUST starts with '/', but '#{@path}'"
     end
 
+    @renew_kerberos_delegation_token_interval_hour = nil
+    if @renew_kerberos_delegation_token
+      unless @username
+        raise Fluent::ConfigError, "username is missing. If you want to reuse delegation token, follow with kerberos accounts"
+      end
+      @renew_kerberos_delegation_token_interval_hour = @renew_kerberos_delegation_token_interval / 60 / 60
+    end
+
     @client = prepare_client(@namenode_host, @namenode_port, @username)
     if @standby_namenode_host
       @client_standby = prepare_client(@standby_namenode_host, @standby_namenode_port, @username)
@@ -199,7 +215,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   end
 
   def prepare_client(host, port, username)
-    client = WebHDFS::Client.new(host, port, username)
+    client = WebHDFS::Client.new(host, port, username, nil, nil, nil, {}, @renew_kerberos_delegation_token_interval_hour)
     if @httpfs
       client.httpfs_mode = true
     end
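
The kerberos hunks above work together: `renew_kerberos_delegation_token_interval` is declared as a Fluentd `:time` parameter (seconds), converted to hours in `#configure`, and then passed as the last argument of `WebHDFS::Client.new` in `prepare_client`. A rough sketch of the resulting call, reusing the placeholder host/port/username values from the tests further down (the intermediate `nil`/`{}` arguments are simply left at their defaults, as in the plugin):

```ruby
require "webhdfs"

# Default from the config_param above: 8 hours, given in seconds.
renew_interval = 8 * 60 * 60
# The plugin only hands the value to the client after converting it to hours.
renew_interval_hour = renew_interval / 60 / 60  # => 8

# Same argument order as prepare_client in the hunk above.
client = WebHDFS::Client.new("server.local", 14000, "hdfs_user",
                             nil, nil, nil, {}, renew_interval_hour)
```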
@@ -267,17 +283,22 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   end
 
   def send_data(path, data)
-    if @append
-      begin
-        @client.append(path, data)
-      rescue WebHDFS::FileNotFoundError
-        @client.create(path, data)
-      end
+    return @client.create(path, data, {'overwrite' => 'true'}) unless @append
+
+    if path_exists?(path)
+      @client.append(path, data)
     else
-      @client.create(path, data, {'overwrite' => 'true'})
+      @client.create(path, data)
     end
   end
 
+  def path_exists?(path)
+    @client.stat(path)
+    true
+  rescue WebHDFS::FileNotFoundError
+    false
+  end
+
   HOSTNAME_PLACEHOLDERS_DEPRECATED = ['${hostname}', '%{hostname}', '__HOSTNAME__']
   UUID_RANDOM_PLACEHOLDERS_DEPRECATED = ['${uuid}', '${uuid:random}', '__UUID__', '__UUID_RANDOM__']
   UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
@@ -319,7 +340,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     else
       extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
     end
-    hdfs_path = "#{hdfs_path}#{@compressor.ext}"
+    hdfs_ext = @extension || @compressor.ext
+    hdfs_path = "#{hdfs_path}#{hdfs_ext}"
     if @replace_random_uuid
       uuid_random = SecureRandom.uuid
       hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
@@ -502,7 +524,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     begin
       Open3.capture3("#{command} -V")
     rescue Errno::ENOENT
-      raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
+      raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
     end
   end
 end
@@ -518,5 +540,6 @@ require 'fluent/plugin/webhdfs_compressor_text'
 require 'fluent/plugin/webhdfs_compressor_gzip'
 require 'fluent/plugin/webhdfs_compressor_bzip2'
 require 'fluent/plugin/webhdfs_compressor_snappy'
+require 'fluent/plugin/webhdfs_compressor_hadoop_snappy'
 require 'fluent/plugin/webhdfs_compressor_lzo_command'
 require 'fluent/plugin/webhdfs_compressor_zstd'
data/lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb
ADDED
@@ -0,0 +1,32 @@
+module Fluent::Plugin
+  class WebHDFSOutput < Output
+    class HadoopSnappyCompressor < Compressor
+      WebHDFSOutput.register_compressor('hadoop_snappy', self)
+
+      DEFAULT_BLOCK_SIZE = 256 * 1024
+
+      desc 'Block size for compression algorithm'
+      config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
+
+      def initialize(options = {})
+        super()
+        begin
+          require "snappy"
+        rescue LoadError
+          raise Fluent::ConfigError, "Install snappy before using snappy compressor"
+        end
+      end
+
+      def ext
+        ".snappy"
+      end
+
+      def compress(chunk, tmp)
+        Snappy::Hadoop::Writer.new(tmp, @block_size) do |w|
+          w << chunk.read
+          w.flush
+        end
+      end
+    end
+  end
+end
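
Since the Hadoop-framed Snappy codec is new in this release, a small round-trip sketch may help; it only uses the `snappy` gem classes already referenced in this diff (`Snappy::Hadoop::Writer` in the compressor above, `Snappy::Hadoop::Reader` in the test file further down). The payload and block size are arbitrary example values:

```ruby
require "snappy"
require "stringio"

payload = "hello hadoop snappy\n" * 1024
io = StringIO.new

# Same call shape as HadoopSnappyCompressor#compress, with an explicit block size.
Snappy::Hadoop::Writer.new(io, 32 * 1024) do |w|
  w << payload
  w.flush
end

io.rewind
# Read it back the way test_snappy_compressors.rb does.
restored = Snappy::Hadoop::Reader.new(io).read
puts restored == payload  # expected: true
```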
data/lib/fluent/plugin/webhdfs_compressor_snappy.rb
CHANGED
@@ -3,11 +3,17 @@ module Fluent::Plugin
     class SnappyCompressor < Compressor
       WebHDFSOutput.register_compressor('snappy', self)
 
+      DEFAULT_BLOCK_SIZE = 32 * 1024
+
+      desc 'Block size for compression algorithm'
+      config_param :block_size, :integer, default: DEFAULT_BLOCK_SIZE
+
       def initialize(options = {})
+        super()
         begin
           require "snappy"
         rescue LoadError
-          raise Fluent::ConfigError, "Install snappy before
+          raise Fluent::ConfigError, "Install snappy before using snappy compressor"
         end
       end
 
@@ -16,7 +22,7 @@ module Fluent::Plugin
       end
 
       def compress(chunk, tmp)
-        Snappy::Writer.new(tmp) do |w|
+        Snappy::Writer.new(tmp, @block_size) do |w|
           w << chunk.read
           w.flush
         end
data/test/plugin/test_out_webhdfs.rb
CHANGED
@@ -107,6 +107,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
   data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
        bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
        snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
+       hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor],
        lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
   def test_compress(data)
     compress_type, compressor_class = data
@@ -116,7 +117,13 @@ class WebHDFSOutputTest < Test::Unit::TestCase
         "namenode" => "server.local:14000",
         "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
         "compress" => compress_type
-      })
+      }, [
+        config_element("buffer", "tag, time", {
+          "@type" => "memory",
+          "timekey_zone" => "+0300",
+          "timekey" => 60
+        })
+      ])
       d = create_driver(conf)
     rescue Fluent::ConfigError => ex
       omit ex.message
@@ -126,6 +133,43 @@ class WebHDFSOutputTest < Test::Unit::TestCase
     assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
     assert_equal compress_type, d.instance.compress
     assert_equal compressor_class, d.instance.compressor.class
+
+    time = event_time("2020-10-03 15:07:00 +0300")
+    metadata = d.instance.metadata("test", time, {})
+    chunk = d.instance.buffer.generate_chunk(metadata)
+    assert_equal "/hdfs/path/file.20201003.1507.log#{d.instance.compressor.ext}", d.instance.generate_path(chunk)
+  end
+
+  def test_explicit_extensions
+    conf = config_element(
+      "ROOT", "", {
+        "host" => "namenode.local",
+        "path" => "/hdfs/path/file.%Y%m%d.log",
+        "compress" => "snappy",
+        "extension" => ".snappy"
+      })
+    d = create_driver(conf)
+    time = event_time("2020-10-07 15:15:00 +0300")
+    metadata = d.instance.metadata("test", time, {})
+    chunk = d.instance.buffer.generate_chunk(metadata)
+    assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
+  end
+
+  data(snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
+       hadoop_snappy: [:hadoop_snappy, Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor])
+  def test_compression_block_size(data)
+    compress_type, compressor_class = data
+    conf = config_element(
+      "ROOT", "", {
+        "host" => "namenode.local",
+        "path" => "/hdfs/path/file.%Y%m%d.log",
+        "compress" => compress_type,
+        "block_size" => 16384
+      })
+    d = create_driver(conf)
+
+    assert_equal compress_type, d.instance.compress
+    assert_equal 16384, d.instance.compressor.block_size
   end
 
   def test_placeholders_old_style
@@ -272,4 +316,99 @@ class WebHDFSOutputTest < Test::Unit::TestCase
       assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
     end
   end
+
+  sub_test_case "kerberos config" do
+    CONFIG_KERBEROS = config_element(
+      "ROOT", "", {
+        "namenode" => "server.local:14000",
+        "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+        "username" => "hdfs_user",
+        "kerberos" => true,
+        "kerberos_keytab" => "/path/to/kerberos.keytab",
+      })
+
+    test "renew_kerberos_delegation_token default" do
+      mock.proxy(WebHDFS::Client).new("server.local", 14000, "hdfs_user", nil, nil, nil, {}, nil).once
+
+      d = create_driver(CONFIG_KERBEROS)
+
+      assert_equal(
+        {
+          kerberos: true,
+          renew_kerberos_delegation_token: false,
+          renew_kerberos_delegation_token_interval_hour: nil,
+        },
+        {
+          kerberos: d.instance.kerberos,
+          renew_kerberos_delegation_token: d.instance.instance_eval("@renew_kerberos_delegation_token"),
+          renew_kerberos_delegation_token_interval_hour: d.instance.instance_eval("@renew_kerberos_delegation_token_interval_hour"),
+        })
+    end
+
+    test "default renew_kerberos_delegation_token_interval" do
+      expected_hour = 8
+
+      mock.proxy(WebHDFS::Client).new("server.local", 14000, "hdfs_user", nil, nil, nil, {}, expected_hour).once
+
+      d = create_driver(CONFIG_KERBEROS +
+                        config_element("", "", { "renew_kerberos_delegation_token" => true }))
+
+      assert_equal(
+        {
+          kerberos: true,
+          renew_kerberos_delegation_token: true,
+          renew_kerberos_delegation_token_interval: expected_hour * 60 * 60,
+          renew_kerberos_delegation_token_interval_hour: expected_hour,
+        },
+        {
+          kerberos: d.instance.kerberos,
+          renew_kerberos_delegation_token: d.instance.instance_eval("@renew_kerberos_delegation_token"),
+          renew_kerberos_delegation_token_interval: d.instance.instance_eval("@renew_kerberos_delegation_token_interval"),
+          renew_kerberos_delegation_token_interval_hour: d.instance.instance_eval("@renew_kerberos_delegation_token_interval_hour"),
+        })
+    end
+
+    test "renew_kerberos_delegation_token_interval" do
+      expected_hour = 10
+
+      mock.proxy(WebHDFS::Client).new("server.local", 14000, "hdfs_user", nil, nil, nil, {}, expected_hour).once
+
+      d = create_driver(
+        CONFIG_KERBEROS +
+        config_element(
+          "", "",
+          {
+            "renew_kerberos_delegation_token" => true,
+            "renew_kerberos_delegation_token_interval" => "#{expected_hour}h",
+          }))
+
+      assert_equal(
+        {
+          kerberos: true,
+          renew_kerberos_delegation_token: true,
+          renew_kerberos_delegation_token_interval: expected_hour * 60 * 60,
+          renew_kerberos_delegation_token_interval_hour: expected_hour,
+        },
+        {
+          kerberos: d.instance.kerberos,
+          renew_kerberos_delegation_token: d.instance.instance_eval("@renew_kerberos_delegation_token"),
+          renew_kerberos_delegation_token_interval: d.instance.instance_eval("@renew_kerberos_delegation_token_interval"),
+          renew_kerberos_delegation_token_interval_hour: d.instance.instance_eval("@renew_kerberos_delegation_token_interval_hour"),
+        })
+    end
+
+    test "username is required for renew_kerberos_delegation_token" do
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+          "kerberos" => true,
+          "renew_kerberos_delegation_token" => true,
+        })
+
+      assert_raise(Fluent::ConfigError) do
+        create_driver(conf)
+      end
+    end
+  end
 end
data/test/plugin/{test_compressor.rb → test_snappy_compressors.rb}
CHANGED
@@ -5,7 +5,7 @@ begin
 rescue LoadError
 end
 
-class CompressorTest < Test::Unit::TestCase
+class SnappyCompressorsTest < Test::Unit::TestCase
   class Snappy < self
 
     CONFIG = %[
@@ -16,7 +16,17 @@ class CompressorTest < Test::Unit::TestCase
     def setup
       omit unless Object.const_defined?(:Snappy)
       Fluent::Test.setup
-
+
+      @compressors_size = 2
+      @compressors = [
+        Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new,
+        Fluent::Plugin::WebHDFSOutput::HadoopSnappyCompressor.new
+      ]
+      @readers = [
+        ::Snappy::Reader,
+        ::Snappy::Hadoop::Reader
+      ]
+      @exts = [".sz", ".snappy"]
     end
 
     def create_driver(conf = CONFIG)
@@ -24,7 +34,9 @@ class CompressorTest < Test::Unit::TestCase
     end
 
     def test_ext
-
+      for i in 0...@compressors_size do
+        assert_equal(@exts[i], @compressors[i].ext)
+      end
     end
 
     def test_compress
@@ -43,15 +55,17 @@ class CompressorTest < Test::Unit::TestCase
         chunk << "hello snappy\n" * 32 * 1024
       end
 
-
-
-
-
-
-
-
-
-
+      for i in 0...@compressors_size do
+        io = Tempfile.new("snappy-")
+        @compressors[i].compress(chunk, io)
+        io.open
+        chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
+        assert(chunk_bytesize > io.read.bytesize)
+        io.rewind
+        reader = @readers[i].new(io)
+        assert_equal(chunk.read, reader.read)
+        io.close
+      end
     end
   end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-webhdfs
 version: !ruby/object:Gem::Version
-  version: 1.3.0
+  version: 1.5.0
 platform: ruby
 authors:
 - TAGOMORI Satoshi
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2021-08-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -128,14 +128,14 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.10.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.10.0
 description: For WebHDFS and HttpFs of Hadoop HDFS
 email:
 - tagomoris@gmail.com
@@ -143,6 +143,7 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- ".github/workflows/linux.yml"
 - ".gitignore"
 - ".travis.yml"
 - Appraisals
@@ -154,14 +155,15 @@ files:
 - lib/fluent/plugin/out_webhdfs.rb
 - lib/fluent/plugin/webhdfs_compressor_bzip2.rb
 - lib/fluent/plugin/webhdfs_compressor_gzip.rb
+- lib/fluent/plugin/webhdfs_compressor_hadoop_snappy.rb
 - lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
 - lib/fluent/plugin/webhdfs_compressor_snappy.rb
 - lib/fluent/plugin/webhdfs_compressor_text.rb
 - lib/fluent/plugin/webhdfs_compressor_zstd.rb
 - test/helper.rb
-- test/plugin/test_compressor.rb
 - test/plugin/test_gzip_compressor.rb
 - test/plugin/test_out_webhdfs.rb
+- test/plugin/test_snappy_compressors.rb
 - test/plugin/test_zstd_compressor.rb
 homepage: https://github.com/fluent/fluent-plugin-webhdfs
 licenses:
@@ -182,13 +184,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.2.5
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
 test_files:
 - test/helper.rb
-- test/plugin/test_compressor.rb
 - test/plugin/test_gzip_compressor.rb
 - test/plugin/test_out_webhdfs.rb
+- test/plugin/test_snappy_compressors.rb
 - test/plugin/test_zstd_compressor.rb