fluent-plugin-webhdfs 1.2.0 → 1.2.5
- checksums.yaml +5 -5
- data/README.md +6 -1
- data/fluent-plugin-webhdfs.gemspec +2 -2
- data/lib/fluent/plugin/out_webhdfs.rb +15 -5
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +4 -3
- data/test/plugin/test_out_webhdfs.rb +19 -1
- metadata +5 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: 7a0c203ea54e21d95aea500fbb547ea965d5a5c07c1b277b69df2bbc44fa50fe
+  data.tar.gz: 6c93680eeba4575e2ef817e58772d4eb64a2e205696ad87b4b6decc1ed300e90
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 91be9ef776972c9d149221b2de2b765890e43e6df1e2a2408876b353aae629552a64d8029cfda84d8c03510e77ac0e164c7a8ed379d8d878505764df6181ca7e
+  data.tar.gz: 4b82ec9889bbd1a4e4fdd03150d082477a71ac03bacf5a72510e0dd816f8cfdb7e16dc5dc40ed02fdd2118f3a6e35919cf7032b0c7194bbdf76965dfc61c587e
data/README.md
CHANGED
@@ -1,5 +1,7 @@
 # fluent-plugin-webhdfs
 
+[![Build Status](https://travis-ci.org/fluent/fluent-plugin-webhdfs.svg?branch=master)](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
+
 [Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
 
 "webhdfs" output plugin formats data into plain text, and store it as files on HDFS. This plugin supports:
@@ -143,8 +145,11 @@ With kerberos authentication:
   port 50070
   path /path/on/hdfs/access.log.%Y%m%d_%H.log
   kerberos true
+  kerberos_keytab /path/to/keytab # if needed
 </match>
 
+NOTE: You need to install `gssapi` gem for kerberos. See https://github.com/kzk/webhdfs#for-kerberos-authentication
+
 If you want to compress data before storing it:
 
 <match access.**>
@@ -222,7 +227,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
 
 With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raise error for inacive NameNodes.
 
-If you were
+If you were using unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
 
 <match access.**>
   @type webhdfs
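The last hunk above cuts off right after `@type webhdfs`, before the `ignore_start_check_error` example it introduces. As a rough sketch only (the host, port, and path values below are placeholders, not taken from the diff), such a configuration combined with the new keytab option might look like:

<match access.**>
  @type webhdfs
  host namenode.your.cluster.local
  port 50070
  path /path/on/hdfs/access.log.%Y%m%d_%H.log
  ignore_start_check_error true   # don't fail startup on an inactive NameNode
  kerberos true
  kerberos_keytab /path/to/keytab # if needed
</match>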
data/fluent-plugin-webhdfs.gemspec
CHANGED
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-webhdfs"
-  gem.version = "1.2.0"
+  gem.version = "1.2.5"
   gem.authors = ["TAGOMORI Satoshi"]
   gem.email = ["tagomoris@gmail.com"]
   gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -21,6 +21,6 @@ Gem::Specification.new do |gem|
   gem.add_development_dependency "appraisal"
   gem.add_development_dependency "snappy", '>= 0.0.13'
   gem.add_development_dependency "bzip2-ffi"
-  gem.add_runtime_dependency "fluentd", '>= 0.14.
+  gem.add_runtime_dependency "fluentd", '>= 0.14.22'
   gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
 end
data/lib/fluent/plugin/out_webhdfs.rb
CHANGED
@@ -64,6 +64,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
 
   desc 'Use kerberos authentication or not'
   config_param :kerberos, :bool, default: false
+  desc 'kerberos keytab file'
+  config_param :kerberos_keytab, :string, default: nil
 
   SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
   desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
@@ -96,20 +98,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   end
 
   def configure(conf)
-    compat_parameters_convert
-
+    # #compat_parameters_convert ignore time format in conf["path"],
+    # so check conf["path"] and overwrite the default value later if needed
     timekey = case conf["path"]
               when /%S/ then 1
               when /%M/ then 60
              when /%H/ then 3600
              else 86400
              end
+    if buffer_config = conf.elements(name: "buffer").first
+      timekey = buffer_config["timekey"] || timekey
+    end
+
+    compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
+
     if conf.elements(name: "buffer").empty?
       e = Fluent::Config::Element.new("buffer", "time", {}, [])
       conf.elements << e
     end
     buffer_config = conf.elements(name: "buffer").first
-
+    # explicitly set timekey
+    buffer_config["timekey"] = timekey
 
     compat_parameters_convert_plaintextformatter(conf)
     verify_config_placeholders_in_path!(conf)
@@ -208,6 +217,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     end
     if @kerberos
       client.kerberos = true
+      client.kerberos_keytab = @kerberos_keytab if @kerberos_keytab
     end
 
     client
@@ -305,9 +315,9 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
 
   def generate_path(chunk)
     hdfs_path = if @append
-                  extract_placeholders(@path, chunk
+                  extract_placeholders(@path, chunk)
                 else
-                  extract_placeholders(@path
+                  extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
                 end
     hdfs_path = "#{hdfs_path}#{@compressor.ext}"
     if @replace_random_uuid
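The behavioral core of this release is the timekey selection added to `configure`: the buffer timekey is derived from the finest strftime placeholder in `path`, while an explicit `<buffer>` timekey still wins. A standalone sketch of that selection logic (a minimal illustration only; `timekey_for` is a hypothetical helper, not a method of the plugin):

# Pick a buffer timekey (in seconds) from the finest strftime placeholder
# found in the configured path; an explicit <buffer> timekey overrides it.
def timekey_for(path, explicit_timekey = nil)
  default = case path
            when /%S/ then 1      # per-second files
            when /%M/ then 60     # per-minute files
            when /%H/ then 3600   # hourly files
            else 86400            # daily files
            end
  explicit_timekey || default
end

timekey_for("/hdfs/path/file.%Y%m%d-%M.log")     # => 60
timekey_for("/hdfs/path/file.%Y%m%d.log")        # => 86400
timekey_for("/hdfs/path/file.%Y%m%d.log", 3600)  # => 3600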
data/test/plugin/test_out_webhdfs.rb
CHANGED
@@ -89,7 +89,8 @@ class WebHDFSOutputTest < Test::Unit::TestCase
       "ssl" => true,
       "ssl_ca_file" => "/path/to/ca_file.pem",
       "ssl_verify_mode" => "peer",
-      "kerberos" => true
+      "kerberos" => true,
+      "kerberos_keytab" => "/path/to/kerberos.keytab"
     })
     d = create_driver(conf)
 
@@ -100,6 +101,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
     assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
     assert_equal :peer, d.instance.ssl_verify_mode
     assert_equal true, d.instance.kerberos
+    assert_equal '/path/to/kerberos.keytab', d.instance.kerberos_keytab
   end
 
   data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
@@ -166,6 +168,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
     metadata = d.instance.metadata("test", nil, {})
     chunk = d.instance.buffer.generate_chunk(metadata)
     assert_equal "/hdfs/path/file.#{dump_unique_id_hex(chunk.unique_id)}.log", d.instance.generate_path(chunk)
+    assert_empty d.instance.log.out.logs
   end
 
   data(path: { "append" => false },
@@ -204,6 +207,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
       assert_equal 1, d.instance.buffer_config.timekey
       assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
     end
+
+    def test_time_key_without_buffer_section
+      conf = config_element(
+        "ROOT", "", {
+          "host" => "namenode.local",
+          "path" => "/hdfs/path/file.%Y%m%d-%M.log"
+        }
+      )
+      d = create_driver(conf)
+      time = event_time("2012-07-18 15:03:00 +0900")
+      metadata = d.instance.metadata("test", time, {})
+      chunk = d.instance.buffer.generate_chunk(metadata)
+      assert_equal 60, d.instance.buffer_config.timekey
+      assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
+    end
   end
 
   sub_test_case "using format subsection" do
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-webhdfs
 version: !ruby/object:Gem::Version
-  version: 1.2.0
+  version: 1.2.5
 platform: ruby
 authors:
 - TAGOMORI Satoshi
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2020-06-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -100,14 +100,14 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.14.
+        version: 0.14.22
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.14.
+        version: 0.14.22
 - !ruby/object:Gem::Dependency
   name: webhdfs
   requirement: !ruby/object:Gem::Requirement
@@ -166,8 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-
-rubygems_version: 2.6.11
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting