fluent-plugin-webhdfs 1.2.0 → 1.2.5
- checksums.yaml +5 -5
- data/README.md +6 -1
- data/fluent-plugin-webhdfs.gemspec +2 -2
- data/lib/fluent/plugin/out_webhdfs.rb +15 -5
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +4 -3
- data/test/plugin/test_out_webhdfs.rb +19 -1
- metadata +5 -6
checksums.yaml CHANGED

```diff
@@ -1,7 +1,7 @@
 ---
-
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: 7a0c203ea54e21d95aea500fbb547ea965d5a5c07c1b277b69df2bbc44fa50fe
+  data.tar.gz: 6c93680eeba4575e2ef817e58772d4eb64a2e205696ad87b4b6decc1ed300e90
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 91be9ef776972c9d149221b2de2b765890e43e6df1e2a2408876b353aae629552a64d8029cfda84d8c03510e77ac0e164c7a8ed379d8d878505764df6181ca7e
+  data.tar.gz: 4b82ec9889bbd1a4e4fdd03150d082477a71ac03bacf5a72510e0dd816f8cfdb7e16dc5dc40ed02fdd2118f3a6e35919cf7032b0c7194bbdf76965dfc61c587e
```
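The new file publishes SHA256 and SHA512 digests for the two archives inside the gem. For reference, a minimal sketch (not part of the package; file names assume an unpacked `.gem`) of how such digests are reproduced locally:

```ruby
require "digest"

# Recompute the digest pairs that checksums.yaml records for each archive.
%w[metadata.gz data.tar.gz].each do |name|
  puts "SHA256 #{name}: #{Digest::SHA256.file(name).hexdigest}"
  puts "SHA512 #{name}: #{Digest::SHA512.file(name).hexdigest}"
end
```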
data/README.md CHANGED

```diff
@@ -1,5 +1,7 @@
 # fluent-plugin-webhdfs
 
+[![Build Status](https://travis-ci.org/fluent/fluent-plugin-webhdfs.svg)](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
+
 [Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
 
 "webhdfs" output plugin formats data into plain text, and store it as files on HDFS. This plugin supports:
```
```diff
@@ -143,8 +145,11 @@ With kerberos authentication:
   port 50070
   path /path/on/hdfs/access.log.%Y%m%d_%H.log
   kerberos true
+  kerberos_keytab /path/to/keytab # if needed
 </match>
 
+NOTE: You need to install `gssapi` gem for kerberos. See https://github.com/kzk/webhdfs#for-kerberos-authentication
+
 If you want to compress data before storing it:
 
 <match access.**>
```
```diff
@@ -222,7 +227,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
 
 With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raise error for inacive NameNodes.
 
-If you were
+If you were using unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
 
 <match access.**>
   @type webhdfs
```
data/fluent-plugin-webhdfs.gemspec CHANGED

```diff
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-webhdfs"
-  gem.version = "1.2.0"
+  gem.version = "1.2.5"
   gem.authors = ["TAGOMORI Satoshi"]
   gem.email = ["tagomoris@gmail.com"]
   gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
```
```diff
@@ -21,6 +21,6 @@ Gem::Specification.new do |gem|
   gem.add_development_dependency "appraisal"
   gem.add_development_dependency "snappy", '>= 0.0.13'
   gem.add_development_dependency "bzip2-ffi"
-  gem.add_runtime_dependency "fluentd", '>= 0.14.
+  gem.add_runtime_dependency "fluentd", '>= 0.14.22'
   gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
 end
```
data/lib/fluent/plugin/out_webhdfs.rb CHANGED

```diff
@@ -64,6 +64,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
 
   desc 'Use kerberos authentication or not'
   config_param :kerberos, :bool, default: false
+  desc 'kerberos keytab file'
+  config_param :kerberos_keytab, :string, default: nil
 
   SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
   desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
```
```diff
@@ -96,20 +98,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   end
 
   def configure(conf)
-    compat_parameters_convert
-
+    # #compat_parameters_convert ignore time format in conf["path"],
+    # so check conf["path"] and overwrite the default value later if needed
     timekey = case conf["path"]
               when /%S/ then 1
               when /%M/ then 60
               when /%H/ then 3600
               else 86400
               end
+    if buffer_config = conf.elements(name: "buffer").first
+      timekey = buffer_config["timekey"] || timekey
+    end
+
+    compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
+
     if conf.elements(name: "buffer").empty?
       e = Fluent::Config::Element.new("buffer", "time", {}, [])
       conf.elements << e
     end
     buffer_config = conf.elements(name: "buffer").first
-
+    # explicitly set timekey
+    buffer_config["timekey"] = timekey
 
     compat_parameters_convert_plaintextformatter(conf)
     verify_config_placeholders_in_path!(conf)
```
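To make the new precedence concrete: the finest strftime placeholder in `path` decides the default chunking interval, an explicit `timekey` in the `<buffer>` section still wins, and the chosen value is written back so the buffer always chunks on time. A minimal standalone sketch of that rule (`infer_timekey` is a hypothetical helper for illustration, not the plugin's API):

```ruby
# Hypothetical helper mirroring the timekey rule in configure above.
def infer_timekey(path, explicit_timekey = nil)
  default = case path
            when /%S/ then 1      # second-level placeholder => 1s chunks
            when /%M/ then 60     # minute-level => 60s chunks
            when /%H/ then 3600   # hour-level => 1h chunks
            else 86400            # date-only path => daily chunks
            end
  explicit_timekey || default
end

p infer_timekey("/hdfs/path/file.%Y%m%d-%M.log")    #=> 60
p infer_timekey("/hdfs/path/file.%Y%m%d.log")       #=> 86400
p infer_timekey("/hdfs/path/file.%Y%m%d.log", 300)  #=> 300, explicit wins
```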
```diff
@@ -208,6 +217,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     end
     if @kerberos
       client.kerberos = true
+      client.kerberos_keytab = @kerberos_keytab if @kerberos_keytab
     end
 
     client
```
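At the client level, the new option amounts to handing the webhdfs gem a keytab path. A minimal sketch with placeholder host, port, and paths; `kerberos=` and `kerberos_keytab=` are the same `WebHDFS::Client` accessors the diff sets, and they need the `gssapi` gem as the README note says:

```ruby
require "webhdfs"

# Placeholder values; the accessors match the ones set in the plugin above.
client = WebHDFS::Client.new("namenode.local", 50070)
client.kerberos = true
client.kerberos_keytab = "/path/to/kerberos.keytab"
```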
```diff
@@ -305,9 +315,9 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
 
   def generate_path(chunk)
     hdfs_path = if @append
-                  extract_placeholders(@path, chunk
+                  extract_placeholders(@path, chunk)
                 else
-                  extract_placeholders(@path
+                  extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
                 end
     hdfs_path = "#{hdfs_path}#{@compressor.ext}"
     if @replace_random_uuid
```
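In the non-append branch, any chunk-id marker in the configured path is replaced with the chunk's unique id before placeholders are extracted, so each flushed chunk gets its own HDFS file. A standalone sketch of just that substitution (`CHUNK_ID_PLACE_HOLDER` and `dump_unique_id_hex` are stand-ins for the plugin's and fluentd's definitions):

```ruby
require "securerandom"

# Stand-ins for illustration only.
CHUNK_ID_PLACE_HOLDER = "${chunk_id}"

def dump_unique_id_hex(unique_id)
  unique_id.unpack1("H*") # hex-encode the raw chunk id bytes
end

path = "/hdfs/path/file.${chunk_id}.log"
unique_id = SecureRandom.random_bytes(16)
puts path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(unique_id))
# e.g. /hdfs/path/file.3f9c…(32 hex chars)….log
```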
data/test/plugin/test_out_webhdfs.rb CHANGED

```diff
@@ -89,7 +89,8 @@ class WebHDFSOutputTest < Test::Unit::TestCase
       "ssl" => true,
       "ssl_ca_file" => "/path/to/ca_file.pem",
       "ssl_verify_mode" => "peer",
-      "kerberos" => true
+      "kerberos" => true,
+      "kerberos_keytab" => "/path/to/kerberos.keytab"
     })
     d = create_driver(conf)
 
```
```diff
@@ -100,6 +101,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
     assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
     assert_equal :peer, d.instance.ssl_verify_mode
     assert_equal true, d.instance.kerberos
+    assert_equal '/path/to/kerberos.keytab', d.instance.kerberos_keytab
   end
 
   data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
```
```diff
@@ -166,6 +168,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
     metadata = d.instance.metadata("test", nil, {})
     chunk = d.instance.buffer.generate_chunk(metadata)
     assert_equal "/hdfs/path/file.#{dump_unique_id_hex(chunk.unique_id)}.log", d.instance.generate_path(chunk)
+    assert_empty d.instance.log.out.logs
   end
 
   data(path: { "append" => false },
```
```diff
@@ -204,6 +207,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
     assert_equal 1, d.instance.buffer_config.timekey
     assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
   end
+
+  def test_time_key_without_buffer_section
+    conf = config_element(
+      "ROOT", "", {
+        "host" => "namenode.local",
+        "path" => "/hdfs/path/file.%Y%m%d-%M.log"
+      }
+    )
+    d = create_driver(conf)
+    time = event_time("2012-07-18 15:03:00 +0900")
+    metadata = d.instance.metadata("test", time, {})
+    chunk = d.instance.buffer.generate_chunk(metadata)
+    assert_equal 60, d.instance.buffer_config.timekey
+    assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
+  end
 end
 
 sub_test_case "using format subsection" do
```
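The new test pins down both halves of the `configure` change: a `%M` placeholder in the path infers `timekey 60` with no `<buffer>` section at all, and the generated path formats the chunk's time key. A sketch of how the expected name falls out, assuming a simplified model of fluentd's chunk timekey (floor the event time to the timekey, then strftime it):

```ruby
require "time"

# Assumed simplification of fluentd's timekey handling, for illustration.
time = Time.parse("2012-07-18 15:03:00 +0900")
timekey = 60 # inferred from the %M placeholder
chunk_time = Time.at(time.to_i - (time.to_i % timekey)).getlocal("+09:00")
puts chunk_time.strftime("/hdfs/path/file.%Y%m%d-%M.log")
#=> /hdfs/path/file.20120718-03.log
```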
metadata CHANGED

```diff
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-webhdfs
 version: !ruby/object:Gem::Version
-  version: 1.2.0
+  version: 1.2.5
 platform: ruby
 authors:
 - TAGOMORI Satoshi
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2020-06-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
```
```diff
@@ -100,14 +100,14 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.14.
+        version: 0.14.22
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.14.
+        version: 0.14.22
 - !ruby/object:Gem::Dependency
   name: webhdfs
   requirement: !ruby/object:Gem::Requirement
```
```diff
@@ -166,8 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-
-rubygems_version: 2.6.11
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
```