fluent-plugin-webhdfs 1.2.0 → 1.2.5

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
- SHA1:
- metadata.gz: a68b2c521a6b09397a3ec62b31a839a746b968d6
- data.tar.gz: d41eda6c2024f91215d13af9a3bfe606c3f5a984
+ SHA256:
+ metadata.gz: 7a0c203ea54e21d95aea500fbb547ea965d5a5c07c1b277b69df2bbc44fa50fe
+ data.tar.gz: 6c93680eeba4575e2ef817e58772d4eb64a2e205696ad87b4b6decc1ed300e90
  SHA512:
- metadata.gz: 0d90a9def013f28eaf3e710aa039b2a40f7ac79215ee8c3b0bf88936f53b4a27fe12c6681ad89da2cd2be8a14026f4194022aa2f9ee860b658f26b38e4ca03ad
- data.tar.gz: ee3ee7ff6c0524e1b5b14fd5bd168d44094cfd84e36f9d872f3ad28f3f5dfc6e4bd76b14c4ba60bd472bd40f34b2f63bcd9f53c4c62a2290e204402e8bfcb444
+ metadata.gz: 91be9ef776972c9d149221b2de2b765890e43e6df1e2a2408876b353aae629552a64d8029cfda84d8c03510e77ac0e164c7a8ed379d8d878505764df6181ca7e
+ data.tar.gz: 4b82ec9889bbd1a4e4fdd03150d082477a71ac03bacf5a72510e0dd816f8cfdb7e16dc5dc40ed02fdd2118f3a6e35919cf7032b0c7194bbdf76965dfc61c587e
data/README.md CHANGED
@@ -1,5 +1,7 @@
  # fluent-plugin-webhdfs
 
+ [![Build Status](https://travis-ci.org/fluent/fluent-plugin-webhdfs.svg?branch=master)](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
+
 
  [Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
 
  "webhdfs" output plugin formats data into plain text, and store it as files on HDFS. This plugin supports:
@@ -143,8 +145,11 @@ With kerberos authentication:
  port 50070
  path /path/on/hdfs/access.log.%Y%m%d_%H.log
  kerberos true
+ kerberos_keytab /path/to/keytab # if needed
  </match>
 
+ NOTE: You need to install `gssapi` gem for kerberos. See https://github.com/kzk/webhdfs#for-kerberos-authentication
+
  If you want to compress data before storing it:
 
  <match access.**>
@@ -222,7 +227,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
 
  With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raise error for inacive NameNodes.
 
- If you were usging unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
+ If you were using unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
 
  <match access.**>
  @type webhdfs
@@ -2,7 +2,7 @@
 
  Gem::Specification.new do |gem|
  gem.name = "fluent-plugin-webhdfs"
- gem.version = "1.2.0"
+ gem.version = "1.2.5"
  gem.authors = ["TAGOMORI Satoshi"]
  gem.email = ["tagomoris@gmail.com"]
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -21,6 +21,6 @@ Gem::Specification.new do |gem|
  gem.add_development_dependency "appraisal"
  gem.add_development_dependency "snappy", '>= 0.0.13'
  gem.add_development_dependency "bzip2-ffi"
- gem.add_runtime_dependency "fluentd", '>= 0.14.4'
+ gem.add_runtime_dependency "fluentd", '>= 0.14.22'
  gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
  end
@@ -64,6 +64,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
 
  desc 'Use kerberos authentication or not'
  config_param :kerberos, :bool, default: false
+ desc 'kerberos keytab file'
+ config_param :kerberos_keytab, :string, default: nil
 
  SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
  desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
@@ -96,20 +98,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
  end
 
  def configure(conf)
- compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
-
+ # #compat_parameters_convert ignore time format in conf["path"],
+ # so check conf["path"] and overwrite the default value later if needed
  timekey = case conf["path"]
  when /%S/ then 1
  when /%M/ then 60
  when /%H/ then 3600
  else 86400
  end
+ if buffer_config = conf.elements(name: "buffer").first
+ timekey = buffer_config["timekey"] || timekey
+ end
+
+ compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
+
  if conf.elements(name: "buffer").empty?
  e = Fluent::Config::Element.new("buffer", "time", {}, [])
  conf.elements << e
  end
  buffer_config = conf.elements(name: "buffer").first
- buffer_config["timekey"] = timekey unless buffer_config["timekey"]
+ # explicitly set timekey
+ buffer_config["timekey"] = timekey
 
  compat_parameters_convert_plaintextformatter(conf)
  verify_config_placeholders_in_path!(conf)
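
The reordered `configure` above first derives a default buffer `timekey` from the finest strftime token found in `path` (`%S` → 1, `%M` → 60, `%H` → 3600, otherwise 86400 seconds), lets an explicit `timekey` from a `<buffer>` section take precedence, and only then runs `compat_parameters_convert`; the resulting value is always written back into the buffer section. A minimal standalone sketch of that inference rule, using a hypothetical helper name rather than the plugin's actual code:

```ruby
# Hypothetical helper illustrating the timekey inference used above.
# explicit_timekey stands in for a <buffer> section's "timekey" value (or nil).
def infer_timekey(path, explicit_timekey = nil)
  default = case path
            when /%S/ then 1        # second-level granularity
            when /%M/ then 60       # minute-level granularity
            when /%H/ then 3600     # hour-level granularity
            else 86400              # daily files
            end
  explicit_timekey || default
end

infer_timekey("/hdfs/path/file.%Y%m%d-%M.log")        # => 60
infer_timekey("/hdfs/path/file.%Y%m%d.log")           # => 86400
infer_timekey("/hdfs/path/file.%Y%m%d_%H.log", "300") # => "300" (explicit value wins)
```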
@@ -208,6 +217,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
  end
  if @kerberos
  client.kerberos = true
+ client.kerberos_keytab = @kerberos_keytab if @kerberos_keytab
  end
 
  client
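
The new `kerberos_keytab` parameter is applied when the plugin builds its WebHDFS client, as the hunk above shows. A minimal sketch of the equivalent client setup, assuming the `webhdfs` gem's `WebHDFS::Client` constructor and the `kerberos`/`kerberos_keytab` accessors used above; host, port, and keytab path are placeholder values:

```ruby
require 'webhdfs'

# Placeholder host/port/keytab; mirrors what the plugin does when building its client.
client = WebHDFS::Client.new('namenode.local', 50070)
client.kerberos = true
client.kerberos_keytab = '/path/to/kerberos.keytab' # skipped when no keytab is configured
```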
@@ -305,9 +315,9 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
 
  def generate_path(chunk)
  hdfs_path = if @append
- extract_placeholders(@path, chunk.metadata)
+ extract_placeholders(@path, chunk)
  else
- extract_placeholders(@path, chunk.metadata).gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id))
+ extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
  end
  hdfs_path = "#{hdfs_path}#{@compressor.ext}"
  if @replace_random_uuid
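
In 1.2.5, `extract_placeholders` receives the chunk itself instead of `chunk.metadata`, and for non-append mode the chunk-id placeholder is substituted into the path template before placeholder extraction runs, so Fluentd no longer warns about an unknown placeholder (the `assert_empty d.instance.log.out.logs` assertion added in the tests below checks exactly that). A standalone sketch of the substitution order, assuming `CHUNK_ID_PLACE_HOLDER` is the literal `${chunk_id}` token and using an illustrative chunk id:

```ruby
# Illustration only; not the plugin's code. The placeholder token and chunk id
# below are assumptions made for this example.
chunk_id_placeholder = '${chunk_id}'
path_template        = '/hdfs/path/file.${chunk_id}.log'
chunk_id_hex         = 'b1946ac92492d2347c6235b4d2611184'

# 1.2.5 order: resolve the chunk-id token first, then hand the result to extract_placeholders
resolved = path_template.gsub(chunk_id_placeholder, chunk_id_hex)
# => "/hdfs/path/file.b1946ac92492d2347c6235b4d2611184.log"
```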
@@ -16,9 +16,10 @@ module Fluent::Plugin
  end
 
  def compress(chunk, tmp)
- w = Snappy::Writer.new(tmp)
- chunk.write_to(w)
- w.close
+ Snappy::Writer.new(tmp) do |w|
+ w << chunk.read
+ w.flush
+ end
  end
  end
  end
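
The Snappy compressor now uses the block form of `Snappy::Writer`, feeding it the whole chunk via `chunk.read` and flushing inside the block instead of closing a writer by hand. A minimal sketch of the same pattern against a plain string and file, assuming the `snappy` gem's `Snappy::Writer` behaves as used in the hunk above:

```ruby
require 'snappy'

data = "line one\nline two\n"
File.open('/tmp/sample.sny', 'wb') do |f|
  Snappy::Writer.new(f) do |w|
    w << data # buffer the payload
    w.flush   # write the compressed frame before the block returns
  end
end
```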
@@ -89,7 +89,8 @@ class WebHDFSOutputTest < Test::Unit::TestCase
  "ssl" => true,
  "ssl_ca_file" => "/path/to/ca_file.pem",
  "ssl_verify_mode" => "peer",
- "kerberos" => true
+ "kerberos" => true,
+ "kerberos_keytab" => "/path/to/kerberos.keytab"
  })
  d = create_driver(conf)
 
@@ -100,6 +101,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
  assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
  assert_equal :peer, d.instance.ssl_verify_mode
  assert_equal true, d.instance.kerberos
+ assert_equal '/path/to/kerberos.keytab', d.instance.kerberos_keytab
  end
 
  data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
@@ -166,6 +168,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
  metadata = d.instance.metadata("test", nil, {})
  chunk = d.instance.buffer.generate_chunk(metadata)
  assert_equal "/hdfs/path/file.#{dump_unique_id_hex(chunk.unique_id)}.log", d.instance.generate_path(chunk)
+ assert_empty d.instance.log.out.logs
  end
 
  data(path: { "append" => false },
@@ -204,6 +207,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
  assert_equal 1, d.instance.buffer_config.timekey
  assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
  end
+
+ def test_time_key_without_buffer_section
+ conf = config_element(
+ "ROOT", "", {
+ "host" => "namenode.local",
+ "path" => "/hdfs/path/file.%Y%m%d-%M.log"
+ }
+ )
+ d = create_driver(conf)
+ time = event_time("2012-07-18 15:03:00 +0900")
+ metadata = d.instance.metadata("test", time, {})
+ chunk = d.instance.buffer.generate_chunk(metadata)
+ assert_equal 60, d.instance.buffer_config.timekey
+ assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
+ end
  end
 
  sub_test_case "using format subsection" do
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-webhdfs
  version: !ruby/object:Gem::Version
- version: 1.2.0
+ version: 1.2.5
  platform: ruby
  authors:
  - TAGOMORI Satoshi
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-05-19 00:00:00.000000000 Z
+ date: 2020-06-01 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake
@@ -100,14 +100,14 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.14.4
+ version: 0.14.22
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.14.4
+ version: 0.14.22
  - !ruby/object:Gem::Dependency
  name: webhdfs
  requirement: !ruby/object:Gem::Requirement
@@ -166,8 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubyforge_project:
- rubygems_version: 2.6.11
+ rubygems_version: 3.0.3
  signing_key:
  specification_version: 4
  summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting