fluent-plugin-webhdfs 1.2.0 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: a68b2c521a6b09397a3ec62b31a839a746b968d6
-  data.tar.gz: d41eda6c2024f91215d13af9a3bfe606c3f5a984
+SHA256:
+  metadata.gz: 7a0c203ea54e21d95aea500fbb547ea965d5a5c07c1b277b69df2bbc44fa50fe
+  data.tar.gz: 6c93680eeba4575e2ef817e58772d4eb64a2e205696ad87b4b6decc1ed300e90
 SHA512:
-  metadata.gz: 0d90a9def013f28eaf3e710aa039b2a40f7ac79215ee8c3b0bf88936f53b4a27fe12c6681ad89da2cd2be8a14026f4194022aa2f9ee860b658f26b38e4ca03ad
-  data.tar.gz: ee3ee7ff6c0524e1b5b14fd5bd168d44094cfd84e36f9d872f3ad28f3f5dfc6e4bd76b14c4ba60bd472bd40f34b2f63bcd9f53c4c62a2290e204402e8bfcb444
+  metadata.gz: 91be9ef776972c9d149221b2de2b765890e43e6df1e2a2408876b353aae629552a64d8029cfda84d8c03510e77ac0e164c7a8ed379d8d878505764df6181ca7e
+  data.tar.gz: 4b82ec9889bbd1a4e4fdd03150d082477a71ac03bacf5a72510e0dd816f8cfdb7e16dc5dc40ed02fdd2118f3a6e35919cf7032b0c7194bbdf76965dfc61c587e
data/README.md CHANGED
@@ -1,5 +1,7 @@
 # fluent-plugin-webhdfs
 
+[![Build Status](https://travis-ci.org/fluent/fluent-plugin-webhdfs.svg?branch=master)](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
+
 [Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
 
 "webhdfs" output plugin formats data into plain text, and store it as files on HDFS. This plugin supports:
@@ -143,8 +145,11 @@ With kerberos authentication:
     port 50070
     path /path/on/hdfs/access.log.%Y%m%d_%H.log
     kerberos true
+    kerberos_keytab /path/to/keytab # if needed
     </match>
 
+NOTE: You need to install `gssapi` gem for kerberos. See https://github.com/kzk/webhdfs#for-kerberos-authentication
+
 If you want to compress data before storing it:
 
     <match access.**>
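Put together, the kerberos settings added in the hunk above combine into a match section roughly like this sketch (host, port and path are placeholder values, not taken from the diff; the `gssapi` gem must be installed as noted):

    <match access.**>
      @type webhdfs
      host namenode.your.cluster.local
      port 50070
      path /path/on/hdfs/access.log.%Y%m%d_%H.log
      kerberos true
      kerberos_keytab /path/to/keytab
    </match>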
@@ -222,7 +227,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
 
 With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raise error for inacive NameNodes.
 
-If you were usging unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
+If you were using unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
 
     <match access.**>
       @type webhdfs
data/fluent-plugin-webhdfs.gemspec CHANGED
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-webhdfs"
-  gem.version = "1.2.0"
+  gem.version = "1.2.5"
   gem.authors = ["TAGOMORI Satoshi"]
   gem.email = ["tagomoris@gmail.com"]
   gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -21,6 +21,6 @@ Gem::Specification.new do |gem|
   gem.add_development_dependency "appraisal"
   gem.add_development_dependency "snappy", '>= 0.0.13'
   gem.add_development_dependency "bzip2-ffi"
-  gem.add_runtime_dependency "fluentd", '>= 0.14.4'
+  gem.add_runtime_dependency "fluentd", '>= 0.14.22'
   gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
 end
data/lib/fluent/plugin/out_webhdfs.rb CHANGED
@@ -64,6 +64,8 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
 
   desc 'Use kerberos authentication or not'
   config_param :kerberos, :bool, default: false
+  desc 'kerberos keytab file'
+  config_param :kerberos_keytab, :string, default: nil
 
   SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
   desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
@@ -96,20 +98,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
   end
 
   def configure(conf)
-    compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
-
+    # #compat_parameters_convert ignore time format in conf["path"],
+    # so check conf["path"] and overwrite the default value later if needed
     timekey = case conf["path"]
               when /%S/ then 1
               when /%M/ then 60
               when /%H/ then 3600
               else 86400
               end
+    if buffer_config = conf.elements(name: "buffer").first
+      timekey = buffer_config["timekey"] || timekey
+    end
+
+    compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
+
     if conf.elements(name: "buffer").empty?
       e = Fluent::Config::Element.new("buffer", "time", {}, [])
       conf.elements << e
     end
     buffer_config = conf.elements(name: "buffer").first
-    buffer_config["timekey"] = timekey unless buffer_config["timekey"]
+    # explicitly set timekey
+    buffer_config["timekey"] = timekey
 
     compat_parameters_convert_plaintextformatter(conf)
     verify_config_placeholders_in_path!(conf)
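In effect, the path-derived timekey (1, 60, 3600 or 86400 depending on whether the path contains %S, %M, %H or only date placeholders) is now just a default: a timekey given explicitly in a buffer section takes precedence, and whichever value wins is always written back to the buffer config. A rough configuration sketch of the two cases (host and path are placeholder values, not taken from this diff):

    <match access.**>
      @type webhdfs
      host namenode.your.cluster.local
      port 50070
      path /log/access.%Y%m%d_%H.log   # %H alone would imply timekey 3600
      <buffer time>
        timekey 300                    # an explicit timekey overrides the path-derived value
      </buffer>
    </match>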
@@ -208,6 +217,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     end
     if @kerberos
       client.kerberos = true
+      client.kerberos_keytab = @kerberos_keytab if @kerberos_keytab
     end
 
     client
@@ -305,9 +315,9 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
 
   def generate_path(chunk)
     hdfs_path = if @append
-                  extract_placeholders(@path, chunk.metadata)
+                  extract_placeholders(@path, chunk)
                 else
-                  extract_placeholders(@path, chunk.metadata).gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id))
+                  extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
                 end
     hdfs_path = "#{hdfs_path}#{@compressor.ext}"
     if @replace_random_uuid
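The substitution order changes here: the chunk-id placeholder is expanded in the path first, and extract_placeholders is then called with the chunk itself rather than chunk.metadata (the new test assertion on an empty log below checks that this no longer produces a warning). As a rough sketch of the user-facing behaviour, assuming the placeholder is `${chunk_id}` as in the test expectations below (host and port are placeholder values):

    <match access.**>
      @type webhdfs
      host namenode.your.cluster.local
      port 50070
      append false
      path /hdfs/path/file.${chunk_id}.log   # becomes /hdfs/path/file.<unique chunk id hex>.log
    </match>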
data/lib/fluent/plugin/webhdfs_compressor_snappy.rb CHANGED
@@ -16,9 +16,10 @@ module Fluent::Plugin
       end
 
       def compress(chunk, tmp)
-        w = Snappy::Writer.new(tmp)
-        chunk.write_to(w)
-        w.close
+        Snappy::Writer.new(tmp) do |w|
+          w << chunk.read
+          w.flush
+        end
       end
     end
   end
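Enabling this compressor from configuration is unchanged by the rewrite above; a minimal sketch (host, port and path are placeholders; `compress snappy` requires the snappy gem to be installed):

    <match access.**>
      @type webhdfs
      host namenode.your.cluster.local
      port 50070
      path /path/on/hdfs/access.log.%Y%m%d_%H.log
      compress snappy
    </match>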
data/test/plugin/test_out_webhdfs.rb CHANGED
@@ -89,7 +89,8 @@ class WebHDFSOutputTest < Test::Unit::TestCase
       "ssl" => true,
       "ssl_ca_file" => "/path/to/ca_file.pem",
       "ssl_verify_mode" => "peer",
-      "kerberos" => true
+      "kerberos" => true,
+      "kerberos_keytab" => "/path/to/kerberos.keytab"
     })
     d = create_driver(conf)
 
@@ -100,6 +101,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
     assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
     assert_equal :peer, d.instance.ssl_verify_mode
     assert_equal true, d.instance.kerberos
+    assert_equal '/path/to/kerberos.keytab', d.instance.kerberos_keytab
   end
 
   data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
@@ -166,6 +168,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
     metadata = d.instance.metadata("test", nil, {})
     chunk = d.instance.buffer.generate_chunk(metadata)
     assert_equal "/hdfs/path/file.#{dump_unique_id_hex(chunk.unique_id)}.log", d.instance.generate_path(chunk)
+    assert_empty d.instance.log.out.logs
   end
 
   data(path: { "append" => false },
@@ -204,6 +207,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
       assert_equal 1, d.instance.buffer_config.timekey
       assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
     end
+
+    def test_time_key_without_buffer_section
+      conf = config_element(
+        "ROOT", "", {
+          "host" => "namenode.local",
+          "path" => "/hdfs/path/file.%Y%m%d-%M.log"
+        }
+      )
+      d = create_driver(conf)
+      time = event_time("2012-07-18 15:03:00 +0900")
+      metadata = d.instance.metadata("test", time, {})
+      chunk = d.instance.buffer.generate_chunk(metadata)
+      assert_equal 60, d.instance.buffer_config.timekey
+      assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
+    end
   end
 
   sub_test_case "using format subsection" do
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-webhdfs
 version: !ruby/object:Gem::Version
-  version: 1.2.0
+  version: 1.2.5
 platform: ruby
 authors:
 - TAGOMORI Satoshi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-05-19 00:00:00.000000000 Z
+date: 2020-06-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -100,14 +100,14 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.14.4
+        version: 0.14.22
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.14.4
+        version: 0.14.22
 - !ruby/object:Gem::Dependency
   name: webhdfs
   requirement: !ruby/object:Gem::Requirement
@@ -166,8 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.6.11
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting