fluent-plugin-webhdfs 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 6289f2441877bae9eee9aa392bcb9953518e0648
4
- data.tar.gz: 6ba27fa2dda31b18234ce9658a2b19cce1dbd3ff
2
+ SHA256:
3
+ metadata.gz: 6318fcc2f54eb64be7d99978a04b1cad30e740734ed0dafdf1441cf65988f1ae
4
+ data.tar.gz: 1d49b9c8b009a9a6078c4f9e9fe08b79832510cc780e50344ea72e8493c8e5c1
5
5
  SHA512:
6
- metadata.gz: bb586a6832defd2f355f61fb0886b077877cd11926b875d45065bae7ad84128510782cc509c7b07b999a8409b23aeebb4e3fcebc816fc1b040c863ce76378e54
7
- data.tar.gz: 6ed07a2cad48bb9e58c1b4a9c1e8ecb8aabed2d19bedb4c5965ae45da794514c3b2b4ce5bcc248be3597b2745df871a6050af87b82e51c1c15af97b27d100738
6
+ metadata.gz: 16f6c451fe30d09b33255dcac7ed8714beb0c157fee3bdce8e041b1de9c0aa032910b8cf21ac1effd8dddbea4240a44c5e68e3a83275e386fd881dbb22c75b1d
7
+ data.tar.gz: c59533a82d5c46e9ea178402ede6852aa4d8fa7375981433b7f5b16311a55bb4e58234a0e28b657e318b690946c28622bfbda5da6b4318549342a394d6b8c15a
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ vendor/
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # fluent-plugin-webhdfs
2
2
 
3
+ [![Build Status](https://travis-ci.org/fluent/fluent-plugin-webhdfs.svg?branch=master)](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
4
+
3
5
  [Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
4
6
 
5
7
  "webhdfs" output plugin formats data into plain text, and store it as files on HDFS. This plugin supports:
@@ -155,14 +157,17 @@ If you want to compress data before storing it:
155
157
  host namenode.your.cluster.local
156
158
  port 50070
157
159
  path /path/on/hdfs/access.log.%Y%m%d_%H
158
- compress gzip # or 'bzip2', 'snappy', 'lzo_command'
160
+ compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
159
161
  </match>
160
162
 
161
- Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`).
163
+ Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.lzo`, `.zst`).
162
164
  Note that you have to install additional gem for several compress algorithms:
163
165
 
164
166
  - snappy: install snappy gem
165
167
  - bzip2: install bzip2-ffi gem
168
+ - zstd: install zstandard gem
169
+
170
+ Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
166
171
 
167
172
  ### Namenode HA / Auto retry for WebHDFS known errors
168
173
 
@@ -225,7 +230,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
225
230
 
226
231
  With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raises an error for inactive NameNodes.
227
232
 
228
- If you were usging unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
233
+ If you were using unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
229
234
 
230
235
  <match access.**>
231
236
  @type webhdfs
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-webhdfs"
5
- gem.version = "1.2.1"
5
+ gem.version = "1.3.0"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
21
21
  gem.add_development_dependency "appraisal"
22
22
  gem.add_development_dependency "snappy", '>= 0.0.13'
23
23
  gem.add_development_dependency "bzip2-ffi"
24
- gem.add_runtime_dependency "fluentd", '>= 0.14.4'
24
+ gem.add_development_dependency "zstandard"
25
+ gem.add_runtime_dependency "fluentd", '>= 0.14.22'
25
26
  gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
26
27
  end
@@ -67,7 +67,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
67
67
  desc 'kerberos keytab file'
68
68
  config_param :kerberos_keytab, :string, default: nil
69
69
 
70
- SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
70
+ SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
71
71
  desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
72
72
  config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
73
73
 
@@ -98,20 +98,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
98
98
  end
99
99
 
100
100
  def configure(conf)
101
- compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
102
-
101
+ # #compat_parameters_convert ignore time format in conf["path"],
102
+ # so check conf["path"] and overwrite the default value later if needed
103
103
  timekey = case conf["path"]
104
104
  when /%S/ then 1
105
105
  when /%M/ then 60
106
106
  when /%H/ then 3600
107
107
  else 86400
108
108
  end
109
+ if buffer_config = conf.elements(name: "buffer").first
110
+ timekey = buffer_config["timekey"] || timekey
111
+ end
112
+
113
+ compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
114
+
109
115
  if conf.elements(name: "buffer").empty?
110
116
  e = Fluent::Config::Element.new("buffer", "time", {}, [])
111
117
  conf.elements << e
112
118
  end
113
119
  buffer_config = conf.elements(name: "buffer").first
114
- buffer_config["timekey"] = timekey unless buffer_config["timekey"]
120
+ # explicitly set timekey
121
+ buffer_config["timekey"] = timekey
115
122
 
116
123
  compat_parameters_convert_plaintextformatter(conf)
117
124
  verify_config_placeholders_in_path!(conf)
@@ -308,9 +315,9 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
308
315
 
309
316
  def generate_path(chunk)
310
317
  hdfs_path = if @append
311
- extract_placeholders(@path, chunk.metadata)
318
+ extract_placeholders(@path, chunk)
312
319
  else
313
- extract_placeholders(@path, chunk.metadata).gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id))
320
+ extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
314
321
  end
315
322
  hdfs_path = "#{hdfs_path}#{@compressor.ext}"
316
323
  if @replace_random_uuid
@@ -512,3 +519,4 @@ require 'fluent/plugin/webhdfs_compressor_gzip'
512
519
  require 'fluent/plugin/webhdfs_compressor_bzip2'
513
520
  require 'fluent/plugin/webhdfs_compressor_snappy'
514
521
  require 'fluent/plugin/webhdfs_compressor_lzo_command'
522
+ require 'fluent/plugin/webhdfs_compressor_zstd'
@@ -16,9 +16,10 @@ module Fluent::Plugin
16
16
  end
17
17
 
18
18
  def compress(chunk, tmp)
19
- w = Snappy::Writer.new(tmp)
20
- chunk.write_to(w)
21
- w.close
19
+ Snappy::Writer.new(tmp) do |w|
20
+ w << chunk.read
21
+ w.flush
22
+ end
22
23
  end
23
24
  end
24
25
  end
@@ -0,0 +1,24 @@
1
+ module Fluent::Plugin
2
+ class WebHDFSOutput < Output
3
+ class ZstdCompressor < Compressor
4
+ WebHDFSOutput.register_compressor('zstd', self)
5
+
6
+ def initialize(options = {})
7
+ begin
8
+ require "zstandard"
9
+ rescue LoadError
10
+ raise Fluent::ConfigError, "Install zstandard gem before use of zstd compressor"
11
+ end
12
+ end
13
+
14
+ def ext
15
+ ".zst"
16
+ end
17
+
18
+ def compress(chunk, tmp)
19
+ tmp.binmode
20
+ tmp.write Zstandard.deflate(chunk.read)
21
+ end
22
+ end
23
+ end
24
+ end
@@ -168,6 +168,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
168
168
  metadata = d.instance.metadata("test", nil, {})
169
169
  chunk = d.instance.buffer.generate_chunk(metadata)
170
170
  assert_equal "/hdfs/path/file.#{dump_unique_id_hex(chunk.unique_id)}.log", d.instance.generate_path(chunk)
171
+ assert_empty d.instance.log.out.logs
171
172
  end
172
173
 
173
174
  data(path: { "append" => false },
@@ -206,6 +207,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
206
207
  assert_equal 1, d.instance.buffer_config.timekey
207
208
  assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
208
209
  end
210
+
211
+ def test_time_key_without_buffer_section
212
+ conf = config_element(
213
+ "ROOT", "", {
214
+ "host" => "namenode.local",
215
+ "path" => "/hdfs/path/file.%Y%m%d-%M.log"
216
+ }
217
+ )
218
+ d = create_driver(conf)
219
+ time = event_time("2012-07-18 15:03:00 +0900")
220
+ metadata = d.instance.metadata("test", time, {})
221
+ chunk = d.instance.buffer.generate_chunk(metadata)
222
+ assert_equal 60, d.instance.buffer_config.timekey
223
+ assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
224
+ end
209
225
  end
210
226
 
211
227
  sub_test_case "using format subsection" do
@@ -0,0 +1,55 @@
1
+ require "helper"
2
+ require "fluent/plugin/buf_memory"
3
+ require 'zstandard'
4
+
5
+ class ZstdCompressorTest < Test::Unit::TestCase
6
+ class Zstd < self
7
+
8
+ CONFIG = %[
9
+ host namenode.local
10
+ path /hdfs/path/file.%Y%m%d.log
11
+ ]
12
+
13
+ def setup
14
+ omit unless Object.const_defined?(:Zstandard)
15
+ Fluent::Test.setup
16
+ @compressor = Fluent::Plugin::WebHDFSOutput::ZstdCompressor.new
17
+ end
18
+
19
+ def create_driver(conf = CONFIG)
20
+ Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
21
+ end
22
+
23
+ def test_ext
24
+ assert_equal(".zst", @compressor.ext)
25
+ end
26
+
27
+ def test_compress
28
+ d = create_driver
29
+ if d.instance.respond_to?(:buffer)
30
+ buffer = d.instance.buffer
31
+ else
32
+ buffer = d.instance.instance_variable_get(:@buffer)
33
+ end
34
+
35
+ if buffer.respond_to?(:generate_chunk)
36
+ chunk = buffer.generate_chunk("test")
37
+ chunk.concat("hello gzip\n" * 32 * 1024, 1)
38
+ else
39
+ chunk = buffer.new_chunk("test")
40
+ chunk << "hello gzip\n" * 32 * 1024
41
+ end
42
+
43
+ io = Tempfile.new("zstd-")
44
+ @compressor.compress(chunk, io)
45
+ assert !io.closed?
46
+ chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
47
+ assert(chunk_bytesize > io.read.bytesize)
48
+ io.rewind
49
+ reader = Zstandard.inflate(io.read)
50
+ io.rewind
51
+ assert_equal(chunk.read, reader)
52
+ io.close
53
+ end
54
+ end
55
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-28 00:00:00.000000000 Z
11
+ date: 2020-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -94,20 +94,34 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: zstandard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: fluentd
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - ">="
102
116
  - !ruby/object:Gem::Version
103
- version: 0.14.4
117
+ version: 0.14.22
104
118
  type: :runtime
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - ">="
109
123
  - !ruby/object:Gem::Version
110
- version: 0.14.4
124
+ version: 0.14.22
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: webhdfs
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -143,10 +157,12 @@ files:
143
157
  - lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
144
158
  - lib/fluent/plugin/webhdfs_compressor_snappy.rb
145
159
  - lib/fluent/plugin/webhdfs_compressor_text.rb
160
+ - lib/fluent/plugin/webhdfs_compressor_zstd.rb
146
161
  - test/helper.rb
147
162
  - test/plugin/test_compressor.rb
148
163
  - test/plugin/test_gzip_compressor.rb
149
164
  - test/plugin/test_out_webhdfs.rb
165
+ - test/plugin/test_zstd_compressor.rb
150
166
  homepage: https://github.com/fluent/fluent-plugin-webhdfs
151
167
  licenses:
152
168
  - Apache-2.0
@@ -166,8 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
182
  - !ruby/object:Gem::Version
167
183
  version: '0'
168
184
  requirements: []
169
- rubyforge_project:
170
- rubygems_version: 2.6.11
185
+ rubygems_version: 3.0.3
171
186
  signing_key:
172
187
  specification_version: 4
173
188
  summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
@@ -176,3 +191,4 @@ test_files:
176
191
  - test/plugin/test_compressor.rb
177
192
  - test/plugin/test_gzip_compressor.rb
178
193
  - test/plugin/test_out_webhdfs.rb
194
+ - test/plugin/test_zstd_compressor.rb