fluent-plugin-webhdfs 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 6289f2441877bae9eee9aa392bcb9953518e0648
4
- data.tar.gz: 6ba27fa2dda31b18234ce9658a2b19cce1dbd3ff
2
+ SHA256:
3
+ metadata.gz: 6318fcc2f54eb64be7d99978a04b1cad30e740734ed0dafdf1441cf65988f1ae
4
+ data.tar.gz: 1d49b9c8b009a9a6078c4f9e9fe08b79832510cc780e50344ea72e8493c8e5c1
5
5
  SHA512:
6
- metadata.gz: bb586a6832defd2f355f61fb0886b077877cd11926b875d45065bae7ad84128510782cc509c7b07b999a8409b23aeebb4e3fcebc816fc1b040c863ce76378e54
7
- data.tar.gz: 6ed07a2cad48bb9e58c1b4a9c1e8ecb8aabed2d19bedb4c5965ae45da794514c3b2b4ce5bcc248be3597b2745df871a6050af87b82e51c1c15af97b27d100738
6
+ metadata.gz: 16f6c451fe30d09b33255dcac7ed8714beb0c157fee3bdce8e041b1de9c0aa032910b8cf21ac1effd8dddbea4240a44c5e68e3a83275e386fd881dbb22c75b1d
7
+ data.tar.gz: c59533a82d5c46e9ea178402ede6852aa4d8fa7375981433b7f5b16311a55bb4e58234a0e28b657e318b690946c28622bfbda5da6b4318549342a394d6b8c15a
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ vendor/
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # fluent-plugin-webhdfs
2
2
 
3
+ [![Build Status](https://travis-ci.org/fluent/fluent-plugin-webhdfs.svg?branch=master)](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
4
+
3
5
  [Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
4
6
 
5
7
  "webhdfs" output plugin formats data into plain text, and store it as files on HDFS. This plugin supports:
@@ -155,14 +157,17 @@ If you want to compress data before storing it:
155
157
  host namenode.your.cluster.local
156
158
  port 50070
157
159
  path /path/on/hdfs/access.log.%Y%m%d_%H
158
- compress gzip # or 'bzip2', 'snappy', 'lzo_command'
160
+ compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
159
161
  </match>
160
162
 
161
- Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`).
163
+ Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`, `.zst`).
162
164
  Note that you have to install additional gem for several compress algorithms:
163
165
 
164
166
  - snappy: install snappy gem
165
167
  - bzip2: install bzip2-ffi gem
168
+ - zstd: install zstandard gem
169
+
170
+ Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
166
171
 
167
172
  ### Namenode HA / Auto retry for WebHDFS known errors
168
173
 
@@ -225,7 +230,7 @@ For high load cluster nodes, you can specify timeouts for HTTP requests.
225
230
 
226
231
  With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raises an error for inactive NameNodes.
227
232
 
228
- If you were usging unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
233
+ If you were using unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
229
234
 
230
235
  <match access.**>
231
236
  @type webhdfs
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-webhdfs"
5
- gem.version = "1.2.1"
5
+ gem.version = "1.3.0"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
21
21
  gem.add_development_dependency "appraisal"
22
22
  gem.add_development_dependency "snappy", '>= 0.0.13'
23
23
  gem.add_development_dependency "bzip2-ffi"
24
- gem.add_runtime_dependency "fluentd", '>= 0.14.4'
24
+ gem.add_development_dependency "zstandard"
25
+ gem.add_runtime_dependency "fluentd", '>= 0.14.22'
25
26
  gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
26
27
  end
@@ -67,7 +67,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
67
67
  desc 'kerberos keytab file'
68
68
  config_param :kerberos_keytab, :string, default: nil
69
69
 
70
- SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
70
+ SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
71
71
  desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
72
72
  config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
73
73
 
@@ -98,20 +98,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
98
98
  end
99
99
 
100
100
  def configure(conf)
101
- compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
102
-
101
+ # #compat_parameters_convert ignore time format in conf["path"],
102
+ # so check conf["path"] and overwrite the default value later if needed
103
103
  timekey = case conf["path"]
104
104
  when /%S/ then 1
105
105
  when /%M/ then 60
106
106
  when /%H/ then 3600
107
107
  else 86400
108
108
  end
109
+ if buffer_config = conf.elements(name: "buffer").first
110
+ timekey = buffer_config["timekey"] || timekey
111
+ end
112
+
113
+ compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
114
+
109
115
  if conf.elements(name: "buffer").empty?
110
116
  e = Fluent::Config::Element.new("buffer", "time", {}, [])
111
117
  conf.elements << e
112
118
  end
113
119
  buffer_config = conf.elements(name: "buffer").first
114
- buffer_config["timekey"] = timekey unless buffer_config["timekey"]
120
+ # explicitly set timekey
121
+ buffer_config["timekey"] = timekey
115
122
 
116
123
  compat_parameters_convert_plaintextformatter(conf)
117
124
  verify_config_placeholders_in_path!(conf)
@@ -308,9 +315,9 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
308
315
 
309
316
  def generate_path(chunk)
310
317
  hdfs_path = if @append
311
- extract_placeholders(@path, chunk.metadata)
318
+ extract_placeholders(@path, chunk)
312
319
  else
313
- extract_placeholders(@path, chunk.metadata).gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id))
320
+ extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
314
321
  end
315
322
  hdfs_path = "#{hdfs_path}#{@compressor.ext}"
316
323
  if @replace_random_uuid
@@ -512,3 +519,4 @@ require 'fluent/plugin/webhdfs_compressor_gzip'
512
519
  require 'fluent/plugin/webhdfs_compressor_bzip2'
513
520
  require 'fluent/plugin/webhdfs_compressor_snappy'
514
521
  require 'fluent/plugin/webhdfs_compressor_lzo_command'
522
+ require 'fluent/plugin/webhdfs_compressor_zstd'
@@ -16,9 +16,10 @@ module Fluent::Plugin
16
16
  end
17
17
 
18
18
  def compress(chunk, tmp)
19
- w = Snappy::Writer.new(tmp)
20
- chunk.write_to(w)
21
- w.close
19
+ Snappy::Writer.new(tmp) do |w|
20
+ w << chunk.read
21
+ w.flush
22
+ end
22
23
  end
23
24
  end
24
25
  end
@@ -0,0 +1,24 @@
1
+ module Fluent::Plugin
2
+ class WebHDFSOutput < Output
3
+ class ZstdCompressor < Compressor
4
+ WebHDFSOutput.register_compressor('zstd', self)
5
+
6
+ def initialize(options = {})
7
+ begin
8
+ require "zstandard"
9
+ rescue LoadError
10
+ raise Fluent::ConfigError, "Install zstandard gem before use of zstd compressor"
11
+ end
12
+ end
13
+
14
+ def ext
15
+ ".zst"
16
+ end
17
+
18
+ def compress(chunk, tmp)
19
+ tmp.binmode
20
+ tmp.write Zstandard.deflate(chunk.read)
21
+ end
22
+ end
23
+ end
24
+ end
@@ -168,6 +168,7 @@ class WebHDFSOutputTest < Test::Unit::TestCase
168
168
  metadata = d.instance.metadata("test", nil, {})
169
169
  chunk = d.instance.buffer.generate_chunk(metadata)
170
170
  assert_equal "/hdfs/path/file.#{dump_unique_id_hex(chunk.unique_id)}.log", d.instance.generate_path(chunk)
171
+ assert_empty d.instance.log.out.logs
171
172
  end
172
173
 
173
174
  data(path: { "append" => false },
@@ -206,6 +207,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
206
207
  assert_equal 1, d.instance.buffer_config.timekey
207
208
  assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
208
209
  end
210
+
211
+ def test_time_key_without_buffer_section
212
+ conf = config_element(
213
+ "ROOT", "", {
214
+ "host" => "namenode.local",
215
+ "path" => "/hdfs/path/file.%Y%m%d-%M.log"
216
+ }
217
+ )
218
+ d = create_driver(conf)
219
+ time = event_time("2012-07-18 15:03:00 +0900")
220
+ metadata = d.instance.metadata("test", time, {})
221
+ chunk = d.instance.buffer.generate_chunk(metadata)
222
+ assert_equal 60, d.instance.buffer_config.timekey
223
+ assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
224
+ end
209
225
  end
210
226
 
211
227
  sub_test_case "using format subsection" do
@@ -0,0 +1,55 @@
1
+ require "helper"
2
+ require "fluent/plugin/buf_memory"
3
+ require 'zstandard'
4
+
5
+ class ZstdCompressorTest < Test::Unit::TestCase
6
+ class Zstd < self
7
+
8
+ CONFIG = %[
9
+ host namenode.local
10
+ path /hdfs/path/file.%Y%m%d.log
11
+ ]
12
+
13
+ def setup
14
+ omit unless Object.const_defined?(:Zstandard)
15
+ Fluent::Test.setup
16
+ @compressor = Fluent::Plugin::WebHDFSOutput::ZstdCompressor.new
17
+ end
18
+
19
+ def create_driver(conf = CONFIG)
20
+ Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
21
+ end
22
+
23
+ def test_ext
24
+ assert_equal(".zst", @compressor.ext)
25
+ end
26
+
27
+ def test_compress
28
+ d = create_driver
29
+ if d.instance.respond_to?(:buffer)
30
+ buffer = d.instance.buffer
31
+ else
32
+ buffer = d.instance.instance_variable_get(:@buffer)
33
+ end
34
+
35
+ if buffer.respond_to?(:generate_chunk)
36
+ chunk = buffer.generate_chunk("test")
37
+ chunk.concat("hello gzip\n" * 32 * 1024, 1)
38
+ else
39
+ chunk = buffer.new_chunk("test")
40
+ chunk << "hello gzip\n" * 32 * 1024
41
+ end
42
+
43
+ io = Tempfile.new("zstd-")
44
+ @compressor.compress(chunk, io)
45
+ assert !io.closed?
46
+ chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
47
+ assert(chunk_bytesize > io.read.bytesize)
48
+ io.rewind
49
+ reader = Zstandard.inflate(io.read)
50
+ io.rewind
51
+ assert_equal(chunk.read, reader)
52
+ io.close
53
+ end
54
+ end
55
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-28 00:00:00.000000000 Z
11
+ date: 2020-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -94,20 +94,34 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: zstandard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: fluentd
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - ">="
102
116
  - !ruby/object:Gem::Version
103
- version: 0.14.4
117
+ version: 0.14.22
104
118
  type: :runtime
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - ">="
109
123
  - !ruby/object:Gem::Version
110
- version: 0.14.4
124
+ version: 0.14.22
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: webhdfs
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -143,10 +157,12 @@ files:
143
157
  - lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
144
158
  - lib/fluent/plugin/webhdfs_compressor_snappy.rb
145
159
  - lib/fluent/plugin/webhdfs_compressor_text.rb
160
+ - lib/fluent/plugin/webhdfs_compressor_zstd.rb
146
161
  - test/helper.rb
147
162
  - test/plugin/test_compressor.rb
148
163
  - test/plugin/test_gzip_compressor.rb
149
164
  - test/plugin/test_out_webhdfs.rb
165
+ - test/plugin/test_zstd_compressor.rb
150
166
  homepage: https://github.com/fluent/fluent-plugin-webhdfs
151
167
  licenses:
152
168
  - Apache-2.0
@@ -166,8 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
182
  - !ruby/object:Gem::Version
167
183
  version: '0'
168
184
  requirements: []
169
- rubyforge_project:
170
- rubygems_version: 2.6.11
185
+ rubygems_version: 3.0.3
171
186
  signing_key:
172
187
  specification_version: 4
173
188
  summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
@@ -176,3 +191,4 @@ test_files:
176
191
  - test/plugin/test_compressor.rb
177
192
  - test/plugin/test_gzip_compressor.rb
178
193
  - test/plugin/test_out_webhdfs.rb
194
+ - test/plugin/test_zstd_compressor.rb