fluent-plugin-webhdfs 1.2.3 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 39cdb79659d5d95293f47ef4c3ae99e8827a3af9
4
- data.tar.gz: 91674f2e77c578f1f7761c41e971495541c30043
2
+ SHA256:
3
+ metadata.gz: ea946e8bbbf059043af07bb698aba729d7e5b2ecc7699b3275c34cfde49b3412
4
+ data.tar.gz: 31a42272f10bb2e16d60a93a923c3de1882cef45f05e76a5c38d86620d927795
5
5
  SHA512:
6
- metadata.gz: 6d72ec81812d8fb264ccde08e0dfb4c4d06605f488509cff5046539e070e840fe2ca9b7e0caa481778f325b1dbf3a4bda2fe632cc007d1be6053c9dceb939de0
7
- data.tar.gz: 3c258d6362c05f6778bbf5d6b9e18d96565ed4914d15c8988bfd0334d14ccbdb1d4d3c26073b8d8700931a64cb9667b7c3413db2d87aaae693b78be3f3341a23
6
+ metadata.gz: 8d0527a147d497f309ef9c4d965ad87348e01e96eabe073810e2c613ba8473988866604a57dd7524d8a66ec4f23bba07d40d4f03d16b28847697a8b7717f9c36
7
+ data.tar.gz: d03c99a56a7a0e34424c923f0b3179bf038e46dee4a765142adbb9944dfe9c72d41c27a80c6757fdfb63f071a00f390dac1e74275a884406a3aa65b065481723
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ vendor/
@@ -2,10 +2,11 @@ sudo: false
2
2
  language: ruby
3
3
 
4
4
  rvm:
5
- - 2.1
6
- - 2.2
7
- - 2.3.3
8
- - 2.4.0
5
+ - 2.4
6
+ - 2.5
7
+ - 2.6
8
+ - 2.7
9
+ - ruby-head
9
10
 
10
11
  branches:
11
12
  only:
@@ -23,3 +24,7 @@ script: bundle exec rake test
23
24
 
24
25
  gemfile:
25
26
  - Gemfile
27
+
28
+ matrix:
29
+ allow_failures:
30
+ - rvm: ruby-head
data/README.md CHANGED
@@ -157,14 +157,31 @@ If you want to compress data before storing it:
157
157
  host namenode.your.cluster.local
158
158
  port 50070
159
159
  path /path/on/hdfs/access.log.%Y%m%d_%H
160
- compress gzip # or 'bzip2', 'snappy', 'lzo_command'
160
+ compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
161
161
  </match>
162
162
 
163
- Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`).
163
+ Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.lzo`, `.zst`).
164
164
  Note that you have to install additional gem for several compress algorithms:
165
165
 
166
166
  - snappy: install snappy gem
167
167
  - bzip2: install bzip2-ffi gem
168
+ - zstd: install zstandard gem
169
+
170
+ Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
171
+
172
+ If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
173
+
174
+ <match access.**>
175
+ @type webhdfs
176
+ host namenode.your.cluster.local
177
+ port 50070
178
+ path /path/on/hdfs/access.log.%Y%m%d_%H
179
+ compress snappy
180
+ extension ".snappy"
181
+ </match>
182
+
183
+ With this configuration paths in HDFS will be like `/path/on/hdfs/access.log.20201003_12.snappy`.
184
+ This one may be useful when (for example) you need to use snappy codec but `.sz` files are not recognized as snappy files in HDFS.
168
185
 
169
186
  ### Namenode HA / Auto retry for WebHDFS known errors
170
187
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-webhdfs"
5
- gem.version = "1.2.3"
5
+ gem.version = "1.3.2"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
21
21
  gem.add_development_dependency "appraisal"
22
22
  gem.add_development_dependency "snappy", '>= 0.0.13'
23
23
  gem.add_development_dependency "bzip2-ffi"
24
- gem.add_runtime_dependency "fluentd", '>= 0.14.4'
24
+ gem.add_development_dependency "zstandard"
25
+ gem.add_runtime_dependency "fluentd", '>= 0.14.22'
25
26
  gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
26
27
  end
@@ -67,10 +67,13 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
67
67
  desc 'kerberos keytab file'
68
68
  config_param :kerberos_keytab, :string, default: nil
69
69
 
70
- SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
70
+ SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
71
71
  desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
72
72
  config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
73
73
 
74
+ desc 'HDFS file extensions (overrides default compressor extensions)'
75
+ config_param :extension, :string, default: nil
76
+
74
77
  config_param :remove_prefix, :string, default: nil, deprecated: "use @label for routing"
75
78
  config_param :default_tag, :string, default: nil, deprecated: "use @label for routing"
76
79
  config_param :null_value, :string, default: nil, deprecated: "use filter plugins to convert null values into any specified string"
@@ -267,17 +270,22 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
267
270
  end
268
271
 
269
272
  def send_data(path, data)
270
- if @append
271
- begin
272
- @client.append(path, data)
273
- rescue WebHDFS::FileNotFoundError
274
- @client.create(path, data)
275
- end
273
+ return @client.create(path, data, {'overwrite' => 'true'}) unless @append
274
+
275
+ if path_exists?(path)
276
+ @client.append(path, data)
276
277
  else
277
- @client.create(path, data, {'overwrite' => 'true'})
278
+ @client.create(path, data)
278
279
  end
279
280
  end
280
281
 
282
+ def path_exists?(path)
283
+ @client.stat(path)
284
+ true
285
+ rescue WebHDFS::FileNotFoundError
286
+ false
287
+ end
288
+
281
289
  HOSTNAME_PLACEHOLDERS_DEPRECATED = ['${hostname}', '%{hostname}', '__HOSTNAME__']
282
290
  UUID_RANDOM_PLACEHOLDERS_DEPRECATED = ['${uuid}', '${uuid:random}', '__UUID__', '__UUID_RANDOM__']
283
291
  UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
@@ -315,11 +323,12 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
315
323
 
316
324
  def generate_path(chunk)
317
325
  hdfs_path = if @append
318
- extract_placeholders(@path, chunk.metadata)
326
+ extract_placeholders(@path, chunk)
319
327
  else
320
- extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk.metadata)
328
+ extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
321
329
  end
322
- hdfs_path = "#{hdfs_path}#{@compressor.ext}"
330
+ hdfs_ext = @extension || @compressor.ext
331
+ hdfs_path = "#{hdfs_path}#{hdfs_ext}"
323
332
  if @replace_random_uuid
324
333
  uuid_random = SecureRandom.uuid
325
334
  hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
@@ -519,3 +528,4 @@ require 'fluent/plugin/webhdfs_compressor_gzip'
519
528
  require 'fluent/plugin/webhdfs_compressor_bzip2'
520
529
  require 'fluent/plugin/webhdfs_compressor_snappy'
521
530
  require 'fluent/plugin/webhdfs_compressor_lzo_command'
531
+ require 'fluent/plugin/webhdfs_compressor_zstd'
@@ -16,9 +16,10 @@ module Fluent::Plugin
16
16
  end
17
17
 
18
18
  def compress(chunk, tmp)
19
- w = Snappy::Writer.new(tmp)
20
- chunk.write_to(w)
21
- w.close
19
+ Snappy::Writer.new(tmp) do |w|
20
+ w << chunk.read
21
+ w.flush
22
+ end
22
23
  end
23
24
  end
24
25
  end
@@ -0,0 +1,24 @@
1
+ module Fluent::Plugin
2
+ class WebHDFSOutput < Output
3
+ class ZstdCompressor < Compressor
4
+ WebHDFSOutput.register_compressor('zstd', self)
5
+
6
+ def initialize(options = {})
7
+ begin
8
+ require "zstandard"
9
+ rescue LoadError
10
+ raise Fluent::ConfigError, "Install zstandard gem before use of zstd compressor"
11
+ end
12
+ end
13
+
14
+ def ext
15
+ ".zst"
16
+ end
17
+
18
+ def compress(chunk, tmp)
19
+ tmp.binmode
20
+ tmp.write Zstandard.deflate(chunk.read)
21
+ end
22
+ end
23
+ end
24
+ end
@@ -126,6 +126,26 @@ class WebHDFSOutputTest < Test::Unit::TestCase
126
126
  assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
127
127
  assert_equal compress_type, d.instance.compress
128
128
  assert_equal compressor_class, d.instance.compressor.class
129
+
130
+ time = event_time("2020-10-03 15:07:00 +0300")
131
+ metadata = d.instance.metadata("test", time, {})
132
+ chunk = d.instance.buffer.generate_chunk(metadata)
133
+ assert_equal "/hdfs/path/file.20201003.1507.log#{d.instance.compressor.ext}", d.instance.generate_path(chunk)
134
+ end
135
+
136
+ def test_explicit_extensions
137
+ conf = config_element(
138
+ "ROOT", "", {
139
+ "host" => "namenode.local",
140
+ "path" => "/hdfs/path/file.%Y%m%d.log",
141
+ "compress" => "snappy",
142
+ "extension" => ".snappy"
143
+ })
144
+ d = create_driver(conf)
145
+ time = event_time("2020-10-07 15:15:00 +0300")
146
+ metadata = d.instance.metadata("test", time, {})
147
+ chunk = d.instance.buffer.generate_chunk(metadata)
148
+ assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
129
149
  end
130
150
 
131
151
  def test_placeholders_old_style
@@ -0,0 +1,55 @@
1
+ require "helper"
2
+ require "fluent/plugin/buf_memory"
3
+ require 'zstandard'
4
+
5
+ class ZstdCompressorTest < Test::Unit::TestCase
6
+ class Zstd < self
7
+
8
+ CONFIG = %[
9
+ host namenode.local
10
+ path /hdfs/path/file.%Y%m%d.log
11
+ ]
12
+
13
+ def setup
14
+ omit unless Object.const_defined?(:Zstandard)
15
+ Fluent::Test.setup
16
+ @compressor = Fluent::Plugin::WebHDFSOutput::ZstdCompressor.new
17
+ end
18
+
19
+ def create_driver(conf = CONFIG)
20
+ Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
21
+ end
22
+
23
+ def test_ext
24
+ assert_equal(".zst", @compressor.ext)
25
+ end
26
+
27
+ def test_compress
28
+ d = create_driver
29
+ if d.instance.respond_to?(:buffer)
30
+ buffer = d.instance.buffer
31
+ else
32
+ buffer = d.instance.instance_variable_get(:@buffer)
33
+ end
34
+
35
+ if buffer.respond_to?(:generate_chunk)
36
+ chunk = buffer.generate_chunk("test")
37
+ chunk.concat("hello gzip\n" * 32 * 1024, 1)
38
+ else
39
+ chunk = buffer.new_chunk("test")
40
+ chunk << "hello gzip\n" * 32 * 1024
41
+ end
42
+
43
+ io = Tempfile.new("zstd-")
44
+ @compressor.compress(chunk, io)
45
+ assert !io.closed?
46
+ chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
47
+ assert(chunk_bytesize > io.read.bytesize)
48
+ io.rewind
49
+ reader = Zstandard.inflate(io.read)
50
+ io.rewind
51
+ assert_equal(chunk.read, reader)
52
+ io.close
53
+ end
54
+ end
55
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.3
4
+ version: 1.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-21 00:00:00.000000000 Z
11
+ date: 2020-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -94,20 +94,34 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: zstandard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: fluentd
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - ">="
102
116
  - !ruby/object:Gem::Version
103
- version: 0.14.4
117
+ version: 0.14.22
104
118
  type: :runtime
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - ">="
109
123
  - !ruby/object:Gem::Version
110
- version: 0.14.4
124
+ version: 0.14.22
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: webhdfs
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -143,10 +157,12 @@ files:
143
157
  - lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
144
158
  - lib/fluent/plugin/webhdfs_compressor_snappy.rb
145
159
  - lib/fluent/plugin/webhdfs_compressor_text.rb
160
+ - lib/fluent/plugin/webhdfs_compressor_zstd.rb
146
161
  - test/helper.rb
147
162
  - test/plugin/test_compressor.rb
148
163
  - test/plugin/test_gzip_compressor.rb
149
164
  - test/plugin/test_out_webhdfs.rb
165
+ - test/plugin/test_zstd_compressor.rb
150
166
  homepage: https://github.com/fluent/fluent-plugin-webhdfs
151
167
  licenses:
152
168
  - Apache-2.0
@@ -166,8 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
182
  - !ruby/object:Gem::Version
167
183
  version: '0'
168
184
  requirements: []
169
- rubyforge_project:
170
- rubygems_version: 2.6.13
185
+ rubygems_version: 3.0.3
171
186
  signing_key:
172
187
  specification_version: 4
173
188
  summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
@@ -176,3 +191,4 @@ test_files:
176
191
  - test/plugin/test_compressor.rb
177
192
  - test/plugin/test_gzip_compressor.rb
178
193
  - test/plugin/test_out_webhdfs.rb
194
+ - test/plugin/test_zstd_compressor.rb