fluent-plugin-webhdfs 1.2.3 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 39cdb79659d5d95293f47ef4c3ae99e8827a3af9
4
- data.tar.gz: 91674f2e77c578f1f7761c41e971495541c30043
2
+ SHA256:
3
+ metadata.gz: ea946e8bbbf059043af07bb698aba729d7e5b2ecc7699b3275c34cfde49b3412
4
+ data.tar.gz: 31a42272f10bb2e16d60a93a923c3de1882cef45f05e76a5c38d86620d927795
5
5
  SHA512:
6
- metadata.gz: 6d72ec81812d8fb264ccde08e0dfb4c4d06605f488509cff5046539e070e840fe2ca9b7e0caa481778f325b1dbf3a4bda2fe632cc007d1be6053c9dceb939de0
7
- data.tar.gz: 3c258d6362c05f6778bbf5d6b9e18d96565ed4914d15c8988bfd0334d14ccbdb1d4d3c26073b8d8700931a64cb9667b7c3413db2d87aaae693b78be3f3341a23
6
+ metadata.gz: 8d0527a147d497f309ef9c4d965ad87348e01e96eabe073810e2c613ba8473988866604a57dd7524d8a66ec4f23bba07d40d4f03d16b28847697a8b7717f9c36
7
+ data.tar.gz: d03c99a56a7a0e34424c923f0b3179bf038e46dee4a765142adbb9944dfe9c72d41c27a80c6757fdfb63f071a00f390dac1e74275a884406a3aa65b065481723
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ vendor/
@@ -2,10 +2,11 @@ sudo: false
2
2
  language: ruby
3
3
 
4
4
  rvm:
5
- - 2.1
6
- - 2.2
7
- - 2.3.3
8
- - 2.4.0
5
+ - 2.4
6
+ - 2.5
7
+ - 2.6
8
+ - 2.7
9
+ - ruby-head
9
10
 
10
11
  branches:
11
12
  only:
@@ -23,3 +24,7 @@ script: bundle exec rake test
23
24
 
24
25
  gemfile:
25
26
  - Gemfile
27
+
28
+ matrix:
29
+ allow_failures:
30
+ - rvm: ruby-head
data/README.md CHANGED
@@ -157,14 +157,31 @@ If you want to compress data before storing it:
157
157
  host namenode.your.cluster.local
158
158
  port 50070
159
159
  path /path/on/hdfs/access.log.%Y%m%d_%H
160
- compress gzip # or 'bzip2', 'snappy', 'lzo_command'
160
+ compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
161
161
  </match>
162
162
 
163
- Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`).
163
+ Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.sz`, `.lzo`, `.zst`).
164
164
  Note that you have to install additional gem for several compress algorithms:
165
165
 
166
166
  - snappy: install snappy gem
167
167
  - bzip2: install bzip2-ffi gem
168
+ - zstd: install zstandard gem
169
+
170
+ Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
171
+
172
+ If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
173
+
174
+ <match access.**>
175
+ @type webhdfs
176
+ host namenode.your.cluster.local
177
+ port 50070
178
+ path /path/on/hdfs/access.log.%Y%m%d_%H
179
+ compress snappy
180
+ extension ".snappy"
181
+ </match>
182
+
183
+ With this configuration paths in HDFS will be like `/path/on/hdfs/access.log.20201003_12.snappy`.
184
+ This one may be useful when (for example) you need to use snappy codec but `.sz` files are not recognized as snappy files in HDFS.
168
185
 
169
186
  ### Namenode HA / Auto retry for WebHDFS known errors
170
187
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-webhdfs"
5
- gem.version = "1.2.3"
5
+ gem.version = "1.3.2"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
21
21
  gem.add_development_dependency "appraisal"
22
22
  gem.add_development_dependency "snappy", '>= 0.0.13'
23
23
  gem.add_development_dependency "bzip2-ffi"
24
- gem.add_runtime_dependency "fluentd", '>= 0.14.4'
24
+ gem.add_development_dependency "zstandard"
25
+ gem.add_runtime_dependency "fluentd", '>= 0.14.22'
25
26
  gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
26
27
  end
@@ -67,10 +67,13 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
67
67
  desc 'kerberos keytab file'
68
68
  config_param :kerberos_keytab, :string, default: nil
69
69
 
70
- SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
70
+ SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
71
71
  desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
72
72
  config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
73
73
 
74
+ desc 'HDFS file extensions (overrides default compressor extensions)'
75
+ config_param :extension, :string, default: nil
76
+
74
77
  config_param :remove_prefix, :string, default: nil, deprecated: "use @label for routing"
75
78
  config_param :default_tag, :string, default: nil, deprecated: "use @label for routing"
76
79
  config_param :null_value, :string, default: nil, deprecated: "use filter plugins to convert null values into any specified string"
@@ -267,17 +270,22 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
267
270
  end
268
271
 
269
272
  def send_data(path, data)
270
- if @append
271
- begin
272
- @client.append(path, data)
273
- rescue WebHDFS::FileNotFoundError
274
- @client.create(path, data)
275
- end
273
+ return @client.create(path, data, {'overwrite' => 'true'}) unless @append
274
+
275
+ if path_exists?(path)
276
+ @client.append(path, data)
276
277
  else
277
- @client.create(path, data, {'overwrite' => 'true'})
278
+ @client.create(path, data)
278
279
  end
279
280
  end
280
281
 
282
+ def path_exists?(path)
283
+ @client.stat(path)
284
+ true
285
+ rescue WebHDFS::FileNotFoundError
286
+ false
287
+ end
288
+
281
289
  HOSTNAME_PLACEHOLDERS_DEPRECATED = ['${hostname}', '%{hostname}', '__HOSTNAME__']
282
290
  UUID_RANDOM_PLACEHOLDERS_DEPRECATED = ['${uuid}', '${uuid:random}', '__UUID__', '__UUID_RANDOM__']
283
291
  UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
@@ -315,11 +323,12 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
315
323
 
316
324
  def generate_path(chunk)
317
325
  hdfs_path = if @append
318
- extract_placeholders(@path, chunk.metadata)
326
+ extract_placeholders(@path, chunk)
319
327
  else
320
- extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk.metadata)
328
+ extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
321
329
  end
322
- hdfs_path = "#{hdfs_path}#{@compressor.ext}"
330
+ hdfs_ext = @extension || @compressor.ext
331
+ hdfs_path = "#{hdfs_path}#{hdfs_ext}"
323
332
  if @replace_random_uuid
324
333
  uuid_random = SecureRandom.uuid
325
334
  hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
@@ -519,3 +528,4 @@ require 'fluent/plugin/webhdfs_compressor_gzip'
519
528
  require 'fluent/plugin/webhdfs_compressor_bzip2'
520
529
  require 'fluent/plugin/webhdfs_compressor_snappy'
521
530
  require 'fluent/plugin/webhdfs_compressor_lzo_command'
531
+ require 'fluent/plugin/webhdfs_compressor_zstd'
@@ -16,9 +16,10 @@ module Fluent::Plugin
16
16
  end
17
17
 
18
18
  def compress(chunk, tmp)
19
- w = Snappy::Writer.new(tmp)
20
- chunk.write_to(w)
21
- w.close
19
+ Snappy::Writer.new(tmp) do |w|
20
+ w << chunk.read
21
+ w.flush
22
+ end
22
23
  end
23
24
  end
24
25
  end
@@ -0,0 +1,24 @@
1
+ module Fluent::Plugin
2
+ class WebHDFSOutput < Output
3
+ class ZstdCompressor < Compressor
4
+ WebHDFSOutput.register_compressor('zstd', self)
5
+
6
+ def initialize(options = {})
7
+ begin
8
+ require "zstandard"
9
+ rescue LoadError
10
+ raise Fluent::ConfigError, "Install zstandard gem before use of zstd compressor"
11
+ end
12
+ end
13
+
14
+ def ext
15
+ ".zst"
16
+ end
17
+
18
+ def compress(chunk, tmp)
19
+ tmp.binmode
20
+ tmp.write Zstandard.deflate(chunk.read)
21
+ end
22
+ end
23
+ end
24
+ end
@@ -126,6 +126,26 @@ class WebHDFSOutputTest < Test::Unit::TestCase
126
126
  assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
127
127
  assert_equal compress_type, d.instance.compress
128
128
  assert_equal compressor_class, d.instance.compressor.class
129
+
130
+ time = event_time("2020-10-03 15:07:00 +0300")
131
+ metadata = d.instance.metadata("test", time, {})
132
+ chunk = d.instance.buffer.generate_chunk(metadata)
133
+ assert_equal "/hdfs/path/file.20201003.1507.log#{d.instance.compressor.ext}", d.instance.generate_path(chunk)
134
+ end
135
+
136
+ def test_explicit_extensions
137
+ conf = config_element(
138
+ "ROOT", "", {
139
+ "host" => "namenode.local",
140
+ "path" => "/hdfs/path/file.%Y%m%d.log",
141
+ "compress" => "snappy",
142
+ "extension" => ".snappy"
143
+ })
144
+ d = create_driver(conf)
145
+ time = event_time("2020-10-07 15:15:00 +0300")
146
+ metadata = d.instance.metadata("test", time, {})
147
+ chunk = d.instance.buffer.generate_chunk(metadata)
148
+ assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
129
149
  end
130
150
 
131
151
  def test_placeholders_old_style
@@ -0,0 +1,55 @@
1
+ require "helper"
2
+ require "fluent/plugin/buf_memory"
3
+ require 'zstandard'
4
+
5
+ class ZstdCompressorTest < Test::Unit::TestCase
6
+ class Zstd < self
7
+
8
+ CONFIG = %[
9
+ host namenode.local
10
+ path /hdfs/path/file.%Y%m%d.log
11
+ ]
12
+
13
+ def setup
14
+ omit unless Object.const_defined?(:Zstandard)
15
+ Fluent::Test.setup
16
+ @compressor = Fluent::Plugin::WebHDFSOutput::ZstdCompressor.new
17
+ end
18
+
19
+ def create_driver(conf = CONFIG)
20
+ Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
21
+ end
22
+
23
+ def test_ext
24
+ assert_equal(".zst", @compressor.ext)
25
+ end
26
+
27
+ def test_compress
28
+ d = create_driver
29
+ if d.instance.respond_to?(:buffer)
30
+ buffer = d.instance.buffer
31
+ else
32
+ buffer = d.instance.instance_variable_get(:@buffer)
33
+ end
34
+
35
+ if buffer.respond_to?(:generate_chunk)
36
+ chunk = buffer.generate_chunk("test")
37
+ chunk.concat("hello gzip\n" * 32 * 1024, 1)
38
+ else
39
+ chunk = buffer.new_chunk("test")
40
+ chunk << "hello gzip\n" * 32 * 1024
41
+ end
42
+
43
+ io = Tempfile.new("zstd-")
44
+ @compressor.compress(chunk, io)
45
+ assert !io.closed?
46
+ chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
47
+ assert(chunk_bytesize > io.read.bytesize)
48
+ io.rewind
49
+ reader = Zstandard.inflate(io.read)
50
+ io.rewind
51
+ assert_equal(chunk.read, reader)
52
+ io.close
53
+ end
54
+ end
55
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.3
4
+ version: 1.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-21 00:00:00.000000000 Z
11
+ date: 2020-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -94,20 +94,34 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: zstandard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: fluentd
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - ">="
102
116
  - !ruby/object:Gem::Version
103
- version: 0.14.4
117
+ version: 0.14.22
104
118
  type: :runtime
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - ">="
109
123
  - !ruby/object:Gem::Version
110
- version: 0.14.4
124
+ version: 0.14.22
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: webhdfs
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -143,10 +157,12 @@ files:
143
157
  - lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
144
158
  - lib/fluent/plugin/webhdfs_compressor_snappy.rb
145
159
  - lib/fluent/plugin/webhdfs_compressor_text.rb
160
+ - lib/fluent/plugin/webhdfs_compressor_zstd.rb
146
161
  - test/helper.rb
147
162
  - test/plugin/test_compressor.rb
148
163
  - test/plugin/test_gzip_compressor.rb
149
164
  - test/plugin/test_out_webhdfs.rb
165
+ - test/plugin/test_zstd_compressor.rb
150
166
  homepage: https://github.com/fluent/fluent-plugin-webhdfs
151
167
  licenses:
152
168
  - Apache-2.0
@@ -166,8 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
182
  - !ruby/object:Gem::Version
167
183
  version: '0'
168
184
  requirements: []
169
- rubyforge_project:
170
- rubygems_version: 2.6.13
185
+ rubygems_version: 3.0.3
171
186
  signing_key:
172
187
  specification_version: 4
173
188
  summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
@@ -176,3 +191,4 @@ test_files:
176
191
  - test/plugin/test_compressor.rb
177
192
  - test/plugin/test_gzip_compressor.rb
178
193
  - test/plugin/test_out_webhdfs.rb
194
+ - test/plugin/test_zstd_compressor.rb