fluent-plugin-webhdfs 1.2.2 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
- SHA1:
- metadata.gz: 4637e563a9e04c877cd6d897d013ac57a63d1a80
- data.tar.gz: 305a8e89b883175c47b4fc9811fd1a27f9773141
+ SHA256:
+ metadata.gz: 64bdfcd21e62ec4f83f7807b7b7a60e51f0ed68f2d015f39424dd47b76473be3
+ data.tar.gz: cbf248448c4eaa9b35689076ac0010753af7eb138d24cd377c43bcf5dfd46595
  SHA512:
- metadata.gz: ded6f6b4ba2ca28fa49be0a4a23127ec643d1adfc046c7cb14d37cc91ad90705252ac7cc986439c0793849c876ffcf88c5b6abc21a4ba9f7284c6933e07a8632
- data.tar.gz: 1e1d5b936463bfab84baeec2cf9b6064dcd25890712f8d315b5a18457dc87087c4270a2a2f1cdf0a9e390c91345bb9ac1eb739088644537c0a6bae1564cf1325
+ metadata.gz: d76806177dc1ff5e33dbf4fcc0e1fd0b3a47bd1a4fa1e51287a033ba4ccb94c750a540f735e1144be0bd502629c05ee19d74462ab2fe6831979d378b35686d20
+ data.tar.gz: e7d195870652a72c6d3edea039336742e3564a01ac9871036ddc4dcb3d0eb58bb4b9c73ac6598ea2712ec1ec17870e9ee6d091a2a89204688fb3935aa5ccc34b
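
Checksums for the new release are recorded as SHA256 (replacing the SHA1 entries) alongside the existing SHA512 digests. As a rough illustration only, digests like the ones above can be reproduced with Ruby's standard Digest library, assuming the .gem archive has been unpacked (a .gem file is a plain tar containing metadata.gz and data.tar.gz); the file names below are placeholders for wherever those files land:

    # Recompute the digests recorded in checksums.yaml from an unpacked gem.
    # Assumes `tar xf fluent-plugin-webhdfs-1.3.1.gem` has been run in the
    # current directory, leaving metadata.gz and data.tar.gz on disk.
    require "digest"

    %w[metadata.gz data.tar.gz].each do |name|
      puts "#{name}:"
      puts "  SHA256: #{Digest::SHA256.file(name).hexdigest}"
      puts "  SHA512: #{Digest::SHA512.file(name).hexdigest}"
    end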
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
  test/tmp
  test/version_tmp
  tmp
+ vendor/
data/README.md CHANGED
@@ -1,5 +1,7 @@
  # fluent-plugin-webhdfs
 
+ [![Build Status](https://travis-ci.org/fluent/fluent-plugin-webhdfs.svg?branch=master)](https://travis-ci.org/fluent/fluent-plugin-webhdfs)
+
  [Fluentd](http://fluentd.org/) output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
 
  "webhdfs" output plugin formats data into plain text, and store it as files on HDFS. This plugin supports:
@@ -155,14 +157,31 @@ If you want to compress data before storing it:
  host namenode.your.cluster.local
  port 50070
  path /path/on/hdfs/access.log.%Y%m%d_%H
- compress gzip # or 'bzip2', 'snappy', 'lzo_command'
+ compress gzip # or 'bzip2', 'snappy', 'lzo_command', 'zstd'
  </match>
 
- Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`).
+ Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `sz`, `.lzo`, `.zst`).
  Note that you have to install additional gem for several compress algorithms:
 
  - snappy: install snappy gem
  - bzip2: install bzip2-ffi gem
+ - zstd: install zstandard gem
+
+ Note that zstd will require installation of the libzstd native library. See the [zstandard-ruby](https://github.com/msievers/zstandard-ruby#examples-for-installing-libzstd) repo for information on the required packages for your operating system.
+
+ If you want to explicitly specify file extensions in HDFS (override default compressor extensions):
+
+ <match access.**>
+ @type webhdfs
+ host namenode.your.cluster.local
+ port 50070
+ path /path/on/hdfs/access.log.%Y%m%d_%H
+ compress snappy
+ extension ".snappy"
+ </match>
+
+ With this configuration paths in HDFS will be like `/path/on/hdfs/access.log.20201003_12.snappy`.
+ This one may be useful when (for example) you need to use snappy codec but `.sz` files are not recognized as snappy files in HDFS.
 
  ### Namenode HA / Auto retry for WebHDFS known errors
 
data/fluent-plugin-webhdfs.gemspec CHANGED
@@ -2,7 +2,7 @@
 
  Gem::Specification.new do |gem|
  gem.name = "fluent-plugin-webhdfs"
- gem.version = "1.2.2"
+ gem.version = "1.3.1"
  gem.authors = ["TAGOMORI Satoshi"]
  gem.email = ["tagomoris@gmail.com"]
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
  gem.add_development_dependency "appraisal"
  gem.add_development_dependency "snappy", '>= 0.0.13'
  gem.add_development_dependency "bzip2-ffi"
- gem.add_runtime_dependency "fluentd", '>= 0.14.4'
+ gem.add_development_dependency "zstandard"
+ gem.add_runtime_dependency "fluentd", '>= 0.14.22'
  gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
  end
data/lib/fluent/plugin/out_webhdfs.rb CHANGED
@@ -67,10 +67,13 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
  desc 'kerberos keytab file'
  config_param :kerberos_keytab, :string, default: nil
 
- SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
+ SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :zstd, :text]
  desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
  config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
 
+ desc 'HDFS file extensions (overrides default compressor extensions)'
+ config_param :extension, :string, default: nil
+
  config_param :remove_prefix, :string, default: nil, deprecated: "use @label for routing"
  config_param :default_tag, :string, default: nil, deprecated: "use @label for routing"
  config_param :null_value, :string, default: nil, deprecated: "use filter plugins to convert null values into any specified string"
@@ -98,20 +101,27 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
  end
 
  def configure(conf)
- compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
-
+ # #compat_parameters_convert ignore time format in conf["path"],
+ # so check conf["path"] and overwrite the default value later if needed
  timekey = case conf["path"]
  when /%S/ then 1
  when /%M/ then 60
  when /%H/ then 3600
  else 86400
  end
+ if buffer_config = conf.elements(name: "buffer").first
+ timekey = buffer_config["timekey"] || timekey
+ end
+
+ compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
+
  if conf.elements(name: "buffer").empty?
  e = Fluent::Config::Element.new("buffer", "time", {}, [])
  conf.elements << e
  end
  buffer_config = conf.elements(name: "buffer").first
- buffer_config["timekey"] = timekey unless buffer_config["timekey"]
+ # explicitly set timekey
+ buffer_config["timekey"] = timekey
 
  compat_parameters_convert_plaintextformatter(conf)
  verify_config_placeholders_in_path!(conf)
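
The configure changes reorder the timekey handling: the default flush interval is still inferred from the finest strftime token in the configured path, but an explicit timekey from a <buffer> section is now read before compat_parameters_convert runs, and whichever value wins is written back to the buffer section unconditionally. A minimal standalone sketch of that decision, using a hypothetical infer_timekey helper (not part of the plugin) to keep it self-contained:

    # Sketch of the timekey selection above: the finest time format token in
    # `path` picks the default chunk interval; an explicit <buffer> timekey,
    # when present, takes precedence over the inferred default.
    def infer_timekey(path, buffer_timekey = nil)
      default = case path
                when /%S/ then 1        # per-second files
                when /%M/ then 60       # per-minute files
                when /%H/ then 3600     # hourly files
                else 86400              # daily files
                end
      buffer_timekey || default
    end

    infer_timekey("/hdfs/path/file.%Y%m%d-%M.log")          # => 60
    infer_timekey("/hdfs/path/file.%Y%m%d.log")             # => 86400
    infer_timekey("/hdfs/path/file.%Y%m%d_%H.log", "300")   # => "300" (explicit wins)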
@@ -308,11 +318,12 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
 
  def generate_path(chunk)
  hdfs_path = if @append
- extract_placeholders(@path, chunk.metadata)
+ extract_placeholders(@path, chunk)
  else
- extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk.metadata)
+ extract_placeholders(@path.gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id_hex(chunk.unique_id)), chunk)
  end
- hdfs_path = "#{hdfs_path}#{@compressor.ext}"
+ hdfs_ext = @extension || @compressor.ext
+ hdfs_path = "#{hdfs_path}#{hdfs_ext}"
  if @replace_random_uuid
  uuid_random = SecureRandom.uuid
  hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
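
generate_path now passes the chunk itself (rather than chunk.metadata) to extract_placeholders, and picks the file suffix from the new extension parameter when it is set, falling back to the compressor's default. A small sketch of just the suffix fallback, with a hypothetical pick_extension helper written for illustration (the .sz default is the snappy suffix mentioned in the README above):

    # Sketch of the suffix selection in generate_path: an explicit `extension`
    # setting overrides the compressor's default extension.
    def pick_extension(configured_extension, compressor_ext)
      configured_extension || compressor_ext
    end

    path = "/path/on/hdfs/access.log.20201003_12"
    "#{path}#{pick_extension(nil, '.sz')}"
    # => "/path/on/hdfs/access.log.20201003_12.sz"
    "#{path}#{pick_extension('.snappy', '.sz')}"
    # => "/path/on/hdfs/access.log.20201003_12.snappy"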
@@ -512,3 +523,4 @@ require 'fluent/plugin/webhdfs_compressor_gzip'
  require 'fluent/plugin/webhdfs_compressor_bzip2'
  require 'fluent/plugin/webhdfs_compressor_snappy'
  require 'fluent/plugin/webhdfs_compressor_lzo_command'
+ require 'fluent/plugin/webhdfs_compressor_zstd'
data/lib/fluent/plugin/webhdfs_compressor_snappy.rb CHANGED
@@ -16,9 +16,10 @@ module Fluent::Plugin
  end
 
  def compress(chunk, tmp)
- w = Snappy::Writer.new(tmp)
- chunk.write_to(w)
- w.close
+ Snappy::Writer.new(tmp) do |w|
+ w << chunk.read
+ w.flush
+ end
  end
  end
  end
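
The snappy compressor moves from building a Snappy::Writer by hand (and closing it explicitly) to the block form shown above, feeding it the whole chunk payload and flushing inside the block. A standalone sketch of the same pattern against a Tempfile, assuming the snappy gem is installed (payload and file name are placeholders):

    # Block form of Snappy::Writer as used by the compressor above, writing a
    # placeholder payload into a temporary file.
    require "snappy"
    require "tempfile"

    tmp = Tempfile.new("snappy-example")
    Snappy::Writer.new(tmp) do |w|
      w << "hello snappy\n" * 1024   # hand the payload to the framed writer
      w.flush                        # ensure buffered frames reach the file
    end
    puts tmp.size                    # compressed size; decompression omitted here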
data/lib/fluent/plugin/webhdfs_compressor_zstd.rb ADDED
@@ -0,0 +1,24 @@
+ module Fluent::Plugin
+ class WebHDFSOutput < Output
+ class ZstdCompressor < Compressor
+ WebHDFSOutput.register_compressor('zstd', self)
+
+ def initialize(options = {})
+ begin
+ require "zstandard"
+ rescue LoadError
+ raise Fluent::ConfigError, "Install zstandard gem before use of zstd compressor"
+ end
+ end
+
+ def ext
+ ".zst"
+ end
+
+ def compress(chunk, tmp)
+ tmp.binmode
+ tmp.write Zstandard.deflate(chunk.read)
+ end
+ end
+ end
+ end
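
The new zstd compressor simply writes Zstandard.deflate(chunk.read) to the temporary file in binary mode; the test file later in this diff reads it back with Zstandard.inflate. A self-contained round trip using the same two calls, assuming the zstandard gem and the native libzstd library are installed (the payload is a placeholder):

    # Round trip with the zstandard gem, mirroring the compressor's deflate
    # call and the test suite's inflate call.
    require "zstandard"

    payload    = "hello zstd\n" * 1024
    compressed = Zstandard.deflate(payload)
    restored   = Zstandard.inflate(compressed)

    puts compressed.bytesize < payload.bytesize  # => true for repetitive data
    puts restored == payload                     # => true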
data/test/plugin/test_out_webhdfs.rb CHANGED
@@ -126,6 +126,26 @@ class WebHDFSOutputTest < Test::Unit::TestCase
  assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
  assert_equal compress_type, d.instance.compress
  assert_equal compressor_class, d.instance.compressor.class
+
+ time = event_time("2020-10-03 15:07:00 +0300")
+ metadata = d.instance.metadata("test", time, {})
+ chunk = d.instance.buffer.generate_chunk(metadata)
+ assert_equal "/hdfs/path/file.20201003.1507.log#{d.instance.compressor.ext}", d.instance.generate_path(chunk)
+ end
+
+ def test_explicit_extensions
+ conf = config_element(
+ "ROOT", "", {
+ "host" => "namenode.local",
+ "path" => "/hdfs/path/file.%Y%m%d.log",
+ "compress" => "snappy",
+ "extension" => ".snappy"
+ })
+ d = create_driver(conf)
+ time = event_time("2020-10-07 15:15:00 +0300")
+ metadata = d.instance.metadata("test", time, {})
+ chunk = d.instance.buffer.generate_chunk(metadata)
+ assert_equal "/hdfs/path/file.20201007.log.snappy", d.instance.generate_path(chunk)
  end
 
  def test_placeholders_old_style
@@ -207,6 +227,21 @@ class WebHDFSOutputTest < Test::Unit::TestCase
  assert_equal 1, d.instance.buffer_config.timekey
  assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
  end
+
+ def test_time_key_without_buffer_section
+ conf = config_element(
+ "ROOT", "", {
+ "host" => "namenode.local",
+ "path" => "/hdfs/path/file.%Y%m%d-%M.log"
+ }
+ )
+ d = create_driver(conf)
+ time = event_time("2012-07-18 15:03:00 +0900")
+ metadata = d.instance.metadata("test", time, {})
+ chunk = d.instance.buffer.generate_chunk(metadata)
+ assert_equal 60, d.instance.buffer_config.timekey
+ assert_equal "/hdfs/path/file.20120718-03.log", d.instance.generate_path(chunk)
+ end
  end
 
  sub_test_case "using format subsection" do
data/test/plugin/test_zstd_compressor.rb ADDED
@@ -0,0 +1,55 @@
+ require "helper"
+ require "fluent/plugin/buf_memory"
+ require 'zstandard'
+
+ class ZstdCompressorTest < Test::Unit::TestCase
+ class Zstd < self
+
+ CONFIG = %[
+ host namenode.local
+ path /hdfs/path/file.%Y%m%d.log
+ ]
+
+ def setup
+ omit unless Object.const_defined?(:Zstandard)
+ Fluent::Test.setup
+ @compressor = Fluent::Plugin::WebHDFSOutput::ZstdCompressor.new
+ end
+
+ def create_driver(conf = CONFIG)
+ Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
+ end
+
+ def test_ext
+ assert_equal(".zst", @compressor.ext)
+ end
+
+ def test_compress
+ d = create_driver
+ if d.instance.respond_to?(:buffer)
+ buffer = d.instance.buffer
+ else
+ buffer = d.instance.instance_variable_get(:@buffer)
+ end
+
+ if buffer.respond_to?(:generate_chunk)
+ chunk = buffer.generate_chunk("test")
+ chunk.concat("hello gzip\n" * 32 * 1024, 1)
+ else
+ chunk = buffer.new_chunk("test")
+ chunk << "hello gzip\n" * 32 * 1024
+ end
+
+ io = Tempfile.new("zstd-")
+ @compressor.compress(chunk, io)
+ assert !io.closed?
+ chunk_bytesize = chunk.respond_to?(:bytesize) ? chunk.bytesize : chunk.size
+ assert(chunk_bytesize > io.read.bytesize)
+ io.rewind
+ reader = Zstandard.inflate(io.read)
+ io.rewind
+ assert_equal(chunk.read, reader)
+ io.close
+ end
+ end
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-webhdfs
  version: !ruby/object:Gem::Version
- version: 1.2.2
+ version: 1.3.1
  platform: ruby
  authors:
  - TAGOMORI Satoshi
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-08-23 00:00:00.000000000 Z
+ date: 2020-11-09 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake
@@ -94,20 +94,34 @@ dependencies:
  - - ">="
  - !ruby/object:Gem::Version
  version: '0'
+ - !ruby/object:Gem::Dependency
+ name: zstandard
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
+ type: :development
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
  - !ruby/object:Gem::Dependency
  name: fluentd
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.14.4
+ version: 0.14.22
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.14.4
+ version: 0.14.22
  - !ruby/object:Gem::Dependency
  name: webhdfs
  requirement: !ruby/object:Gem::Requirement
@@ -143,10 +157,12 @@ files:
  - lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
  - lib/fluent/plugin/webhdfs_compressor_snappy.rb
  - lib/fluent/plugin/webhdfs_compressor_text.rb
+ - lib/fluent/plugin/webhdfs_compressor_zstd.rb
  - test/helper.rb
  - test/plugin/test_compressor.rb
  - test/plugin/test_gzip_compressor.rb
  - test/plugin/test_out_webhdfs.rb
+ - test/plugin/test_zstd_compressor.rb
  homepage: https://github.com/fluent/fluent-plugin-webhdfs
  licenses:
  - Apache-2.0
@@ -166,8 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubyforge_project:
- rubygems_version: 2.6.11
+ rubygems_version: 3.0.3
  signing_key:
  specification_version: 4
  summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
@@ -176,3 +191,4 @@ test_files:
  - test/plugin/test_compressor.rb
  - test/plugin/test_gzip_compressor.rb
  - test/plugin/test_out_webhdfs.rb
+ - test/plugin/test_zstd_compressor.rb