fluent-plugin-webhdfs 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e67f69a6f94c64ebe9b463b262111626a5cfb0e3
4
- data.tar.gz: 9786c0456e296851caf6001898c8e30751724b04
3
+ metadata.gz: 5e43e061acd7552d5a0e482bf7ced1e319d1d107
4
+ data.tar.gz: 934ac37c1a6d985261ae573ce0724546481269ff
5
5
  SHA512:
6
- metadata.gz: 6766521d3edaf773135c7d948ba0f8989241535795f7a38d61961c3dd9823df83955c0e4f817b029600c8e83495e24443015d2b863f68dd911dc6ea7ca5f8274
7
- data.tar.gz: 3834bcb691d9d5c86df91169f44f77a9aaad09754d849919138be5ce403271b3a168c3620a0bbafc80bd8f3a2553cc0b3fd97bc14e794c0637dd1cc06dc3c7ec
6
+ metadata.gz: be9c06348692d4f0506f0e7d8caa1fc4d6bfa4d683c1fdb0daa3fd87d4657ddbebaff4380c3fc03405030836cabc2fb856e02754aa8bccff1ddd5f7c7b49980f
7
+ data.tar.gz: fee1d87a57a64ea65ca14a2f3f7765eb115bd362d504f0ccade232132a384b4d8f599ccd264c8cdcb2f326c4a8062b7c5e456e4cf11c8cc7fab5d78dadf09e3a
data/.travis.yml CHANGED
@@ -1,9 +1,9 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - 1.9.2
5
4
  - 1.9.3
6
5
  - 2.0.0
6
+ - 2.1.1
7
7
 
8
8
  branches:
9
9
  only:
data/README.md CHANGED
@@ -120,6 +120,18 @@ With kerberos authentication:
120
120
  kerberos true
121
121
  </match>
122
122
 
123
+ If you want to compress data before storing it:
124
+
125
+ <match access.**>
126
+ type webhdfs
127
+ host namenode.your.cluster.local
128
+ port 50070
129
+ path /path/on/hdfs/access.log.%Y%m%d_%H
130
+ compress gzip # currently only gzip is supported
131
+ </match>
132
+
133
+ Note that if you set `compress gzip`, the suffix `.gz` will be appended to the path.
134
+
123
135
  ### Namenode HA / Auto retry for WebHDFS known errors
124
136
 
125
137
  `fluent-plugin-webhdfs` (v0.2.0 or later) accepts 2 namenodes for Namenode HA (active/standby). Use `standby_namenode` like this:
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-webhdfs"
5
- gem.version = "0.4.0"
5
+ gem.version = "0.4.1"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -50,12 +50,20 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
50
50
  when 'peer'
51
51
  :peer
52
52
  else
53
- raise ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
53
+ raise Fluent::ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
54
54
  end
55
55
  end
56
56
 
57
57
  config_param :kerberos, :bool, :default => false
58
58
 
59
+ SUPPORTED_COMPRESS = ['gzip']
60
+ config_param :compress, :default => nil do |val|
61
+ unless SUPPORTED_COMPRESS.include? val
62
+ raise Fluent::ConfigError, "unsupported compress: #{val}"
63
+ end
64
+ val
65
+ end
66
+
59
67
  CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
60
68
 
61
69
  def initialize
@@ -216,16 +224,52 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
216
224
  end
217
225
  end
218
226
 
219
- def write(chunk)
227
+ def generate_path(chunk)
220
228
  hdfs_path = if @append
221
229
  path_format(chunk.key)
222
230
  else
223
231
  path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
224
232
  end
233
+ if @compress
234
+ case @compress
235
+ when 'gzip'
236
+ hdfs_path = "#{hdfs_path}.gz"
237
+ end
238
+ end
239
+ hdfs_path
240
+ end
241
+
242
+ def compress_context(chunk, &block)
243
+ case @compress
244
+ when 'gzip'
245
+ require 'zlib'
246
+ require 'tempfile'
247
+ tmp = Tempfile.new("webhdfs-")
248
+ begin
249
+ w = Zlib::GzipWriter.new(tmp)
250
+ chunk.write_to(w)
251
+ w.close
252
+ tmp.close
253
+ tmp.open
254
+ yield tmp
255
+ ensure
256
+ tmp.close(true) rescue nil
257
+ end
258
+ end
259
+ end
260
+
261
+ def write(chunk)
262
+ hdfs_path = generate_path(chunk)
225
263
 
226
264
  failovered = false
227
265
  begin
228
- send_data(hdfs_path, chunk.read)
266
+ if @compress
267
+ compress_context(chunk) do |data|
268
+ send_data(hdfs_path, data)
269
+ end
270
+ else
271
+ send_data(hdfs_path, chunk.read)
272
+ end
229
273
  rescue => e
230
274
  log.warn "failed to communicate hdfs cluster, path: #{hdfs_path}"
231
275
 
@@ -57,6 +57,17 @@ kerberos true
57
57
  assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
58
58
  assert_equal :peer, d.instance.ssl_verify_mode
59
59
  assert_equal true, d.instance.kerberos
60
+
61
+ d = create_driver %[
62
+ namenode server.local:14000
63
+ path /hdfs/path/file.%Y%m%d.%H%M.log
64
+ compress gzip
65
+ ]
66
+ assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
67
+ assert_equal 14000, d.instance.instance_eval{ @namenode_port }
68
+ assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
69
+ assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
70
+ assert_equal 'gzip', d.instance.compress
60
71
  end
61
72
 
62
73
  def test_configure_placeholders
@@ -90,4 +101,23 @@ path /hdfs/path/file.%Y%m%d.%H%M.log
90
101
  ]
91
102
  end
92
103
  end
104
+
105
+ def test_invalid_configure
106
+ assert_raise Fluent::ConfigError do
107
+ create_driver %[
108
+ namenode server.local:14000
109
+ path /hdfs/path/file.%Y%m%d.%H%M.log
110
+ ssl true
111
+ ssl_verify_mode invalid
112
+ ]
113
+ end
114
+ assert_raise Fluent::ConfigError do
115
+ create_driver %[
116
+ namenode server.local:14000
117
+ path /hdfs/path/file.%Y%m%d.%H%M.log
118
+ compress invalid
119
+ ]
120
+ end
121
+ end
122
+
93
123
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-30 00:00:00.000000000 Z
11
+ date: 2014-11-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake