fluent-plugin-webhdfs 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e67f69a6f94c64ebe9b463b262111626a5cfb0e3
4
- data.tar.gz: 9786c0456e296851caf6001898c8e30751724b04
3
+ metadata.gz: 5e43e061acd7552d5a0e482bf7ced1e319d1d107
4
+ data.tar.gz: 934ac37c1a6d985261ae573ce0724546481269ff
5
5
  SHA512:
6
- metadata.gz: 6766521d3edaf773135c7d948ba0f8989241535795f7a38d61961c3dd9823df83955c0e4f817b029600c8e83495e24443015d2b863f68dd911dc6ea7ca5f8274
7
- data.tar.gz: 3834bcb691d9d5c86df91169f44f77a9aaad09754d849919138be5ce403271b3a168c3620a0bbafc80bd8f3a2553cc0b3fd97bc14e794c0637dd1cc06dc3c7ec
6
+ metadata.gz: be9c06348692d4f0506f0e7d8caa1fc4d6bfa4d683c1fdb0daa3fd87d4657ddbebaff4380c3fc03405030836cabc2fb856e02754aa8bccff1ddd5f7c7b49980f
7
+ data.tar.gz: fee1d87a57a64ea65ca14a2f3f7765eb115bd362d504f0ccade232132a384b4d8f599ccd264c8cdcb2f326c4a8062b7c5e456e4cf11c8cc7fab5d78dadf09e3a
data/.travis.yml CHANGED
@@ -1,9 +1,9 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - 1.9.2
5
4
  - 1.9.3
6
5
  - 2.0.0
6
+ - 2.1.1
7
7
 
8
8
  branches:
9
9
  only:
data/README.md CHANGED
@@ -120,6 +120,18 @@ With kerberos authentication:
120
120
  kerberos true
121
121
  </match>
122
122
 
123
+ If you want to compress data before storing it:
124
+
125
+ <match access.**>
126
+ type webhdfs
127
+ host namenode.your.cluster.local
128
+ port 50070
129
+ path /path/on/hdfs/access.log.%Y%m%d_%H
130
+ compress gzip # currently only gzip is supported
131
+ </match>
132
+
133
+ Note that if you set `compress gzip`, the suffix `.gz` will be appended to the path.
134
+
123
135
  ### Namenode HA / Auto retry for WebHDFS known errors
124
136
 
125
137
  `fluent-plugin-webhdfs` (v0.2.0 or later) accepts 2 namenodes for Namenode HA (active/standby). Use `standby_namenode` like this:
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-webhdfs"
5
- gem.version = "0.4.0"
5
+ gem.version = "0.4.1"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -50,12 +50,20 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
50
50
  when 'peer'
51
51
  :peer
52
52
  else
53
- raise ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
53
+ raise Fluent::ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
54
54
  end
55
55
  end
56
56
 
57
57
  config_param :kerberos, :bool, :default => false
58
58
 
59
+ SUPPORTED_COMPRESS = ['gzip']
60
+ config_param :compress, :default => nil do |val|
61
+ unless SUPPORTED_COMPRESS.include? val
62
+ raise Fluent::ConfigError, "unsupported compress: #{val}"
63
+ end
64
+ val
65
+ end
66
+
59
67
  CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
60
68
 
61
69
  def initialize
@@ -216,16 +224,52 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
216
224
  end
217
225
  end
218
226
 
219
- def write(chunk)
227
+ def generate_path(chunk)
220
228
  hdfs_path = if @append
221
229
  path_format(chunk.key)
222
230
  else
223
231
  path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
224
232
  end
233
+ if @compress
234
+ case @compress
235
+ when 'gzip'
236
+ hdfs_path = "#{hdfs_path}.gz"
237
+ end
238
+ end
239
+ hdfs_path
240
+ end
241
+
242
+ def compress_context(chunk, &block)
243
+ case @compress
244
+ when 'gzip'
245
+ require 'zlib'
246
+ require 'tempfile'
247
+ tmp = Tempfile.new("webhdfs-")
248
+ begin
249
+ w = Zlib::GzipWriter.new(tmp)
250
+ chunk.write_to(w)
251
+ w.close
252
+ tmp.close
253
+ tmp.open
254
+ yield tmp
255
+ ensure
256
+ tmp.close(true) rescue nil
257
+ end
258
+ end
259
+ end
260
+
261
+ def write(chunk)
262
+ hdfs_path = generate_path(chunk)
225
263
 
226
264
  failovered = false
227
265
  begin
228
- send_data(hdfs_path, chunk.read)
266
+ if @compress
267
+ compress_context(chunk) do |data|
268
+ send_data(hdfs_path, data)
269
+ end
270
+ else
271
+ send_data(hdfs_path, chunk.read)
272
+ end
229
273
  rescue => e
230
274
  log.warn "failed to communicate hdfs cluster, path: #{hdfs_path}"
231
275
 
@@ -57,6 +57,17 @@ kerberos true
57
57
  assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
58
58
  assert_equal :peer, d.instance.ssl_verify_mode
59
59
  assert_equal true, d.instance.kerberos
60
+
61
+ d = create_driver %[
62
+ namenode server.local:14000
63
+ path /hdfs/path/file.%Y%m%d.%H%M.log
64
+ compress gzip
65
+ ]
66
+ assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
67
+ assert_equal 14000, d.instance.instance_eval{ @namenode_port }
68
+ assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
69
+ assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
70
+ assert_equal 'gzip', d.instance.compress
60
71
  end
61
72
 
62
73
  def test_configure_placeholders
@@ -90,4 +101,23 @@ path /hdfs/path/file.%Y%m%d.%H%M.log
90
101
  ]
91
102
  end
92
103
  end
104
+
105
+ def test_invalid_configure
106
+ assert_raise Fluent::ConfigError do
107
+ create_driver %[
108
+ namenode server.local:14000
109
+ path /hdfs/path/file.%Y%m%d.%H%M.log
110
+ ssl true
111
+ ssl_verify_mode invalid
112
+ ]
113
+ end
114
+ assert_raise Fluent::ConfigError do
115
+ create_driver %[
116
+ namenode server.local:14000
117
+ path /hdfs/path/file.%Y%m%d.%H%M.log
118
+ compress invalid
119
+ ]
120
+ end
121
+ end
122
+
93
123
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-30 00:00:00.000000000 Z
11
+ date: 2014-11-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake