fluent-plugin-webhdfs 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +12 -0
- data/fluent-plugin-webhdfs.gemspec +1 -1
- data/lib/fluent/plugin/out_webhdfs.rb +47 -3
- data/test/plugin/test_out_webhdfs.rb +30 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e43e061acd7552d5a0e482bf7ced1e319d1d107
|
4
|
+
data.tar.gz: 934ac37c1a6d985261ae573ce0724546481269ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be9c06348692d4f0506f0e7d8caa1fc4d6bfa4d683c1fdb0daa3fd87d4657ddbebaff4380c3fc03405030836cabc2fb856e02754aa8bccff1ddd5f7c7b49980f
|
7
|
+
data.tar.gz: fee1d87a57a64ea65ca14a2f3f7765eb115bd362d504f0ccade232132a384b4d8f599ccd264c8cdcb2f326c4a8062b7c5e456e4cf11c8cc7fab5d78dadf09e3a
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -120,6 +120,18 @@ With kerberos authentication:
|
|
120
120
|
kerberos true
|
121
121
|
</match>
|
122
122
|
|
123
|
+
If you want to compress data before storing it:
|
124
|
+
|
125
|
+
<match access.**>
|
126
|
+
type webhdfs
|
127
|
+
host namenode.your.cluster.local
|
128
|
+
port 50070
|
129
|
+
path /path/on/hdfs/access.log.%Y%m%d_%H
|
130
|
+
compress gzip # currently only support gzip
|
131
|
+
</match>
|
132
|
+
|
133
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path.
|
134
|
+
|
123
135
|
### Namenode HA / Auto retry for WebHDFS known errors
|
124
136
|
|
125
137
|
`fluent-plugin-webhdfs` (v0.2.0 or later) accepts 2 namenodes for Namenode HA (active/standby). Use `standby_namenode` like this:
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
5
|
-
gem.version = "0.4.0"
|
5
|
+
gem.version = "0.4.1"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -50,12 +50,20 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
50
50
|
when 'peer'
|
51
51
|
:peer
|
52
52
|
else
|
53
|
-
raise ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
|
53
|
+
raise Fluent::ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
57
57
|
config_param :kerberos, :bool, :default => false
|
58
58
|
|
59
|
+
SUPPORTED_COMPRESS = ['gzip']
|
60
|
+
config_param :compress, :default => nil do |val|
|
61
|
+
unless SUPPORTED_COMPRESS.include? val
|
62
|
+
raise Fluent::ConfigError, "unsupported compress: #{val}"
|
63
|
+
end
|
64
|
+
val
|
65
|
+
end
|
66
|
+
|
59
67
|
CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
|
60
68
|
|
61
69
|
def initialize
|
@@ -216,16 +224,52 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
216
224
|
end
|
217
225
|
end
|
218
226
|
|
219
|
-
def write(chunk)
|
227
|
+
def generate_path(chunk)
|
220
228
|
hdfs_path = if @append
|
221
229
|
path_format(chunk.key)
|
222
230
|
else
|
223
231
|
path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
|
224
232
|
end
|
233
|
+
if @compress
|
234
|
+
case @compress
|
235
|
+
when 'gzip'
|
236
|
+
hdfs_path = "#{hdfs_path}.gz"
|
237
|
+
end
|
238
|
+
end
|
239
|
+
hdfs_path
|
240
|
+
end
|
241
|
+
|
242
|
+
def compress_context(chunk, &block)
|
243
|
+
case @compress
|
244
|
+
when 'gzip'
|
245
|
+
require 'zlib'
|
246
|
+
require 'tempfile'
|
247
|
+
tmp = Tempfile.new("webhdfs-")
|
248
|
+
begin
|
249
|
+
w = Zlib::GzipWriter.new(tmp)
|
250
|
+
chunk.write_to(w)
|
251
|
+
w.close
|
252
|
+
tmp.close
|
253
|
+
tmp.open
|
254
|
+
yield tmp
|
255
|
+
ensure
|
256
|
+
tmp.close(true) rescue nil
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end
|
260
|
+
|
261
|
+
def write(chunk)
|
262
|
+
hdfs_path = generate_path(chunk)
|
225
263
|
|
226
264
|
failovered = false
|
227
265
|
begin
|
228
|
-
send_data(hdfs_path, chunk.read)
|
266
|
+
if @compress
|
267
|
+
compress_context(chunk) do |data|
|
268
|
+
send_data(hdfs_path, data)
|
269
|
+
end
|
270
|
+
else
|
271
|
+
send_data(hdfs_path, chunk.read)
|
272
|
+
end
|
229
273
|
rescue => e
|
230
274
|
log.warn "failed to communicate hdfs cluster, path: #{hdfs_path}"
|
231
275
|
|
@@ -57,6 +57,17 @@ kerberos true
|
|
57
57
|
assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
|
58
58
|
assert_equal :peer, d.instance.ssl_verify_mode
|
59
59
|
assert_equal true, d.instance.kerberos
|
60
|
+
|
61
|
+
d = create_driver %[
|
62
|
+
namenode server.local:14000
|
63
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
64
|
+
compress gzip
|
65
|
+
]
|
66
|
+
assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
|
67
|
+
assert_equal 14000, d.instance.instance_eval{ @namenode_port }
|
68
|
+
assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
|
69
|
+
assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
|
70
|
+
assert_equal 'gzip', d.instance.compress
|
60
71
|
end
|
61
72
|
|
62
73
|
def test_configure_placeholders
|
@@ -90,4 +101,23 @@ path /hdfs/path/file.%Y%m%d.%H%M.log
|
|
90
101
|
]
|
91
102
|
end
|
92
103
|
end
|
104
|
+
|
105
|
+
def test_invalid_configure
|
106
|
+
assert_raise Fluent::ConfigError do
|
107
|
+
create_driver %[
|
108
|
+
namenode server.local:14000
|
109
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
110
|
+
ssl true
|
111
|
+
ssl_verify_mode invalid
|
112
|
+
]
|
113
|
+
end
|
114
|
+
assert_raise Fluent::ConfigError do
|
115
|
+
create_driver %[
|
116
|
+
namenode server.local:14000
|
117
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
118
|
+
compress invalid
|
119
|
+
]
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
93
123
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10
|
11
|
+
date: 2014-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|