fluent-plugin-webhdfs 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +12 -0
- data/fluent-plugin-webhdfs.gemspec +1 -1
- data/lib/fluent/plugin/out_webhdfs.rb +47 -3
- data/test/plugin/test_out_webhdfs.rb +30 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e43e061acd7552d5a0e482bf7ced1e319d1d107
|
4
|
+
data.tar.gz: 934ac37c1a6d985261ae573ce0724546481269ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be9c06348692d4f0506f0e7d8caa1fc4d6bfa4d683c1fdb0daa3fd87d4657ddbebaff4380c3fc03405030836cabc2fb856e02754aa8bccff1ddd5f7c7b49980f
|
7
|
+
data.tar.gz: fee1d87a57a64ea65ca14a2f3f7765eb115bd362d504f0ccade232132a384b4d8f599ccd264c8cdcb2f326c4a8062b7c5e456e4cf11c8cc7fab5d78dadf09e3a
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -120,6 +120,18 @@ With kerberos authentication:
|
|
120
120
|
kerberos true
|
121
121
|
</match>
|
122
122
|
|
123
|
+
If you want to compress data before storing it:
|
124
|
+
|
125
|
+
<match access.**>
|
126
|
+
type webhdfs
|
127
|
+
host namenode.your.cluster.local
|
128
|
+
port 50070
|
129
|
+
path /path/on/hdfs/access.log.%Y%m%d_%H
|
130
|
+
compress gzip # currently only support gzip
|
131
|
+
</match>
|
132
|
+
|
133
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path.
|
134
|
+
|
123
135
|
### Namenode HA / Auto retry for WebHDFS known errors
|
124
136
|
|
125
137
|
`fluent-plugin-webhdfs` (v0.2.0 or later) accepts 2 namenodes for Namenode HA (active/standby). Use `standby_namenode` like this:
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
5
|
-
gem.version = "0.4.0"
|
5
|
+
gem.version = "0.4.1"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -50,12 +50,20 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
50
50
|
when 'peer'
|
51
51
|
:peer
|
52
52
|
else
|
53
|
-
raise ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
|
53
|
+
raise Fluent::ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
57
57
|
config_param :kerberos, :bool, :default => false
|
58
58
|
|
59
|
+
SUPPORTED_COMPRESS = ['gzip']
|
60
|
+
config_param :compress, :default => nil do |val|
|
61
|
+
unless SUPPORTED_COMPRESS.include? val
|
62
|
+
raise Fluent::ConfigError, "unsupported compress: #{val}"
|
63
|
+
end
|
64
|
+
val
|
65
|
+
end
|
66
|
+
|
59
67
|
CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
|
60
68
|
|
61
69
|
def initialize
|
@@ -216,16 +224,52 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
216
224
|
end
|
217
225
|
end
|
218
226
|
|
219
|
-
def write(chunk)
|
227
|
+
def generate_path(chunk)
|
220
228
|
hdfs_path = if @append
|
221
229
|
path_format(chunk.key)
|
222
230
|
else
|
223
231
|
path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
|
224
232
|
end
|
233
|
+
if @compress
|
234
|
+
case @compress
|
235
|
+
when 'gzip'
|
236
|
+
hdfs_path = "#{hdfs_path}.gz"
|
237
|
+
end
|
238
|
+
end
|
239
|
+
hdfs_path
|
240
|
+
end
|
241
|
+
|
242
|
+
def compress_context(chunk, &block)
|
243
|
+
case @compress
|
244
|
+
when 'gzip'
|
245
|
+
require 'zlib'
|
246
|
+
require 'tempfile'
|
247
|
+
tmp = Tempfile.new("webhdfs-")
|
248
|
+
begin
|
249
|
+
w = Zlib::GzipWriter.new(tmp)
|
250
|
+
chunk.write_to(w)
|
251
|
+
w.close
|
252
|
+
tmp.close
|
253
|
+
tmp.open
|
254
|
+
yield tmp
|
255
|
+
ensure
|
256
|
+
tmp.close(true) rescue nil
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end
|
260
|
+
|
261
|
+
def write(chunk)
|
262
|
+
hdfs_path = generate_path(chunk)
|
225
263
|
|
226
264
|
failovered = false
|
227
265
|
begin
|
228
|
-
send_data(hdfs_path, chunk.read)
|
266
|
+
if @compress
|
267
|
+
compress_context(chunk) do |data|
|
268
|
+
send_data(hdfs_path, data)
|
269
|
+
end
|
270
|
+
else
|
271
|
+
send_data(hdfs_path, chunk.read)
|
272
|
+
end
|
229
273
|
rescue => e
|
230
274
|
log.warn "failed to communicate hdfs cluster, path: #{hdfs_path}"
|
231
275
|
|
@@ -57,6 +57,17 @@ kerberos true
|
|
57
57
|
assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
|
58
58
|
assert_equal :peer, d.instance.ssl_verify_mode
|
59
59
|
assert_equal true, d.instance.kerberos
|
60
|
+
|
61
|
+
d = create_driver %[
|
62
|
+
namenode server.local:14000
|
63
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
64
|
+
compress gzip
|
65
|
+
]
|
66
|
+
assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
|
67
|
+
assert_equal 14000, d.instance.instance_eval{ @namenode_port }
|
68
|
+
assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
|
69
|
+
assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
|
70
|
+
assert_equal 'gzip', d.instance.compress
|
60
71
|
end
|
61
72
|
|
62
73
|
def test_configure_placeholders
|
@@ -90,4 +101,23 @@ path /hdfs/path/file.%Y%m%d.%H%M.log
|
|
90
101
|
]
|
91
102
|
end
|
92
103
|
end
|
104
|
+
|
105
|
+
def test_invalid_configure
|
106
|
+
assert_raise Fluent::ConfigError do
|
107
|
+
create_driver %[
|
108
|
+
namenode server.local:14000
|
109
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
110
|
+
ssl true
|
111
|
+
ssl_verify_mode invalid
|
112
|
+
]
|
113
|
+
end
|
114
|
+
assert_raise Fluent::ConfigError do
|
115
|
+
create_driver %[
|
116
|
+
namenode server.local:14000
|
117
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
118
|
+
compress invalid
|
119
|
+
]
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
93
123
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10
|
11
|
+
date: 2014-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|