fluent-plugin-webhdfs 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +6 -0
- data/Appraisals +4 -0
- data/README.md +20 -20
- data/fluent-plugin-webhdfs.gemspec +3 -2
- data/gemfiles/fluentd_v0.12.gemfile +7 -0
- data/lib/fluent/plugin/out_webhdfs.rb +2 -1
- data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb +31 -0
- data/test/plugin/test_out_webhdfs.rb +2 -1
- metadata +22 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c6a284ca4dbae16f8154b790f6201d37f75b27a1
|
|
4
|
+
data.tar.gz: bbdd1477dd222f2a44308e1afcb7ec1ddd0f57ac
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 986dd5af2013f754606db0a51abbfd1c4870e7e4c2cebf759bb27c02d9e73f1b7c1c1f2b5d57a53a5578ae65febcc20bb4022dcabbcef90c0ce40f34a660d433
|
|
7
|
+
data.tar.gz: 8768473997ac56b9af81c517e671a3c22ecf5048a841cc1b119c53ac6952b133751c7206b55237d06fc8268d152ccd454559360f270943172d6d105cc6ba9d5a
|
data/.travis.yml
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
sudo: false
|
|
1
2
|
language: ruby
|
|
2
3
|
|
|
3
4
|
rvm:
|
|
4
5
|
- 2.0.0
|
|
5
6
|
- 2.1
|
|
6
7
|
- 2.2
|
|
8
|
+
- 2.3.0
|
|
7
9
|
|
|
8
10
|
branches:
|
|
9
11
|
only:
|
|
@@ -13,3 +15,7 @@ before_install:
|
|
|
13
15
|
- gem update bundler
|
|
14
16
|
|
|
15
17
|
script: bundle exec rake test
|
|
18
|
+
|
|
19
|
+
gemfile:
|
|
20
|
+
- Gemfile
|
|
21
|
+
- gemfiles/fluentd_v0.12.gemfile
|
data/Appraisals
ADDED
data/README.md
CHANGED
|
@@ -17,10 +17,10 @@ And you can specify output file path as 'path /path/to/dir/access.%Y%m%d.log', t
|
|
|
17
17
|
|
|
18
18
|
### WebHDFSOutput
|
|
19
19
|
|
|
20
|
-
To store data by time,tag,json (same with 'type file') over WebHDFS:
|
|
20
|
+
To store data by time,tag,json (same with '@type file') over WebHDFS:
|
|
21
21
|
|
|
22
22
|
<match access.**>
|
|
23
|
-
type webhdfs
|
|
23
|
+
@type webhdfs
|
|
24
24
|
host namenode.your.cluster.local
|
|
25
25
|
port 50070
|
|
26
26
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
@@ -29,7 +29,7 @@ To store data by time,tag,json (same with 'type file') over WebHDFS:
|
|
|
29
29
|
If you want JSON object only (without time or tag or both on header of lines), specify it by `output_include_time` or `output_include_tag` (default true):
|
|
30
30
|
|
|
31
31
|
<match access.**>
|
|
32
|
-
type webhdfs
|
|
32
|
+
@type webhdfs
|
|
33
33
|
host namenode.your.cluster.local
|
|
34
34
|
port 50070
|
|
35
35
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
@@ -40,7 +40,7 @@ If you want JSON object only (without time or tag or both on header of lines), s
|
|
|
40
40
|
To specify namenode, `namenode` is also available:
|
|
41
41
|
|
|
42
42
|
<match access.**>
|
|
43
|
-
type webhdfs
|
|
43
|
+
@type webhdfs
|
|
44
44
|
namenode master.your.cluster.local:50070
|
|
45
45
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
46
46
|
</match>
|
|
@@ -48,7 +48,7 @@ To specify namenode, `namenode` is also available:
|
|
|
48
48
|
To store data as LTSV without time and tag over WebHDFS:
|
|
49
49
|
|
|
50
50
|
<match access.**>
|
|
51
|
-
type webhdfs
|
|
51
|
+
@type webhdfs
|
|
52
52
|
host namenode.your.cluster.local
|
|
53
53
|
port 50070
|
|
54
54
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
@@ -58,7 +58,7 @@ To store data as LTSV without time and tag over WebHDFS:
|
|
|
58
58
|
With username of pseudo authentication:
|
|
59
59
|
|
|
60
60
|
<match access.**>
|
|
61
|
-
type webhdfs
|
|
61
|
+
@type webhdfs
|
|
62
62
|
host namenode.your.cluster.local
|
|
63
63
|
port 50070
|
|
64
64
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
@@ -68,7 +68,7 @@ With username of pseudo authentication:
|
|
|
68
68
|
Store data over HttpFs (instead of WebHDFS):
|
|
69
69
|
|
|
70
70
|
<match access.**>
|
|
71
|
-
type webhdfs
|
|
71
|
+
@type webhdfs
|
|
72
72
|
host httpfs.node.your.cluster.local
|
|
73
73
|
port 14000
|
|
74
74
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
@@ -78,7 +78,7 @@ Store data over HttpFs (instead of WebHDFS):
|
|
|
78
78
|
Store data as TSV (TAB separated values) of specified keys, without time, with tag (removed prefix 'access'):
|
|
79
79
|
|
|
80
80
|
<match access.**>
|
|
81
|
-
type webhdfs
|
|
81
|
+
@type webhdfs
|
|
82
82
|
host namenode.your.cluster.local
|
|
83
83
|
port 50070
|
|
84
84
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
@@ -96,7 +96,7 @@ If message doesn't have specified attribute, fluent-plugin-webhdfs outputs 'NULL
|
|
|
96
96
|
With ssl:
|
|
97
97
|
|
|
98
98
|
<match access.**>
|
|
99
|
-
type webhdfs
|
|
99
|
+
@type webhdfs
|
|
100
100
|
host namenode.your.cluster.local
|
|
101
101
|
port 50070
|
|
102
102
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
@@ -113,7 +113,7 @@ and [openssl](http://www.ruby-doc.org/stdlib-2.1.3/libdoc/openssl/rdoc/OpenSSL.h
|
|
|
113
113
|
With kerberos authentication:
|
|
114
114
|
|
|
115
115
|
<match access.**>
|
|
116
|
-
type webhdfs
|
|
116
|
+
@type webhdfs
|
|
117
117
|
host namenode.your.cluster.local
|
|
118
118
|
port 50070
|
|
119
119
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
@@ -123,21 +123,21 @@ With kerberos authentication:
|
|
|
123
123
|
If you want to compress data before storing it:
|
|
124
124
|
|
|
125
125
|
<match access.**>
|
|
126
|
-
type webhdfs
|
|
126
|
+
@type webhdfs
|
|
127
127
|
host namenode.your.cluster.local
|
|
128
128
|
port 50070
|
|
129
129
|
path /path/on/hdfs/access.log.%Y%m%d_%H
|
|
130
|
-
compress gzip #
|
|
130
|
+
compress gzip # or 'bzip2', 'lzo_command'
|
|
131
131
|
</match>
|
|
132
132
|
|
|
133
|
-
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path.
|
|
133
|
+
Note that if you set `compress gzip`, then the suffix `.gz` will be added to path (or `.bz2`, `.lzo`).
|
|
134
134
|
|
|
135
135
|
### Namenode HA / Auto retry for WebHDFS known errors
|
|
136
136
|
|
|
137
137
|
`fluent-plugin-webhdfs` (v0.2.0 or later) accepts 2 namenodes for Namenode HA (active/standby). Use `standby_namenode` like this:
|
|
138
138
|
|
|
139
139
|
<match access.**>
|
|
140
|
-
type webhdfs
|
|
140
|
+
@type webhdfs
|
|
141
141
|
namenode master1.your.cluster.local:50070
|
|
142
142
|
standby_namenode master2.your.cluster.local:50070
|
|
143
143
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
@@ -146,7 +146,7 @@ Note that if you set `compress gzip`, then the suffix `.gz` will be added to pat
|
|
|
146
146
|
And you can also specify to retry known hdfs errors (such like `LeaseExpiredException`) automatically. With this configuration, fluentd doesn't write logs for this errors if retry successed.
|
|
147
147
|
|
|
148
148
|
<match access.**>
|
|
149
|
-
type webhdfs
|
|
149
|
+
@type webhdfs
|
|
150
150
|
namenode master1.your.cluster.local:50070
|
|
151
151
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
|
152
152
|
retry_known_errors yes
|
|
@@ -162,7 +162,7 @@ You can use '${hostname}' or '${uuid:random}' placeholders in configuration for
|
|
|
162
162
|
For hostname:
|
|
163
163
|
|
|
164
164
|
<match access.**>
|
|
165
|
-
type webhdfs
|
|
165
|
+
@type webhdfs
|
|
166
166
|
host namenode.your.cluster.local
|
|
167
167
|
port 50070
|
|
168
168
|
path /log/access/%Y%m%d/${hostname}.log
|
|
@@ -171,7 +171,7 @@ For hostname:
|
|
|
171
171
|
Or with random filename (to avoid duplicated file name only):
|
|
172
172
|
|
|
173
173
|
<match access.**>
|
|
174
|
-
type webhdfs
|
|
174
|
+
@type webhdfs
|
|
175
175
|
host namenode.your.cluster.local
|
|
176
176
|
port 50070
|
|
177
177
|
path /log/access/%Y%m%d/${uuid:random}.log
|
|
@@ -182,7 +182,7 @@ With configurations above, you can handle all of files of '/log/access/20120820/
|
|
|
182
182
|
For high load cluster nodes, you can specify timeouts for HTTP requests.
|
|
183
183
|
|
|
184
184
|
<match access.**>
|
|
185
|
-
type webhdfs
|
|
185
|
+
@type webhdfs
|
|
186
186
|
namenode master.your.cluster.local:50070
|
|
187
187
|
path /log/access/%Y%m%d/${hostname}.log
|
|
188
188
|
open_timeout 180 # [sec] default: 30
|
|
@@ -196,7 +196,7 @@ With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status
|
|
|
196
196
|
If you were usging unstable NameNodes and have wanted to ignore NameNode errors on startup of fluentd, enable `ignore_start_check_error` option like below:
|
|
197
197
|
|
|
198
198
|
<match access.**>
|
|
199
|
-
type webhdfs
|
|
199
|
+
@type webhdfs
|
|
200
200
|
host namenode.your.cluster.local
|
|
201
201
|
port 50070
|
|
202
202
|
path /log/access/%Y%m%d/${hostname}.log
|
|
@@ -208,7 +208,7 @@ If you were usging unstable NameNodes and have wanted to ignore NameNode errors
|
|
|
208
208
|
With unstable datanodes that frequently downs, appending over WebHDFS may produce broken files. In such cases, specify `append no` and `${chunk_id}` parameter.
|
|
209
209
|
|
|
210
210
|
<match access.**>
|
|
211
|
-
type webhdfs
|
|
211
|
+
@type webhdfs
|
|
212
212
|
host namenode.your.cluster.local
|
|
213
213
|
port 50070
|
|
214
214
|
|
|
data/fluent-plugin-webhdfs.gemspec
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |gem|
|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
|
5
|
-
gem.version = "0.4.2"
|
|
5
|
+
gem.version = "0.5.0"
|
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
|
@@ -17,7 +17,8 @@ Gem::Specification.new do |gem|
|
|
|
17
17
|
|
|
18
18
|
gem.add_development_dependency "rake"
|
|
19
19
|
gem.add_development_dependency "test-unit"
|
|
20
|
-
gem.
|
|
20
|
+
gem.add_development_dependency "appraisal"
|
|
21
|
+
gem.add_runtime_dependency "fluentd", '>= 0.10.59'
|
|
21
22
|
gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
|
|
22
23
|
gem.add_runtime_dependency "fluent-mixin-config-placeholders", ">= 0.3.0"
|
|
23
24
|
gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
|
|
data/lib/fluent/plugin/out_webhdfs.rb
CHANGED
|
@@ -85,7 +85,7 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
|
85
85
|
desc 'Use kerberos authentication or not'
|
|
86
86
|
config_param :kerberos, :bool, :default => false
|
|
87
87
|
|
|
88
|
-
SUPPORTED_COMPRESS = ['gzip', 'bzip2']
|
|
88
|
+
SUPPORTED_COMPRESS = ['gzip', 'bzip2', 'lzo_command']
|
|
89
89
|
desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
|
|
90
90
|
config_param :compress, :default => nil do |val|
|
|
91
91
|
unless SUPPORTED_COMPRESS.include? val
|
|
@@ -357,3 +357,4 @@ end
|
|
|
357
357
|
require 'fluent/plugin/webhdfs_compressor_text'
|
|
358
358
|
require 'fluent/plugin/webhdfs_compressor_gzip'
|
|
359
359
|
require 'fluent/plugin/webhdfs_compressor_bzip2'
|
|
360
|
+
require 'fluent/plugin/webhdfs_compressor_lzo_command'
|
|
data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module Fluent
|
|
2
|
+
class WebHDFSOutput
|
|
3
|
+
class LZOCommandCompressor < Compressor
|
|
4
|
+
WebHDFSOutput.register_compressor('lzo_command', self)
|
|
5
|
+
|
|
6
|
+
config_param :command_parameter, :string, :default => '-qf1'
|
|
7
|
+
|
|
8
|
+
def configure(conf)
|
|
9
|
+
super
|
|
10
|
+
check_command('lzop', 'LZO')
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def ext
|
|
14
|
+
'.lzo'
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def compress(chunk, tmp)
|
|
18
|
+
w = Tempfile.new("chunk-lzo-tmp-")
|
|
19
|
+
w.binmode
|
|
20
|
+
chunk.write_to(w)
|
|
21
|
+
w.close
|
|
22
|
+
|
|
23
|
+
# We don't check the return code because we can't recover lzop failure.
|
|
24
|
+
system "lzop #{@command_parameter} -o #{tmp.path} #{w.path}"
|
|
25
|
+
ensure
|
|
26
|
+
w.close rescue nil
|
|
27
|
+
w.unlink rescue nil
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
data/test/plugin/test_out_webhdfs.rb
CHANGED
|
@@ -68,7 +68,8 @@ kerberos true
|
|
|
68
68
|
end
|
|
69
69
|
|
|
70
70
|
data(gzip: ['gzip', Fluent::WebHDFSOutput::GzipCompressor],
|
|
71
|
-
bzip2: ['bzip2', Fluent::WebHDFSOutput::Bzip2Compressor]
|
|
71
|
+
bzip2: ['bzip2', Fluent::WebHDFSOutput::Bzip2Compressor],
|
|
72
|
+
lzo: ['lzo_command', Fluent::WebHDFSOutput::LZOCommandCompressor])
|
|
72
73
|
def test_compress(data)
|
|
73
74
|
compress_type, compressor_class = data
|
|
74
75
|
d = create_driver %[
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fluent-plugin-webhdfs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.2
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- TAGOMORI Satoshi
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-
|
|
11
|
+
date: 2016-02-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -38,20 +38,34 @@ dependencies:
|
|
|
38
38
|
- - ">="
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: '0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: appraisal
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0'
|
|
41
55
|
- !ruby/object:Gem::Dependency
|
|
42
56
|
name: fluentd
|
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
|
44
58
|
requirements:
|
|
45
59
|
- - ">="
|
|
46
60
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: 0.10.
|
|
61
|
+
version: 0.10.59
|
|
48
62
|
type: :runtime
|
|
49
63
|
prerelease: false
|
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
65
|
requirements:
|
|
52
66
|
- - ">="
|
|
53
67
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: 0.10.
|
|
68
|
+
version: 0.10.59
|
|
55
69
|
- !ruby/object:Gem::Dependency
|
|
56
70
|
name: fluent-mixin-plaintextformatter
|
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -117,14 +131,17 @@ extra_rdoc_files: []
|
|
|
117
131
|
files:
|
|
118
132
|
- ".gitignore"
|
|
119
133
|
- ".travis.yml"
|
|
134
|
+
- Appraisals
|
|
120
135
|
- Gemfile
|
|
121
136
|
- LICENSE.txt
|
|
122
137
|
- README.md
|
|
123
138
|
- Rakefile
|
|
124
139
|
- fluent-plugin-webhdfs.gemspec
|
|
140
|
+
- gemfiles/fluentd_v0.12.gemfile
|
|
125
141
|
- lib/fluent/plugin/out_webhdfs.rb
|
|
126
142
|
- lib/fluent/plugin/webhdfs_compressor_bzip2.rb
|
|
127
143
|
- lib/fluent/plugin/webhdfs_compressor_gzip.rb
|
|
144
|
+
- lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
|
|
128
145
|
- lib/fluent/plugin/webhdfs_compressor_text.rb
|
|
129
146
|
- test/helper.rb
|
|
130
147
|
- test/plugin/test_out_webhdfs.rb
|
|
@@ -148,7 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
148
165
|
version: '0'
|
|
149
166
|
requirements: []
|
|
150
167
|
rubyforge_project:
|
|
151
|
-
rubygems_version: 2.
|
|
168
|
+
rubygems_version: 2.5.1
|
|
152
169
|
signing_key:
|
|
153
170
|
specification_version: 4
|
|
154
171
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|