fluent-plugin-webhdfs 0.1.1 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +25 -0
- data/fluent-plugin-webhdfs.gemspec +6 -5
- data/lib/fluent/plugin/out_webhdfs.rb +4 -4
- data/test/plugin/test_out_webhdfs.rb +1 -0
- metadata +21 -15
data/README.md
CHANGED
@@ -5,6 +5,7 @@ Fluentd output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
|
|
5
5
|
WebHDFSOutput slices data by time (in the specified unit) and stores it as plain-text files on HDFS. You can choose to:
|
6
6
|
|
7
7
|
* format whole data as serialized JSON, single attribute or separated multi attributes
|
8
|
+
* or LTSV, labeled-TSV (see http://ltsv.org/ )
|
8
9
|
* include time as line header, or not
|
9
10
|
* include tag as line header, or not
|
10
11
|
* change field separator (default: TAB)
|
@@ -25,6 +26,16 @@ To store data by time,tag,json (same with 'type file') over WebHDFS:
|
|
25
26
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
26
27
|
</match>
|
27
28
|
|
29
|
+
To store data as LTSV without time and tag over WebHDFS:
|
30
|
+
|
31
|
+
<match access.**>
|
32
|
+
type webhdfs
|
33
|
+
host namenode.your.cluster.local
|
34
|
+
port 50070
|
35
|
+
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
36
|
+
output_data_type ltsv
|
37
|
+
</match>
|
38
|
+
|
28
39
|
With username of pseudo authentication:
|
29
40
|
|
30
41
|
<match access.**>
|
@@ -88,6 +99,20 @@ Or with random filename (to avoid duplicated file name only):
|
|
88
99
|
|
89
100
|
With the configurations above, you can handle all files under '/log/access/20120820/*' as the access logs for the specified timeslice.
|
90
101
|
|
102
|
+
### For unstable NameNodes
|
103
|
+
|
104
|
+
With the default configuration, fluent-plugin-webhdfs checks the HDFS filesystem status on startup and raises an error for inactive NameNodes.
|
105
|
+
|
106
|
+
If you are using unstable NameNodes and want to ignore NameNode errors when fluentd starts up, enable the `ignore_start_check_error` option as shown below:
|
107
|
+
|
108
|
+
<match access.**>
|
109
|
+
type webhdfs
|
110
|
+
host namenode.your.cluster.local
|
111
|
+
port 50070
|
112
|
+
path /log/access/%Y%m%d/${hostname}.log
|
113
|
+
ignore_start_check_error true
|
114
|
+
</match>
|
115
|
+
|
91
116
|
## TODO
|
92
117
|
|
93
118
|
* patches welcome!
|
@@ -1,7 +1,8 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
+
|
2
3
|
Gem::Specification.new do |gem|
|
3
4
|
gem.name = "fluent-plugin-webhdfs"
|
4
|
-
gem.version = "0.1.1"
|
5
|
+
gem.version = "0.1.3"
|
5
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
7
|
gem.email = ["tagomoris@gmail.com"]
|
7
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -15,11 +16,11 @@ Gem::Specification.new do |gem|
|
|
15
16
|
|
16
17
|
gem.add_development_dependency "rake"
|
17
18
|
gem.add_development_dependency "fluentd"
|
18
|
-
gem.add_development_dependency "fluent-mixin-plaintextformatter", '>= 0.2.
|
19
|
-
gem.add_development_dependency "fluent-mixin-config-placeholders"
|
19
|
+
gem.add_development_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
|
20
|
+
gem.add_development_dependency "fluent-mixin-config-placeholders", "~> 0.2.0"
|
20
21
|
gem.add_development_dependency "webhdfs", '>= 0.5.1'
|
21
22
|
gem.add_runtime_dependency "fluentd"
|
22
|
-
gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.
|
23
|
-
gem.add_runtime_dependency "fluent-mixin-config-placeholders"
|
23
|
+
gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
|
24
|
+
gem.add_runtime_dependency "fluent-mixin-config-placeholders", "~> 0.2.0"
|
24
25
|
gem.add_runtime_dependency "webhdfs", '>= 0.5.1'
|
25
26
|
end
|
@@ -13,6 +13,8 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
13
13
|
config_param :port, :integer, :default => 50070
|
14
14
|
config_param :namenode, :string, :default => nil # host:port
|
15
15
|
|
16
|
+
config_param :ignore_start_check_error, :bool, :default => false
|
17
|
+
|
16
18
|
include Fluent::Mixin::ConfigPlaceholders
|
17
19
|
|
18
20
|
config_param :path, :string
|
@@ -74,15 +76,13 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
74
76
|
def start
|
75
77
|
super
|
76
78
|
|
77
|
-
noerror = false
|
78
79
|
begin
|
79
80
|
ary = @client.list('/')
|
80
|
-
|
81
|
+
$log.info "webhdfs connection confirmed: #{@namenode_host}:#{@namenode_port}"
|
81
82
|
rescue
|
82
83
|
$log.error "webdhfs check request failed!"
|
83
|
-
raise
|
84
|
+
raise unless @ignore_start_check_error
|
84
85
|
end
|
85
|
-
$log.info "webhdfs connection confirmed: #{@namenode_host}:#{@namenode_port}"
|
86
86
|
end
|
87
87
|
|
88
88
|
def shutdown
|
@@ -18,6 +18,7 @@ path /hdfs/path/file.%Y%m%d.log
|
|
18
18
|
assert_equal '%Y%m%d', d.instance.time_slice_format
|
19
19
|
assert_equal false, d.instance.httpfs
|
20
20
|
assert_nil d.instance.username
|
21
|
+
assert_equal false, d.instance.ignore_start_check_error
|
21
22
|
|
22
23
|
assert_equal true, d.instance.output_include_time
|
23
24
|
assert_equal true, d.instance.output_include_tag
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-04-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -50,7 +50,7 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - ! '>='
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: 0.2.
|
53
|
+
version: 0.2.1
|
54
54
|
type: :development
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -58,23 +58,23 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.2.
|
61
|
+
version: 0.2.1
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: fluent-mixin-config-placeholders
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
65
65
|
none: false
|
66
66
|
requirements:
|
67
|
-
- -
|
67
|
+
- - ~>
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
69
|
+
version: 0.2.0
|
70
70
|
type: :development
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
|
-
- -
|
75
|
+
- - ~>
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version:
|
77
|
+
version: 0.2.0
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
79
|
name: webhdfs
|
80
80
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,7 +114,7 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - ! '>='
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 0.2.
|
117
|
+
version: 0.2.1
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -122,23 +122,23 @@ dependencies:
|
|
122
122
|
requirements:
|
123
123
|
- - ! '>='
|
124
124
|
- !ruby/object:Gem::Version
|
125
|
-
version: 0.2.
|
125
|
+
version: 0.2.1
|
126
126
|
- !ruby/object:Gem::Dependency
|
127
127
|
name: fluent-mixin-config-placeholders
|
128
128
|
requirement: !ruby/object:Gem::Requirement
|
129
129
|
none: false
|
130
130
|
requirements:
|
131
|
-
- -
|
131
|
+
- - ~>
|
132
132
|
- !ruby/object:Gem::Version
|
133
|
-
version:
|
133
|
+
version: 0.2.0
|
134
134
|
type: :runtime
|
135
135
|
prerelease: false
|
136
136
|
version_requirements: !ruby/object:Gem::Requirement
|
137
137
|
none: false
|
138
138
|
requirements:
|
139
|
-
- -
|
139
|
+
- - ~>
|
140
140
|
- !ruby/object:Gem::Version
|
141
|
-
version:
|
141
|
+
version: 0.2.0
|
142
142
|
- !ruby/object:Gem::Dependency
|
143
143
|
name: webhdfs
|
144
144
|
requirement: !ruby/object:Gem::Requirement
|
@@ -183,15 +183,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
183
183
|
- - ! '>='
|
184
184
|
- !ruby/object:Gem::Version
|
185
185
|
version: '0'
|
186
|
+
segments:
|
187
|
+
- 0
|
188
|
+
hash: -2593820620278035456
|
186
189
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
187
190
|
none: false
|
188
191
|
requirements:
|
189
192
|
- - ! '>='
|
190
193
|
- !ruby/object:Gem::Version
|
191
194
|
version: '0'
|
195
|
+
segments:
|
196
|
+
- 0
|
197
|
+
hash: -2593820620278035456
|
192
198
|
requirements: []
|
193
199
|
rubyforge_project:
|
194
|
-
rubygems_version: 1.8.
|
200
|
+
rubygems_version: 1.8.23
|
195
201
|
signing_key:
|
196
202
|
specification_version: 3
|
197
203
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|