fluent-plugin-webhdfs 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +25 -0
- data/fluent-plugin-webhdfs.gemspec +6 -5
- data/lib/fluent/plugin/out_webhdfs.rb +4 -4
- data/test/plugin/test_out_webhdfs.rb +1 -0
- metadata +21 -15
data/README.md
CHANGED
@@ -5,6 +5,7 @@ Fluentd output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
|
|
5
5
|
WebHDFSOutput slices data by time (in the specified unit) and stores the data as plain-text files on HDFS. You can specify to:
|
6
6
|
|
7
7
|
* format whole data as serialized JSON, single attribute or separated multi attributes
|
8
|
+
* or LTSV, labeled-TSV (see http://ltsv.org/ )
|
8
9
|
* include time as line header, or not
|
9
10
|
* include tag as line header, or not
|
10
11
|
* change field separator (default: TAB)
|
@@ -25,6 +26,16 @@ To store data by time,tag,json (same with 'type file') over WebHDFS:
|
|
25
26
|
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
26
27
|
</match>
|
27
28
|
|
29
|
+
To store data as LTSV without time and tag over WebHDFS:
|
30
|
+
|
31
|
+
<match access.**>
|
32
|
+
type webhdfs
|
33
|
+
host namenode.your.cluster.local
|
34
|
+
port 50070
|
35
|
+
path /path/on/hdfs/access.log.%Y%m%d_%H.log
|
36
|
+
output_data_type ltsv
|
37
|
+
</match>
|
38
|
+
|
28
39
|
With username of pseudo authentication:
|
29
40
|
|
30
41
|
<match access.**>
|
@@ -88,6 +99,20 @@ Or with random filename (to avoid duplicated file name only):
|
|
88
99
|
|
89
100
|
With the configuration above, you can handle all files matching '/log/access/20120820/*' as the access logs of the specified time slice.
|
90
101
|
|
102
|
+
### For unstable Namenodes
|
103
|
+
|
104
|
+
With the default configuration, fluent-plugin-webhdfs checks the HDFS filesystem status and raises an error for inactive NameNodes.
|
105
|
+
|
106
|
+
If you are using unstable NameNodes and want to ignore NameNode errors on startup of fluentd, enable the `ignore_start_check_error` option as shown below:
|
107
|
+
|
108
|
+
<match access.**>
|
109
|
+
type webhdfs
|
110
|
+
host namenode.your.cluster.local
|
111
|
+
port 50070
|
112
|
+
path /log/access/%Y%m%d/${hostname}.log
|
113
|
+
ignore_start_check_error true
|
114
|
+
</match>
|
115
|
+
|
91
116
|
## TODO
|
92
117
|
|
93
118
|
* patches welcome!
|
@@ -1,7 +1,8 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
+
|
2
3
|
Gem::Specification.new do |gem|
|
3
4
|
gem.name = "fluent-plugin-webhdfs"
|
4
|
-
gem.version = "0.1.
|
5
|
+
gem.version = "0.1.3"
|
5
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
7
|
gem.email = ["tagomoris@gmail.com"]
|
7
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -15,11 +16,11 @@ Gem::Specification.new do |gem|
|
|
15
16
|
|
16
17
|
gem.add_development_dependency "rake"
|
17
18
|
gem.add_development_dependency "fluentd"
|
18
|
-
gem.add_development_dependency "fluent-mixin-plaintextformatter", '>= 0.2.
|
19
|
-
gem.add_development_dependency "fluent-mixin-config-placeholders"
|
19
|
+
gem.add_development_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
|
20
|
+
gem.add_development_dependency "fluent-mixin-config-placeholders", "~> 0.2.0"
|
20
21
|
gem.add_development_dependency "webhdfs", '>= 0.5.1'
|
21
22
|
gem.add_runtime_dependency "fluentd"
|
22
|
-
gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.
|
23
|
-
gem.add_runtime_dependency "fluent-mixin-config-placeholders"
|
23
|
+
gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
|
24
|
+
gem.add_runtime_dependency "fluent-mixin-config-placeholders", "~> 0.2.0"
|
24
25
|
gem.add_runtime_dependency "webhdfs", '>= 0.5.1'
|
25
26
|
end
|
@@ -13,6 +13,8 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
13
13
|
config_param :port, :integer, :default => 50070
|
14
14
|
config_param :namenode, :string, :default => nil # host:port
|
15
15
|
|
16
|
+
config_param :ignore_start_check_error, :bool, :default => false
|
17
|
+
|
16
18
|
include Fluent::Mixin::ConfigPlaceholders
|
17
19
|
|
18
20
|
config_param :path, :string
|
@@ -74,15 +76,13 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
74
76
|
def start
|
75
77
|
super
|
76
78
|
|
77
|
-
noerror = false
|
78
79
|
begin
|
79
80
|
ary = @client.list('/')
|
80
|
-
|
81
|
+
$log.info "webhdfs connection confirmed: #{@namenode_host}:#{@namenode_port}"
|
81
82
|
rescue
|
82
83
|
$log.error "webdhfs check request failed!"
|
83
|
-
raise
|
84
|
+
raise unless @ignore_start_check_error
|
84
85
|
end
|
85
|
-
$log.info "webhdfs connection confirmed: #{@namenode_host}:#{@namenode_port}"
|
86
86
|
end
|
87
87
|
|
88
88
|
def shutdown
|
@@ -18,6 +18,7 @@ path /hdfs/path/file.%Y%m%d.log
|
|
18
18
|
assert_equal '%Y%m%d', d.instance.time_slice_format
|
19
19
|
assert_equal false, d.instance.httpfs
|
20
20
|
assert_nil d.instance.username
|
21
|
+
assert_equal false, d.instance.ignore_start_check_error
|
21
22
|
|
22
23
|
assert_equal true, d.instance.output_include_time
|
23
24
|
assert_equal true, d.instance.output_include_tag
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-04-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -50,7 +50,7 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - ! '>='
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: 0.2.
|
53
|
+
version: 0.2.1
|
54
54
|
type: :development
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -58,23 +58,23 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.2.
|
61
|
+
version: 0.2.1
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: fluent-mixin-config-placeholders
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
65
65
|
none: false
|
66
66
|
requirements:
|
67
|
-
- -
|
67
|
+
- - ~>
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
69
|
+
version: 0.2.0
|
70
70
|
type: :development
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
|
-
- -
|
75
|
+
- - ~>
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version:
|
77
|
+
version: 0.2.0
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
79
|
name: webhdfs
|
80
80
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,7 +114,7 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - ! '>='
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 0.2.
|
117
|
+
version: 0.2.1
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -122,23 +122,23 @@ dependencies:
|
|
122
122
|
requirements:
|
123
123
|
- - ! '>='
|
124
124
|
- !ruby/object:Gem::Version
|
125
|
-
version: 0.2.
|
125
|
+
version: 0.2.1
|
126
126
|
- !ruby/object:Gem::Dependency
|
127
127
|
name: fluent-mixin-config-placeholders
|
128
128
|
requirement: !ruby/object:Gem::Requirement
|
129
129
|
none: false
|
130
130
|
requirements:
|
131
|
-
- -
|
131
|
+
- - ~>
|
132
132
|
- !ruby/object:Gem::Version
|
133
|
-
version:
|
133
|
+
version: 0.2.0
|
134
134
|
type: :runtime
|
135
135
|
prerelease: false
|
136
136
|
version_requirements: !ruby/object:Gem::Requirement
|
137
137
|
none: false
|
138
138
|
requirements:
|
139
|
-
- -
|
139
|
+
- - ~>
|
140
140
|
- !ruby/object:Gem::Version
|
141
|
-
version:
|
141
|
+
version: 0.2.0
|
142
142
|
- !ruby/object:Gem::Dependency
|
143
143
|
name: webhdfs
|
144
144
|
requirement: !ruby/object:Gem::Requirement
|
@@ -183,15 +183,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
183
183
|
- - ! '>='
|
184
184
|
- !ruby/object:Gem::Version
|
185
185
|
version: '0'
|
186
|
+
segments:
|
187
|
+
- 0
|
188
|
+
hash: -2593820620278035456
|
186
189
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
187
190
|
none: false
|
188
191
|
requirements:
|
189
192
|
- - ! '>='
|
190
193
|
- !ruby/object:Gem::Version
|
191
194
|
version: '0'
|
195
|
+
segments:
|
196
|
+
- 0
|
197
|
+
hash: -2593820620278035456
|
192
198
|
requirements: []
|
193
199
|
rubyforge_project:
|
194
|
-
rubygems_version: 1.8.
|
200
|
+
rubygems_version: 1.8.23
|
195
201
|
signing_key:
|
196
202
|
specification_version: 3
|
197
203
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|