fluent-plugin-webhdfs 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -5,6 +5,7 @@ Fluentd output plugin to write data into Hadoop HDFS over WebHDFS/HttpFs.
5
5
  WebHDFSOutput slices data by time (in a specified unit) and stores the data as plain-text files on HDFS. You can specify to:
6
6
 
7
7
  * format whole data as serialized JSON, single attribute or separated multi attributes
8
+ * or LTSV, labeled-TSV (see http://ltsv.org/ )
8
9
  * include time as line header, or not
9
10
  * include tag as line header, or not
10
11
  * change field separator (default: TAB)
@@ -25,6 +26,16 @@ To store data by time,tag,json (same with 'type file') over WebHDFS:
25
26
  path /path/on/hdfs/access.log.%Y%m%d_%H.log
26
27
  </match>
27
28
 
29
+ To store data as LTSV without time and tag over WebHDFS:
30
+
31
+ <match access.**>
32
+ type webhdfs
33
+ host namenode.your.cluster.local
34
+ port 50070
35
+ path /path/on/hdfs/access.log.%Y%m%d_%H.log
36
+ output_data_type ltsv
37
+ </match>
38
+
28
39
  With username of pseudo authentication:
29
40
 
30
41
  <match access.**>
@@ -88,6 +99,20 @@ Or with random filename (to avoid duplicated file name only):
88
99
 
89
100
  With the configuration above, you can handle all files under '/log/access/20120820/*' as the access logs of the specified timeslice.
90
101
 
102
+ ### For unstable Namenodes
103
+
104
+ With the default configuration, fluent-plugin-webhdfs checks the HDFS filesystem status and raises an error for inactive NameNodes.
105
+
106
+ If you are using unstable NameNodes and want to ignore NameNode errors on startup of fluentd, enable the `ignore_start_check_error` option as shown below:
107
+
108
+ <match access.**>
109
+ type webhdfs
110
+ host namenode.your.cluster.local
111
+ port 50070
112
+ path /log/access/%Y%m%d/${hostname}.log
113
+ ignore_start_check_error true
114
+ </match>
115
+
91
116
  ## TODO
92
117
 
93
118
  * patches welcome!
@@ -1,7 +1,8 @@
1
1
  # -*- encoding: utf-8 -*-
2
+
2
3
  Gem::Specification.new do |gem|
3
4
  gem.name = "fluent-plugin-webhdfs"
4
- gem.version = "0.1.1"
5
+ gem.version = "0.1.3"
5
6
  gem.authors = ["TAGOMORI Satoshi"]
6
7
  gem.email = ["tagomoris@gmail.com"]
7
8
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -15,11 +16,11 @@ Gem::Specification.new do |gem|
15
16
 
16
17
  gem.add_development_dependency "rake"
17
18
  gem.add_development_dependency "fluentd"
18
- gem.add_development_dependency "fluent-mixin-plaintextformatter", '>= 0.2.0'
19
- gem.add_development_dependency "fluent-mixin-config-placeholders"
19
+ gem.add_development_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
20
+ gem.add_development_dependency "fluent-mixin-config-placeholders", "~> 0.2.0"
20
21
  gem.add_development_dependency "webhdfs", '>= 0.5.1'
21
22
  gem.add_runtime_dependency "fluentd"
22
- gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.0'
23
- gem.add_runtime_dependency "fluent-mixin-config-placeholders"
23
+ gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
24
+ gem.add_runtime_dependency "fluent-mixin-config-placeholders", "~> 0.2.0"
24
25
  gem.add_runtime_dependency "webhdfs", '>= 0.5.1'
25
26
  end
@@ -13,6 +13,8 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
13
13
  config_param :port, :integer, :default => 50070
14
14
  config_param :namenode, :string, :default => nil # host:port
15
15
 
16
+ config_param :ignore_start_check_error, :bool, :default => false
17
+
16
18
  include Fluent::Mixin::ConfigPlaceholders
17
19
 
18
20
  config_param :path, :string
@@ -74,15 +76,13 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
74
76
  def start
75
77
  super
76
78
 
77
- noerror = false
78
79
  begin
79
80
  ary = @client.list('/')
80
- noerror = true
81
+ $log.info "webhdfs connection confirmed: #{@namenode_host}:#{@namenode_port}"
81
82
  rescue
82
83
  $log.error "webdhfs check request failed!"
83
- raise
84
+ raise unless @ignore_start_check_error
84
85
  end
85
- $log.info "webhdfs connection confirmed: #{@namenode_host}:#{@namenode_port}"
86
86
  end
87
87
 
88
88
  def shutdown
@@ -18,6 +18,7 @@ path /hdfs/path/file.%Y%m%d.log
18
18
  assert_equal '%Y%m%d', d.instance.time_slice_format
19
19
  assert_equal false, d.instance.httpfs
20
20
  assert_nil d.instance.username
21
+ assert_equal false, d.instance.ignore_start_check_error
21
22
 
22
23
  assert_equal true, d.instance.output_include_time
23
24
  assert_equal true, d.instance.output_include_tag
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-06 00:00:00.000000000 Z
12
+ date: 2013-04-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -50,7 +50,7 @@ dependencies:
50
50
  requirements:
51
51
  - - ! '>='
52
52
  - !ruby/object:Gem::Version
53
- version: 0.2.0
53
+ version: 0.2.1
54
54
  type: :development
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
@@ -58,23 +58,23 @@ dependencies:
58
58
  requirements:
59
59
  - - ! '>='
60
60
  - !ruby/object:Gem::Version
61
- version: 0.2.0
61
+ version: 0.2.1
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: fluent-mixin-config-placeholders
64
64
  requirement: !ruby/object:Gem::Requirement
65
65
  none: false
66
66
  requirements:
67
- - - ! '>='
67
+ - - ~>
68
68
  - !ruby/object:Gem::Version
69
- version: '0'
69
+ version: 0.2.0
70
70
  type: :development
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
73
73
  none: false
74
74
  requirements:
75
- - - ! '>='
75
+ - - ~>
76
76
  - !ruby/object:Gem::Version
77
- version: '0'
77
+ version: 0.2.0
78
78
  - !ruby/object:Gem::Dependency
79
79
  name: webhdfs
80
80
  requirement: !ruby/object:Gem::Requirement
@@ -114,7 +114,7 @@ dependencies:
114
114
  requirements:
115
115
  - - ! '>='
116
116
  - !ruby/object:Gem::Version
117
- version: 0.2.0
117
+ version: 0.2.1
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
@@ -122,23 +122,23 @@ dependencies:
122
122
  requirements:
123
123
  - - ! '>='
124
124
  - !ruby/object:Gem::Version
125
- version: 0.2.0
125
+ version: 0.2.1
126
126
  - !ruby/object:Gem::Dependency
127
127
  name: fluent-mixin-config-placeholders
128
128
  requirement: !ruby/object:Gem::Requirement
129
129
  none: false
130
130
  requirements:
131
- - - ! '>='
131
+ - - ~>
132
132
  - !ruby/object:Gem::Version
133
- version: '0'
133
+ version: 0.2.0
134
134
  type: :runtime
135
135
  prerelease: false
136
136
  version_requirements: !ruby/object:Gem::Requirement
137
137
  none: false
138
138
  requirements:
139
- - - ! '>='
139
+ - - ~>
140
140
  - !ruby/object:Gem::Version
141
- version: '0'
141
+ version: 0.2.0
142
142
  - !ruby/object:Gem::Dependency
143
143
  name: webhdfs
144
144
  requirement: !ruby/object:Gem::Requirement
@@ -183,15 +183,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
183
183
  - - ! '>='
184
184
  - !ruby/object:Gem::Version
185
185
  version: '0'
186
+ segments:
187
+ - 0
188
+ hash: -2593820620278035456
186
189
  required_rubygems_version: !ruby/object:Gem::Requirement
187
190
  none: false
188
191
  requirements:
189
192
  - - ! '>='
190
193
  - !ruby/object:Gem::Version
191
194
  version: '0'
195
+ segments:
196
+ - 0
197
+ hash: -2593820620278035456
192
198
  requirements: []
193
199
  rubyforge_project:
194
- rubygems_version: 1.8.21
200
+ rubygems_version: 1.8.23
195
201
  signing_key:
196
202
  specification_version: 3
197
203
  summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting