fluent-plugin-webhdfs 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 65580c8eb45e57136473c4713aef1b7bf8b7face
4
- data.tar.gz: 75867f172f39ffdf0a1d3f9c94eeb703ad062b97
3
+ metadata.gz: b109af994a373ce05a964a0b1560bfad64b08f6d
4
+ data.tar.gz: 3bf1cc0de4b54d399158ead361f90825d4f8ba92
5
5
  SHA512:
6
- metadata.gz: 27acabdcaf0716e4daa491f208ca29d4441e0b9e2bc4b270d2652de65695f3e24dae0a938d8b5d7cedc186bc7137569ab3a81878c51fe3553814a81a5b98ca88
7
- data.tar.gz: 2ad981a4bbf498be9b828e54202d0ecea87b0646f69605068281032ca4f9c891e7251967b8b70bbc5d1c50fc34cf7f65b20c52843c33c60f785dcea8b6365f1b
6
+ metadata.gz: e1ca791673549cc32e874360d07c57005af2a2c88e1d7b3ad6a5bd491cfa01d1724f196a793b8294eca3b8448792d3c2b148006ac61ad24b8e583050bcd2a789
7
+ data.tar.gz: ad02485500aa4af1b1f8bff78e91722ebebad6387bbbe4c53e89ba7c094119693afe8ff0fa42018f9a75ac206d689c1b5fcae8b6e215305d1a92db22f476c23d
data/README.md CHANGED
@@ -26,6 +26,14 @@ To store data by time,tag,json (same with 'type file') over WebHDFS:
26
26
  path /path/on/hdfs/access.log.%Y%m%d_%H.log
27
27
  </match>
28
28
 
29
+ To specify the namenode, the `namenode` option is also available:
30
+
31
+ <match access.**>
32
+ type webhdfs
33
+ namenode master.your.cluster.local:50070
34
+ path /path/on/hdfs/access.log.%Y%m%d_%H.log
35
+ </match>
36
+
29
37
  To store data as LTSV without time and tag over WebHDFS:
30
38
 
31
39
  <match access.**>
@@ -74,6 +82,28 @@ Store data as TSV (TAB separated values) of specified keys, without time, with t
74
82
 
75
83
  If message doesn't have specified attribute, fluent-plugin-webhdfs outputs 'NULL' instead of values.
76
84
 
85
+ ### Namenode HA / Auto retry for WebHDFS known errors
86
+
87
+ `fluent-plugin-webhdfs` (v0.2.0 or later) accepts 2 namenodes for Namenode HA (active/standby). Use `standby_namenode` like this:
88
+
89
+ <match access.**>
90
+ type webhdfs
91
+ namenode master1.your.cluster.local:50070
92
+ standby_namenode master2.your.cluster.local:50070
93
+ path /path/on/hdfs/access.log.%Y%m%d_%H.log
94
+ </match>
95
+
96
+ And you can also specify to retry known hdfs errors (such as `LeaseExpiredException`) automatically. With this configuration, fluentd doesn't write logs for these errors if the retry succeeded.
97
+
98
+ <match access.**>
99
+ type webhdfs
100
+ namenode master1.your.cluster.local:50070
101
+ path /path/on/hdfs/access.log.%Y%m%d_%H.log
102
+ retry_known_errors yes
103
+ retry_times 1 # default 1
104
+ retry_interval 1 # [sec] default 1
105
+ </match>
106
+
77
107
  ### Performance notifications
78
108
 
79
109
  Writing data on HDFS single file from 2 or more fluentd nodes, makes many bad blocks of HDFS. If you want to run 2 or more fluentd nodes with fluent-plugin-webhdfs, you should configure 'path' for each node.
@@ -99,6 +129,16 @@ Or with random filename (to avoid duplicated file name only):
99
129
 
100
130
  With configurations above, you can handle all of files of '/log/access/20120820/*' as specified timeslice access logs.
101
131
 
132
+ For high-load cluster nodes, you can specify timeouts for HTTP requests.
133
+
134
+ <match access.**>
135
+ type webhdfs
136
+ namenode master.your.cluster.local:50070
137
+ path /log/access/%Y%m%d/${hostname}.log
138
+ open_timeout 180 # [sec] default: 30
139
+ read_timeout 180 # [sec] default: 60
140
+ </match>
141
+
102
142
  ### For unstable Namenodes
103
143
 
104
144
  With default configuration, fluent-plugin-webhdfs checks HDFS filesystem status and raises an error for inactive NameNodes.
@@ -115,6 +155,8 @@ If you were using unstable NameNodes and have wanted to ignore NameNode errors
115
155
 
116
156
  ## TODO
117
157
 
158
+ * configuration example for Hadoop Namenode HA
159
+ * here, or docs.fluentd.org ?
118
160
  * patches welcome!
119
161
 
120
162
  ## Copyright
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-webhdfs"
5
- gem.version = "0.1.4"
5
+ gem.version = "0.2.0"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -18,5 +18,5 @@ Gem::Specification.new do |gem|
18
18
  gem.add_runtime_dependency "fluentd"
19
19
  gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
20
20
  gem.add_runtime_dependency "fluent-mixin-config-placeholders", "~> 0.2.0"
21
- gem.add_runtime_dependency "webhdfs", '>= 0.5.1'
21
+ gem.add_runtime_dependency "webhdfs", '>= 0.5.3'
22
22
  end
@@ -12,6 +12,7 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
12
12
  config_param :host, :string, :default => nil
13
13
  config_param :port, :integer, :default => 50070
14
14
  config_param :namenode, :string, :default => nil # host:port
15
+ config_param :standby_namenode, :string, :default => nil # host:port
15
16
 
16
17
  config_param :ignore_start_check_error, :bool, :default => false
17
18
 
@@ -25,6 +26,15 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
25
26
  config_param :open_timeout, :integer, :default => 30 # from ruby net/http default
26
27
  config_param :read_timeout, :integer, :default => 60 # from ruby net/http default
27
28
 
29
+ config_param :retry_known_errors, :bool, :default => false
30
+ config_param :retry_interval, :integer, :default => nil
31
+ config_param :retry_times, :integer, :default => nil
32
+
33
+ # how many write failures before switching to the standby namenode;
34
+ # by default it's 11 times, which costs 1023 seconds inside fluentd —
35
+ # considered enough to exclude failures caused by a temporary network problem or a single datanode failure
36
+ config_param :failures_before_use_standby, :integer, :default => 11
37
+
28
38
  include Fluent::Mixin::PlainTextFormatter
29
39
 
30
40
  config_param :default_tag, :string, :default => 'tag_missing'
@@ -54,34 +64,80 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
54
64
  @namenode_port = @port
55
65
  elsif @namenode
56
66
  unless /\A([a-zA-Z0-9][-a-zA-Z0-9.]*):(\d+)\Z/ =~ @namenode
57
- raise Fluent::ConfigError, "Invalid config value about namenode: '#{@namenode}', needs NAMENODE_NAME:PORT"
67
+ raise Fluent::ConfigError, "Invalid config value about namenode: '#{@namenode}', needs NAMENODE_HOST:PORT"
58
68
  end
59
69
  @namenode_host = $1
60
70
  @namenode_port = $2.to_i
61
71
  else
62
72
  raise Fluent::ConfigError, "WebHDFS host or namenode missing"
63
73
  end
74
+ if @standby_namenode
75
+ unless /\A([a-zA-Z0-9][-a-zA-Z0-9.]*):(\d+)\Z/ =~ @standby_namenode
76
+ raise Fluent::ConfigError, "Invalid config value about standby namenode: '#{@standby_namenode}', needs STANDBY_NAMENODE_HOST:PORT"
77
+ end
78
+ if @httpfs
79
+ raise Fluent::ConfigError, "Invalid configuration: specified to use both of standby_namenode and httpfs."
80
+ end
81
+ @standby_namenode_host = $1
82
+ @standby_namenode_port = $2.to_i
83
+ end
64
84
  unless @path.index('/') == 0
65
85
  raise Fluent::ConfigError, "Path on hdfs MUST starts with '/', but '#{@path}'"
66
86
  end
67
-
68
- @client = WebHDFS::Client.new(@namenode_host, @namenode_port, @username)
87
+
88
+ @client = prepare_client(@namenode_host, @namenode_port, @username)
89
+ if @standby_namenode_host
90
+ @client_standby = prepare_client(@standby_namenode_host, @standby_namenode_port, @username)
91
+ else
92
+ @client_standby = nil
93
+ end
94
+ end
95
+
96
+ def prepare_client(host, port, username)
97
+ client = WebHDFS::Client.new(host, port, username)
69
98
  if @httpfs
70
- @client.httpfs_mode = true
99
+ client.httpfs_mode = true
100
+ end
101
+ client.open_timeout = @open_timeout
102
+ client.read_timeout = @read_timeout
103
+ if @retry_known_errors
104
+ client.retry_known_errors = true
105
+ client.retry_interval = @retry_interval if @retry_interval
106
+ client.retry_times = @retry_times if @retry_times
107
+ end
108
+
109
+ client
110
+ end
111
+
112
+ def namenode_available(client)
113
+ if client
114
+ available = true
115
+ begin
116
+ client.list('/')
117
+ rescue => e
118
+ $log.warn "webhdfs check request failed. (namenode: #{client.host}:#{client.port}, error: #{e.message})"
119
+ available = false
120
+ end
121
+ available
122
+ else
123
+ false
71
124
  end
72
- @client.open_timeout = @open_timeout
73
- @client.read_timeout = @read_timeout
74
125
  end
75
126
 
76
127
  def start
77
128
  super
78
129
 
79
- begin
80
- ary = @client.list('/')
130
+ if namenode_available(@client)
81
131
  $log.info "webhdfs connection confirmed: #{@namenode_host}:#{@namenode_port}"
82
- rescue
83
- $log.error "webdhfs check request failed!"
84
- raise unless @ignore_start_check_error
132
+ return
133
+ end
134
+ if @client_standby && namenode_available(@client_standby)
135
+ $log.info "webhdfs connection confirmed: #{@standby_namenode_host}:#{@standby_namenode_port}"
136
+ return
137
+ end
138
+
139
+ unless @ignore_start_check_error
140
+ raise RuntimeError, "webhdfs is not available now."
85
141
  end
86
142
  end
87
143
 
@@ -93,8 +149,19 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
93
149
  Time.strptime(chunk_key, @time_slice_format).strftime(@path)
94
150
  end
95
151
 
152
+ def is_standby_exception(e)
153
+ e.is_a?(WebHDFS::IOError) && e.message.match(/org\.apache\.hadoop\.ipc\.StandbyException/)
154
+ end
155
+
156
+ def namenode_failover
157
+ if @standby_namenode
158
+ @client, @client_standby = @client_standby, @client
159
+ $log.warn "Namenode failovered, now using #{@client.host}:#{@client.port}."
160
+ end
161
+ end
162
+
96
163
  # TODO check conflictions
97
-
164
+
98
165
  def send_data(path, data)
99
166
  begin
100
167
  @client.append(path, data)
@@ -105,11 +172,27 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
105
172
 
106
173
  def write(chunk)
107
174
  hdfs_path = path_format(chunk.key)
175
+ failovered = false
108
176
  begin
109
177
  send_data(hdfs_path, chunk.read)
110
- rescue
111
- $log.error "failed to communicate hdfs cluster, path: #{hdfs_path}"
112
- raise
178
+ rescue => e
179
+ $log.warn "failed to communicate hdfs cluster, path: #{hdfs_path}"
180
+
181
+ raise e if !@client_standby || failovered
182
+
183
+ if is_standby_exception(e) && namenode_available(@client_standby)
184
+ $log.warn "Seems the connected host status is not active (maybe due to failovers). Gonna try another namenode immediately."
185
+ namenode_failover
186
+ failovered = true
187
+ retry
188
+ end
189
+ if ((@error_history.size + 1) >= @failures_before_use_standby) && namenode_available(@client_standby)
190
+ $log.warn "Too many failures. Try to use the standby namenode instead."
191
+ namenode_failover
192
+ failovered = true
193
+ retry
194
+ end
195
+ raise e
113
196
  end
114
197
  hdfs_path
115
198
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-04-08 00:00:00.000000000 Z
11
+ date: 2013-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - '>='
74
74
  - !ruby/object:Gem::Version
75
- version: 0.5.1
75
+ version: 0.5.3
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - '>='
81
81
  - !ruby/object:Gem::Version
82
- version: 0.5.1
82
+ version: 0.5.3
83
83
  description: For WebHDFS and HttpFs of Hadoop HDFS
84
84
  email:
85
85
  - tagomoris@gmail.com