fluent-plugin-hoop 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,31 @@
1
+ # rcov generated
2
+ coverage
3
+
4
+ # rdoc generated
5
+ rdoc
6
+
7
+ # yard generated
8
+ doc
9
+ .yardoc
10
+
11
+ # bundler
12
+ .bundle
13
+
14
+ # jeweler generated
15
+ pkg
16
+
17
+ # For MacOS
18
+ .DS_Store
19
+
20
+ # For TextMate, emacs, vim
21
+ *.tmproj
22
+ tmtags
23
+ *~
24
+ \#*
25
+ .\#*
26
+ *.swp
27
+
28
+ # not to lock gems version, and for bundler
29
+ Gemfile.lock
30
+ vendor
31
+ vendor/fluentd
@@ -0,0 +1,3 @@
1
+ [submodule "vendor/fluentd"]
2
+ path = vendor/fluentd
3
+ url = git://github.com/fluent/fluentd.git
data/AUTHORS ADDED
@@ -0,0 +1 @@
1
+ TAGOMORI Satoshi <tagomoris _at_ gmail.com>
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
# Fetch gems over TLS; a plain-http source lets gem downloads be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.4"
  gem "rcov", ">= 0"
end

# Declared outside the :development group, so it is part of the default group.
gem "rdoc"
16
+
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2011 TAGOMORI Satoshi
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -0,0 +1,79 @@
1
+ = Hoop plugin for Fluentd
2
+
3
+ == Component
4
+
5
+ === HoopOutput
6
+
7
+ Stores Fluentd events as plain text on HDFS, over Hoop (HDFS http-fs).
8
+
9
+ Hoop was originally written at Cloudera and has been merged into the Apache Hadoop 0.23 tree. See:
10
+
11
+ [Apache Hadoop dev doc] https://github.com/apache/hadoop-common/blob/trunk/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/index.apt.vm
12
+ [Cloudera Hoop doc (obsolete)] http://cloudera.github.com/hoop/docs/latest/index.html
13
+
14
+ HoopOutput slices data by time (in the specified units) and stores the data as plain text on HDFS. You can choose to:
15
+
16
+ - format whole data as serialized JSON, single attribute or separated multi attributes
17
+ - include time as line header, or not
18
+ - include tag as line header, or not
19
+ - change field separator (default: TAB)
20
+
21
+ == Configuration
22
+
23
+ === HoopOutput
24
+
25
+ Minimal configuration (output: TAB separated time,tag,json-serialized-data and terminated with newline):
26
+
27
+ <match hoop.**>
28
+ type hoop
29
+ hoop_server hoop-server.local:14000
30
+
31
+ # %Y %m %d %H %M %S are available as conversion specifications in path on hdfs
32
+ # If '%Y%m%d' specified, logs are sliced into per-day files automatically.
33
+ path /hoop/log-%Y%m/log-%Y%m%d.log
34
+
35
+ # 'username' is used for pseudo authentication, see http://cloudera.github.com/hoop/docs/latest/HttpRestApi.html
36
+ username hoopuser
37
+ </match>
38
+
39
+ You will get output like below in hdfs file such as '/hoop/log-201112/log-20111231.log'
40
+
41
+ 2011-12-31T13:14:15Z [TAB] hoop.foo.bar [TAB] {"field1":12345,"field2":"one two three four five","field3":"OK"} [terminated by newline]
42
+ 2011-12-31T21:22:23Z [TAB] hoop.foo.val [TAB] {"field1":23456,"field2":"two three four five six","field3":"BAD"} [terminated by newline]
43
+
44
+ Single attribute with tag (removed prefix 'hoop.'), without time, separated by SPACE and NOT to terminate by newline ('message' data will be terminated with newline).
45
+
46
+ <match hoop.**>
47
+ type hoop
48
+ hoop_server hoop-server.local:14000
49
+ path /hoop/log-%Y%m/log-%Y%m%d-%H.log
50
+ username hoopuser
51
+
52
+ output_include_time false
53
+ output_include_tag true
54
+
55
+ # If you want multiple attribute, specify like 'attr:field1,field2,field3'
56
+ output_data_type attr:message
57
+
58
+ # field_separator allows 'SPACE', 'COMMA' and 'TAB'(default)
59
+ field_separator SPACE
60
+
61
+ # add_newline defaults to true
62
+ add_newline false
63
+
64
+ # tag 'hoop.foo.bar' is shortened to 'foo.bar'
65
+ remove_prefix hoop
66
+
67
+ # used when a tag consists only of the remove_prefix string, like 'hoop'
68
+ default_tag unknown
69
+ </match>
70
+
71
+ == TODO
72
+
73
+ - consider what to do next
74
+ - patches welcome!
75
+
76
+ == Copyright
77
+
78
+ Copyright:: Copyright (c) 2011- TAGOMORI Satoshi (tagomoris)
79
+ License:: Apache License, Version 2.0
@@ -0,0 +1,64 @@
1
# encoding: utf-8

# Rake tasks for fluent-plugin-hoop: gem packaging (jeweler), unit tests,
# rcov coverage and RDoc generation.

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

# Gem packaging tasks. The attributes set here are what `rake gemspec`
# writes into the generated gemspec.
require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "fluent-plugin-hoop"
  gem.description = "Hoop (HDFS http-fs) plugin for Fluent event collector"
  gem.homepage = "http://github.com/tagomoris/fluent-plugin-hoop"
  gem.summary = gem.description
  # gem.version = File.read("VERSION").strip
  gem.authors = ["TAGOMORI Satoshi"]
  gem.email = "tagomoris@gmail.com"
  gem.has_rdoc = false
  # gem.license = "Apache License v2.0"
  gem.files = `git ls-files`.split("\n")
  gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.require_paths = ['lib']
  gem.add_dependency "fluentd", "~> 0.10.8"
  gem.add_development_dependency "rake", ">= 0.9.2"
  gem.add_development_dependency "simplecov", ">= 0.5.4"
end
Jeweler::RubygemsDotOrgTasks.new

# `rake test`: runs every test/**/test_*.rb with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  # FLUENT_TEST_DEBUG is set to 'TRUE' only when DEBUG is absent from the environment.
  unless ENV['DEBUG']
    ENV['FLUENT_TEST_DEBUG'] = 'TRUE'
  end
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# `rake rcov`: same test files under rcov, excluding installed gems from the report.
require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
  test.libs << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
  test.rcov_opts << '--exclude "gems/*"'
end

task :default => :test

# `rake rdoc`: API documentation for the README and everything under lib/.
require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # strip: File.read returns the trailing newline stored in VERSION, which
  # would otherwise end up inside the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "fluent-plugin-hoop #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,75 @@
1
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
# -*- encoding: utf-8 -*-

# Gem specification for fluent-plugin-hoop 0.1.0.
Gem::Specification.new do |spec|
  blurb = %q{Hoop (HDFS http-fs) plugin for Fluent event collector}
  test_sources = [
    %q{test/helper.rb},
    %q{test/plugin/test_out_hoop.rb},
    %q{test/plugin/test_out_hoop_realserver.rb},
    %q{test/plugin/test_out_hoop_reconnect.rb}
  ]

  spec.name = %q{fluent-plugin-hoop}
  spec.version = "0.1.0"

  if spec.respond_to? :required_rubygems_version=
    spec.required_rubygems_version = Gem::Requirement.new(">= 0")
  end
  spec.authors = [%q{TAGOMORI Satoshi}]
  spec.date = %q{2011-12-26}
  spec.description = blurb
  spec.email = %q{tagomoris@gmail.com}
  spec.extra_rdoc_files = ["LICENSE.txt", "README.rdoc"]
  spec.files = [
    ".document",
    ".gitignore",
    ".gitmodules",
    "AUTHORS",
    "Gemfile",
    "LICENSE.txt",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "fluent-plugin-hoop.gemspec",
    "lib/fluent/plugin/out_hoop.rb"
  ] + test_sources
  spec.homepage = %q{http://github.com/tagomoris/fluent-plugin-hoop}
  spec.require_paths = [%q{lib}]
  spec.rubygems_version = %q{1.8.6}
  spec.summary = blurb
  spec.test_files = test_sources

  # [kind, gem name, version requirement], in declaration order.
  requirements = [
    [:runtime,     %q<rdoc>,      ">= 0"],
    [:development, %q<shoulda>,   ">= 0"],
    [:development, %q<bundler>,   "~> 1.0.0"],
    [:development, %q<jeweler>,   "~> 1.6.4"],
    [:development, %q<rcov>,      ">= 0"],
    [:runtime,     %q<fluentd>,   "~> 0.10.8"],
    [:development, %q<rake>,      ">= 0.9.2"],
    [:development, %q<simplecov>, ">= 0.5.4"]
  ]

  versioned_spec = spec.respond_to?(:specification_version)
  spec.specification_version = 3 if versioned_spec

  # Runtime/development dependencies are only told apart when the spec is
  # versioned and RubyGems is at least 1.2.0; otherwise every entry is added
  # through the plain add_dependency call.
  typed_dependencies = versioned_spec &&
    Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')

  requirements.each do |kind, gem_name, constraint|
    if !typed_dependencies
      spec.add_dependency(gem_name, [constraint])
    elsif kind == :runtime
      spec.add_runtime_dependency(gem_name, [constraint])
    else
      spec.add_development_dependency(gem_name, [constraint])
    end
  end
end
75
+
@@ -0,0 +1,339 @@
1
module FluentExt; end

# Mix-in that renders a Fluentd event (tag, time, record) as one line of
# delimited plain text.
#
# #configure reads the following keys from +conf+:
#
# output_include_time:: boolean, default true; prefix each line with the formatted time
# output_include_tag::  boolean, default true; prefix each line with the tag
# output_data_type::    'json' (default), 'attr:FIELD' or 'attr:F1,F2,...'
# field_separator::     'SPACE', 'COMMA' or anything else for TAB (default)
# add_newline::         boolean, default true; terminate each line with "\n"
# remove_prefix::       tag prefix to strip ('hoop' turns 'hoop.foo' into 'foo')
# default_tag::         replacement for a tag that equals remove_prefix exactly
#
# After #configure the formatter is installed as singleton methods on the
# instance, so it takes precedence over a #format defined by the including class.
module FluentExt::PlainTextFormatterMixin
  # config_param :output_data_type, :string, :default => 'json' # or 'attr:field' or 'attr:field1,field2,field3(...)'

  attr_accessor :output_include_time, :output_include_tag, :output_data_type
  attr_accessor :add_newline, :field_separator
  attr_accessor :remove_prefix, :default_tag

  # Reads the formatter settings from +conf+ and installs the per-instance
  # #format / #stringify_record implementations.
  #
  # Raises Fluent::ConfigError when output_data_type is malformed, or when a
  # non-empty remove_prefix is combined with output_include_tag but
  # default_tag is missing.
  def configure(conf)
    super

    @output_include_time = plaintext_flag(conf, 'output_include_time')
    @output_include_tag = plaintext_flag(conf, 'output_include_tag')
    @output_data_type = conf['output_data_type'] || 'json'

    # Read the setting from conf: @field_separator is not populated by this
    # module before this point. It is kept as a fallback for including classes
    # that declare it themselves; literal separators are accepted so a repeated
    # #configure is idempotent.
    @field_separator = case conf['field_separator'] || @field_separator
                       when 'SPACE', ' ' then ' '
                       when 'COMMA', ',' then ','
                       else "\t"
                       end
    @add_newline = plaintext_flag(conf, 'add_newline')

    @remove_prefix = conf['remove_prefix']
    if @remove_prefix
      @removed_prefix_string = @remove_prefix + '.'
      @removed_length = @removed_prefix_string.length
    end
    if @output_include_tag && @remove_prefix && @remove_prefix.length > 0
      @default_tag = conf['default_tag']
      if @default_tag.nil? || @default_tag.length < 1
        raise Fluent::ConfigError, "Missing 'default_tag' with output_include_tag and remove_prefix."
      end
    end

    # default timezone: utc
    if (conf['localtime'].nil? && conf['utc'].nil?) || (!@localtime && !@utc)
      @utc = true
      @localtime = false
    end
    # mix-in default time formatter (or you can overwrite @timef on your own configure)
    @timef = @output_include_time ? Fluent::TimeFormatter.new(@time_format, @localtime) : nil

    @custom_attributes = plaintext_attributes(@output_data_type)

    # Singleton definitions shadow any #format / #stringify_record declared on
    # the including class.
    instance_eval {
      def stringify_record(record)
        plaintext_stringify(record)
      end

      def format(tag, time, record)
        plaintext_format(tag, time, record)
      end
    }
  end

  # Serializes +record+ according to output_data_type: JSON, or the configured
  # attributes joined by the field separator ('NULL' for missing/nil/false values).
  def stringify_record(record)
    plaintext_stringify(record)
  end

  # Builds the output line for one event: optional time, optional tag and the
  # serialized record, joined by the field separator.
  def format(tag, time, record)
    plaintext_format(tag, time, record)
  end

  private

  # Boolean option +key+ from +conf+; true when Fluent::Config.bool_value yields nil.
  def plaintext_flag(conf, key)
    value = Fluent::Config.bool_value(conf[key])
    value.nil? ? true : value
  end

  # Attribute names selected by +data_type+: [] for 'json', otherwise the
  # comma-separated list after 'attr:'. Raises Fluent::ConfigError on any
  # other value or on an empty list.
  def plaintext_attributes(data_type)
    return [] if data_type == 'json'

    unless data_type =~ /^attr:(.*)$/
      raise Fluent::ConfigError, "Invalid output_data_type: '#{data_type}'. specify 'json' or 'attr:ATTRIBUTE_NAME' or 'attr:ATTR1,ATTR2,...'"
    end
    attributes = $1.split(',')
    if attributes.empty?
      raise Fluent::ConfigError, "Invalid attributes specification: '#{data_type}', needs one or more attributes."
    end
    attributes
  end

  # Shared implementation behind #stringify_record.
  def plaintext_stringify(record)
    return record.to_json if @custom_attributes.nil? || @custom_attributes.empty?

    @custom_attributes.map { |attr| (record[attr] || 'NULL').to_s }.join(@field_separator)
  end

  # +tag+ with the configured prefix removed; @default_tag when the tag is
  # exactly the prefix. Returned unchanged when no prefix is configured or it
  # does not match.
  def plaintext_tag(tag)
    return tag unless @remove_prefix

    prefixed = tag.length > @removed_length && tag[0, @removed_length] == @removed_prefix_string
    return tag unless prefixed || tag == @remove_prefix

    tag[@removed_length..-1] || @default_tag
  end

  # Shared implementation behind #format.
  def plaintext_format(tag, time, record)
    fields = []
    fields << @timef.format(time) if @output_include_time
    fields << plaintext_tag(tag) if @output_include_tag
    fields << stringify_record(record)
    line = fields.join(@field_separator)
    # Only an explicit false suppresses the terminator, so an instance that
    # has not been configured yet still emits newline-terminated lines.
    @add_newline == false ? line : line + "\n"
  end
end
203
+
204
# Time-sliced Fluentd output ('hoop') that appends buffered chunks to files on
# HDFS through a Hoop (HDFS http-fs) server.
#
# Required parameters: hoop_server (HOST:PORT), path (absolute HDFS path, may
# contain strftime conversions) and username (Hoop pseudo-authentication user).
# Line formatting is provided by FluentExt::PlainTextFormatterMixin.
class Fluent::HoopOutput < Fluent::TimeSlicedOutput
  Fluent::Plugin.register_output('hoop', self)

  config_set_default :buffer_type, 'memory'
  config_set_default :time_slice_format, '%Y%m%d' # %Y%m%d%H
  # config_param :tag_format, :string, :default => 'all' # or 'last'(last.part.of.tag => tag) or 'none'

  config_param :hoop_server, :string # host:port
  config_param :path, :string # /path/pattern/to/hdfs/file can use %Y %m %d %H %M %S and %T(tag, not-supported-yet)
  config_param :username, :string # hoop pseudo username

  include FluentExt::PlainTextFormatterMixin
  config_set_default :output_include_time, true
  config_set_default :output_include_tag, true
  config_set_default :output_data_type, 'json'
  config_set_default :field_separator, "\t"
  config_set_default :add_newline, true
  config_set_default :remove_prefix, nil

  def initialize
    super
    require 'net/http'
    require 'time'
  end

  # Validates hoop_server and path, and narrows time_slice_format to the
  # finest time unit that appears in the path pattern.
  #
  # Raises Fluent::ConfigError when hoop_server is not HOST:PORT or when path
  # does not start with '/'.
  def configure(conf)
    if conf['path']
      # The slice must be at least as fine as the finest conversion in the
      # path, otherwise one chunk would map onto several files.
      if conf['path'].index('%S')
        conf['time_slice_format'] = '%Y%m%d%H%M%S'
      elsif conf['path'].index('%M')
        conf['time_slice_format'] = '%Y%m%d%H%M'
      elsif conf['path'].index('%H')
        conf['time_slice_format'] = '%Y%m%d%H'
      end
    end

    super

    unless /\A([a-zA-Z0-9][-a-zA-Z0-9.]*):(\d+)\Z/ =~ @hoop_server
      raise Fluent::ConfigError, "Invalid config value on hoop_server: '#{@hoop_server}', needs SERVER_NAME:PORT"
    end
    @host = $1
    @port = $2.to_i
    unless @path.index('/') == 0
      raise Fluent::ConfigError, "Path on hdfs MUST starts with '/', but '#{@path}'"
    end
    @conn = nil
    @header = {'Content-Type' => 'application/octet-stream'}

    # The mixin's configure has already turned @field_separator into a literal
    # character, so accept both the symbolic names and the literals.
    @f_separator = case @field_separator
                   when 'SPACE', ' ' then ' '
                   when 'COMMA', ',' then ','
                   else "\t"
                   end
    # Line terminator used by #format below.
    @line_end = @add_newline ? "\n" : ''
  end

  # Performs the initial status request against the Hoop server and stores
  # the authentication cookie. Any failure is logged and re-raised.
  def start
    super

    # okey, net/http has reconnect feature. see test_out_hoop_reconnect.rb
    begin
      # Block form: the connection is closed even when authentication raises.
      Net::HTTP.start(@host, @port) do |conn|
        update_authorized_header(conn, 'initalize request failed')
      end
    rescue
      $log.error "failed to connect hoop server: #{@host} port #{@port}"
      raise
    end
    $log.info "connected hoop server: #{@host} port #{@port}"
  end

  def shutdown
    super
  end

  # JSON serialization of +record+.
  def record_to_string(record)
    record.to_json
  end

  # Class-level formatter: time, tag and JSON record joined by the field
  # separator. Once #configure has run, the mixin's per-instance #format
  # takes precedence over this method.
  def format(tag, time, record)
    time_str = @timef.format(time)
    time_str + @f_separator + tag + @f_separator + record_to_string(record) + @line_end
  end

  # Expands the path pattern for the time slice identified by +chunk_key+.
  def path_format(chunk_key)
    Time.strptime(chunk_key, @time_slice_format).strftime(@path)
  end

  # Appends +data+ to +path+ and returns the final Net::HTTPResponse.
  #
  # * 401: the cookie is refreshed and the append is retried once.
  # * 404: the file is created with +data+ as its initial content.
  # * 500: the whole operation is retried after 0.3s, up to 3 retries, then
  #   StandardError is raised.
  # Any final status other than 200/201 is logged as a warning.
  def send_data(path, data, retries=0)
    # Block form: the connection is finished even when a request raises.
    res = Net::HTTP.start(@host, @port) do |conn|
      conn.read_timeout = 5
      response = conn.request_put(path + "?op=append", data, @authorized_header)
      if response.code == '401'
        update_authorized_header(conn, 'Failed to update authorized cookie')
        response = conn.request_put(path + "?op=append", data, @authorized_header)
      end
      if response.code == '404'
        response = conn.request_post(path + "?op=create&overwrite=false", data, @authorized_header)
      end
      response
    end

    if res.code == '500'
      if retries >= 3
        raise StandardError, "failed to send_data with retry 3 times InternalServerError"
      end
      sleep 0.3 # yes, this is a magic number
      # The failed attempt's connection is already closed at this point.
      return send_data(path, data, retries + 1)
    end

    if res.code != '200' and res.code != '201'
      $log.warn "failed to write data to path: #{path}, code: #{res.code} #{res.message}"
    end
    res
  end

  # Writes one buffered chunk to the HDFS file derived from the chunk key and
  # returns that path. Failures are logged and re-raised.
  def write(chunk)
    hdfs_path = path_format(chunk.key)
    begin
      send_data(hdfs_path, chunk.read)
    rescue
      $log.error "failed to communicate server, #{@host} port #{@port}, path: #{hdfs_path}"
      raise
    end
    hdfs_path
  end

  private

  # Requests the server status as @username over +conn+ and stores the
  # returned cookie in @authorized_header. Logs and raises Fluent::ConfigError,
  # prefixed with +failure_message+, when the request fails or no cookie is
  # returned.
  def update_authorized_header(conn, failure_message)
    res = conn.request_get("/?op=status&user.name=#{@username}")
    unless res.code.to_i < 300 && res['Set-Cookie']
      $log.error "#{failure_message}, code: #{res.code}, message: #{res.body}"
      raise Fluent::ConfigError, "#{failure_message}, code: #{res.code}, message: #{res.body}"
    end
    @authorized_header = {'Cookie' => res['Set-Cookie'].split(';')[0], 'Content-Type' => 'application/octet-stream'}
  end
end