fluent-plugin-hoop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,31 @@
1
+ # rcov generated
2
+ coverage
3
+
4
+ # rdoc generated
5
+ rdoc
6
+
7
+ # yard generated
8
+ doc
9
+ .yardoc
10
+
11
+ # bundler
12
+ .bundle
13
+
14
+ # jeweler generated
15
+ pkg
16
+
17
+ # For MacOS
18
+ .DS_Store
19
+
20
+ # For TextMate, emacs, vim
21
+ *.tmproj
22
+ tmtags
23
+ *~
24
+ \#*
25
+ .\#*
26
+ *.swp
27
+
28
+ # not to lock gems version, and for bundler
29
+ Gemfile.lock
30
+ vendor
31
+ vendor/fluentd
@@ -0,0 +1,3 @@
1
+ [submodule "vendor/fluentd"]
2
+ path = vendor/fluentd
3
+ url = git://github.com/fluent/fluentd.git
data/AUTHORS ADDED
@@ -0,0 +1 @@
1
+ TAGOMORI Satoshi <tagomoris _at_ gmail.com>
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.4"
  gem "rcov", ">= 0"
end

# Declared outside the :development group, so it is a default-group dependency.
gem "rdoc"
16
+
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2011 TAGOMORI Satoshi
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -0,0 +1,79 @@
1
+ = Hoop plugin for Fluentd
2
+
3
+ == Component
4
+
5
+ === HoopOutput
6
+
7
+ Store fluent-event as plain text to HDFS, over Hoop (HDFS http-fs).
8
+
9
+ Hoop was originally written at Cloudera and was later merged into the Apache Hadoop 0.23 tree. See:
10
+
11
+ [Apache Hadoop dev doc] https://github.com/apache/hadoop-common/blob/trunk/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/index.apt.vm
12
+ [Cloudera Hoop doc (obsolete)] http://cloudera.github.com/hoop/docs/latest/index.html
13
+
14
+ HoopOutput slices data by time (in the specified units) and stores it as plain text on HDFS. You can choose to:
15
+
16
+ - format whole data as serialized JSON, single attribute or separated multi attributes
17
+ - include time as line header, or not
18
+ - include tag as line header, or not
19
+ - change field separator (default: TAB)
20
+
21
+ == Configuration
22
+
23
+ === HoopOutput
24
+
25
+ Minimal configuration (output: TAB separated time,tag,json-serialized-data and terminated with newline):
26
+
27
+ <match hoop.**>
28
+ type hoop
29
+ hoop_server hoop-server.local:14000
30
+
31
+ # %Y %m %d %H %M %S are available as conversion specifications in path on hdfs
32
+ # If '%Y%m%d' specified, logs are sliced into per-day files automatically.
33
+ path /hoop/log-%Y%m/log-%Y%m%d.log
34
+
35
+ # 'username' is used for pseudo authentication, see http://cloudera.github.com/hoop/docs/latest/HttpRestApi.html
36
+ username hoopuser
37
+ </match>
38
+
39
+ You will get output like below in hdfs file such as '/hoop/log-201112/log-20111231.log'
40
+
41
+ 2011-12-31T13:14:15Z [TAB] hoop.foo.bar [TAB] {"field1":12345,"field2":"one two three four five","field3":"OK"} [terminated by newline]
42
+ 2011-12-31T21:22:23Z [TAB] hoop.foo.val [TAB] {"field1":23456,"field2":"two three four five six","field3":"BAD"} [terminated by newline]
43
+
44
+ Single attribute with tag (removed prefix 'hoop.'), without time, separated by SPACE and NOT to terminate by newline ('message' data will be terminated with newline).
45
+
46
+ <match hoop.**>
47
+ type hoop
48
+ hoop_server hoop-server.local:14000
49
+ path /hoop/log-%Y%m/log-%Y%m%d-%H.log
50
+ username hoopuser
51
+
52
+ output_include_time false
53
+ output_include_tag true
54
+
55
+ # If you want multiple attribute, specify like 'attr:field1,field2,field3'
56
+ output_data_type attr:message
57
+
58
+ # field_separator allows 'SPACE', 'COMMA' and 'TAB'(default)
59
+ field_separator SPACE
60
+
61
+ # add_newline defaults to true
62
+ add_newline false
63
+
64
+ # tag 'hoop.foo.bar' is shortened to 'foo.bar'
65
+ remove_prefix hoop
66
+
67
+ # default_tag is used for tags that consist only of the remove_prefix string, like 'hoop'
68
+ default_tag unknown
69
+ </match>
70
+
71
+ == TODO
72
+
73
+ - consider what to do next
74
+ - patches welcome!
75
+
76
+ == Copyright
77
+
78
+ Copyright:: Copyright (c) 2011- TAGOMORI Satoshi (tagomoris)
79
+ License:: Apache License, Version 2.0
@@ -0,0 +1,64 @@
1
# encoding: utf-8

# Rake tasks for fluent-plugin-hoop: gem packaging (jeweler), unit tests,
# rcov coverage and RDoc generation.

require 'rubygems'
require 'bundler'
begin
  # Activate the gems declared in the Gemfile before loading anything else.
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "fluent-plugin-hoop"
  gem.description = "Hoop (HDFS http-fs) plugin for Fluent event collector"
  gem.homepage = "http://github.com/tagomoris/fluent-plugin-hoop"
  gem.summary = gem.description
  # gem.version = File.read("VERSION").strip
  gem.authors = ["TAGOMORI Satoshi"]
  gem.email = "tagomoris@gmail.com"
  gem.has_rdoc = false
  # gem.license = "Apache License v2.0"
  # The packaged file lists are taken from what git tracks.
  gem.files = `git ls-files`.split("\n")
  gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.require_paths = ['lib']
  gem.add_dependency "fluentd", "~> 0.10.8"
  gem.add_development_dependency "rake", ">= 0.9.2"
  gem.add_development_dependency "simplecov", ">= 0.5.4"
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  # NOTE(review): FLUENT_TEST_DEBUG is switched on when DEBUG is *not* set;
  # confirm this polarity is intended.
  unless ENV['DEBUG']
    ENV['FLUENT_TEST_DEBUG'] = 'TRUE'
  end
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
  test.libs << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
  test.rcov_opts << '--exclude "gems/*"'
end

task :default => :test

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not leak into
  # the generated documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "fluent-plugin-hoop #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,75 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for fluent-plugin-hoop 0.1.0 (originally emitted by jeweler).
Gem::Specification.new do |s|
  s.name = 'fluent-plugin-hoop'
  s.version = "0.1.0"

  if s.respond_to? :required_rubygems_version=
    s.required_rubygems_version = Gem::Requirement.new(">= 0")
  end
  s.authors = ['TAGOMORI Satoshi']
  s.date = '2011-12-26'
  s.description = 'Hoop (HDFS http-fs) plugin for Fluent event collector'
  s.email = 'tagomoris@gmail.com'
  s.extra_rdoc_files = ["LICENSE.txt", "README.rdoc"]

  packaged_tests = [
    'test/helper.rb',
    'test/plugin/test_out_hoop.rb',
    'test/plugin/test_out_hoop_realserver.rb',
    'test/plugin/test_out_hoop_reconnect.rb'
  ]
  s.files = [
    ".document",
    ".gitignore",
    ".gitmodules",
    "AUTHORS",
    "Gemfile",
    "LICENSE.txt",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "fluent-plugin-hoop.gemspec",
    "lib/fluent/plugin/out_hoop.rb"
  ] + packaged_tests
  s.homepage = 'http://github.com/tagomoris/fluent-plugin-hoop'
  s.require_paths = ['lib']
  s.rubygems_version = '1.8.6'
  s.summary = 'Hoop (HDFS http-fs) plugin for Fluent event collector'
  s.test_files = packaged_tests.dup

  # Dependencies in declaration order: [name, version requirement, kind].
  dependency_table = [
    ['rdoc',      '>= 0',      :runtime],
    ['shoulda',   '>= 0',      :development],
    ['bundler',   '~> 1.0.0',  :development],
    ['jeweler',   '~> 1.6.4',  :development],
    ['rcov',      '>= 0',      :development],
    ['fluentd',   '~> 0.10.8', :runtime],
    ['rake',      '>= 0.9.2',  :development],
    ['simplecov', '>= 0.5.4',  :development]
  ]

  # Runtime/development dependencies are only distinguished when the spec
  # supports specification_version and RubyGems is at least 1.2.0; otherwise
  # everything is registered through plain add_dependency.
  typed_dependencies = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    typed_dependencies = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  dependency_table.each do |gem_name, requirement, kind|
    if !typed_dependencies
      s.add_dependency(gem_name, [requirement])
    elsif kind == :runtime
      s.add_runtime_dependency(gem_name, [requirement])
    else
      s.add_development_dependency(gem_name, [requirement])
    end
  end
end
75
+
@@ -0,0 +1,339 @@
1
module FluentExt; end

# Mixin for Fluentd output plugins that renders every event as one line of
# plain text: an optional time header, an optional tag header and the record
# itself, joined by a configurable field separator.
#
# Settings read from the configuration by #configure:
#
# output_include_time:: prepend the formatted event time (default: true)
# output_include_tag::  prepend the tag (default: true)
# output_data_type::    'json' (default), 'attr:FIELD' or 'attr:F1,F2,...'
# field_separator::     'SPACE', 'COMMA' or anything else for TAB (default)
# add_newline::         terminate each line with "\n" (default: true)
# remove_prefix::       tag prefix stripped from emitted tags
# default_tag::         tag used when the tag equals remove_prefix exactly
#
# The including class must provide a +configure+ reachable through +super+.
module FluentExt::PlainTextFormatterMixin
  # config_param :output_data_type, :string, :default => 'json' # or 'attr:field' or 'attr:field1,field2,field3(...)'

  attr_accessor :output_include_time, :output_include_tag, :output_data_type
  attr_accessor :add_newline, :field_separator
  attr_accessor :remove_prefix, :default_tag

  # Reads the formatter settings from +conf+ and installs per-instance
  # +stringify_record+ and +format+ methods.
  #
  # conf:: configuration element, read with String keys via +[]+
  #
  # Raises Fluent::ConfigError when +output_data_type+ is invalid, or when a
  # non-empty +remove_prefix+ is used together with the tag header but no
  # +default_tag+ is given.
  def configure(conf)
    super

    # Boolean options fall back to true when bool_value yields nil.
    @output_include_time = Fluent::Config.bool_value(conf['output_include_time'])
    @output_include_time = true if @output_include_time.nil?

    @output_include_tag = Fluent::Config.bool_value(conf['output_include_tag'])
    @output_include_tag = true if @output_include_tag.nil?

    @output_data_type = conf['output_data_type']
    @output_data_type = 'json' if @output_data_type.nil?

    # The separator name comes from the configuration. A value already placed
    # in @field_separator by the host class is only used as a fallback, so
    # hosts that do not declare a config_param for it still honour
    # 'field_separator SPACE' / 'field_separator COMMA'.
    separator_name = conf['field_separator'] || @field_separator
    @field_separator = case separator_name
                       when 'SPACE' then ' '
                       when 'COMMA' then ','
                       else "\t"
                       end

    @add_newline = Fluent::Config.bool_value(conf['add_newline'])
    @add_newline = true if @add_newline.nil?

    @remove_prefix = conf['remove_prefix']
    if @remove_prefix
      # Precompute "prefix." and its length for the per-event tag check.
      @removed_prefix_string = @remove_prefix + '.'
      @removed_length = @removed_prefix_string.length
    end
    if @output_include_tag && @remove_prefix && @remove_prefix.length > 0
      @default_tag = conf['default_tag']
      if @default_tag.nil? || @default_tag.length < 1
        raise Fluent::ConfigError, "Missing 'default_tag' with output_include_tag and remove_prefix."
      end
    end

    # default timezone: utc
    if (conf['localtime'].nil? && conf['utc'].nil?) || (!@localtime && !@utc)
      @utc = true
      @localtime = false
    end
    # mix-in default time formatter (or you can overwrite @timef on your own configure)
    @timef = @output_include_time ? Fluent::TimeFormatter.new(@time_format, @localtime) : nil

    @custom_attributes = []
    if @output_data_type == 'json'
      instance_eval do
        def stringify_record(record)
          record.to_json
        end
      end
    elsif @output_data_type =~ /^attr:(.*)$/
      @custom_attributes = $1.split(',')
      if @custom_attributes.empty?
        raise Fluent::ConfigError, "Invalid attributes specification: '#{@output_data_type}', needs one or more attributes."
      end
      instance_eval do
        # Missing (nil/false) attributes are rendered as 'NULL'.
        def stringify_record(record)
          @custom_attributes.map { |attr| (record[attr] || 'NULL').to_s }.join(@field_separator)
        end
      end
    else
      raise Fluent::ConfigError, "Invalid output_data_type: '#{@output_data_type}'. specify 'json' or 'attr:ATTRIBUTE_NAME' or 'attr:ATTR1,ATTR2,...'"
    end

    # Installed on the instance itself so that it takes precedence over a
    # +format+ method defined by the including class.
    instance_eval do
      def format(tag, time, record)
        format_plain_text(tag, time, record)
      end
    end
  end

  # Default record serializer: the record as JSON.
  def stringify_record(record)
    record.to_json
  end

  # Renders one event as a line of text according to the configured options.
  #
  # tag::    event tag (String)
  # time::   event time, passed to the time formatter
  # record:: event record, passed to #stringify_record
  def format(tag, time, record)
    format_plain_text(tag, time, record)
  end

  private

  # Builds "[time SEP][tag SEP]record[\n]" from the current settings.
  def format_plain_text(tag, time, record)
    header = ''
    header += @timef.format(time) + @field_separator if @output_include_time
    header += plain_text_tag(tag) + @field_separator if @output_include_tag
    line = header + stringify_record(record)
    @add_newline ? line + "\n" : line
  end

  # Returns the tag with the configured prefix removed; a tag equal to the
  # bare prefix becomes the default tag. Tags are returned untouched when no
  # prefix is configured or the tag does not start with "prefix.".
  def plain_text_tag(tag)
    return tag unless @remove_prefix

    prefixed = tag.length > @removed_length && tag[0, @removed_length] == @removed_prefix_string
    return tag unless prefixed || tag == @remove_prefix

    # For a tag equal to the bare prefix the slice starts past the end of the
    # string and yields nil, which selects the default tag.
    tag[@removed_length..-1] || @default_tag
  end
end
203
+
204
# Fluentd time-sliced output plugin that writes buffered chunks as plain text
# to HDFS through a Hoop (HDFS http-fs) server.
#
# Required configuration:
# hoop_server:: Hoop endpoint as HOST:PORT
# path::        absolute HDFS path pattern, expanded with strftime per chunk
# username::    user name sent as the user.name query parameter
#
# Line formatting is provided by FluentExt::PlainTextFormatterMixin.
class Fluent::HoopOutput < Fluent::TimeSlicedOutput
  Fluent::Plugin.register_output('hoop', self)

  config_set_default :buffer_type, 'memory'
  config_set_default :time_slice_format, '%Y%m%d' # %Y%m%d%H
  # config_param :tag_format, :string, :default => 'all' # or 'last'(last.part.of.tag => tag) or 'none'

  config_param :hoop_server, :string      # host:port
  config_param :path, :string             # /path/pattern/to/hdfs/file can use %Y %m %d %H %M %S and %T(tag, not-supported-yet)
  config_param :username, :string         # hoop pseudo username

  include FluentExt::PlainTextFormatterMixin
  config_set_default :output_include_time, true
  config_set_default :output_include_tag, true
  config_set_default :output_data_type, 'json'
  config_set_default :field_separator, "\t"
  config_set_default :add_newline, true
  config_set_default :remove_prefix, nil

  def initialize
    super
    require 'net/http'
    require 'time'
    require 'uri'
  end

  # Validates the settings and derives connection parameters.
  #
  # The time slice format is widened to the finest time unit used in +path+
  # (%S, then %M, then %H) so that each chunk maps to exactly one file.
  #
  # Raises Fluent::ConfigError when hoop_server is not HOST:PORT or when path
  # does not start with '/'.
  def configure(conf)
    if conf['path']
      if conf['path'].index('%S')
        conf['time_slice_format'] = '%Y%m%d%H%M%S'
      elsif conf['path'].index('%M')
        conf['time_slice_format'] = '%Y%m%d%H%M'
      elsif conf['path'].index('%H')
        conf['time_slice_format'] = '%Y%m%d%H'
      end
    end

    super

    unless /\A([a-zA-Z0-9][-a-zA-Z0-9.]*):(\d+)\Z/ =~ @hoop_server
      raise Fluent::ConfigError, "Invalid config value on hoop_server: '#{@hoop_server}', needs SERVER_NAME:PORT"
    end
    @host = $1
    @port = $2.to_i
    unless @path.index('/') == 0
      raise Fluent::ConfigError, "Path on hdfs MUST starts with '/', but '#{@path}'"
    end
    @conn = nil
    @header = {'Content-Type' => 'application/octet-stream'}

    # @field_separator may hold either a separator name or, once the
    # formatter mixin has resolved it, the separator character itself.
    @f_separator = case @field_separator
                   when 'SPACE' then ' '
                   when 'COMMA' then ','
                   when ' ', ',' then @field_separator
                   else "\t"
                   end
    # Line terminator used by #format below.
    @line_end = @add_newline ? "\n" : ''
  end

  # Checks that the Hoop server is reachable and obtains the authentication
  # cookie used by later requests. Failures are logged and re-raised.
  def start
    super

    # okay, net/http has reconnect feature. see test_out_hoop_reconnect.rb
    conn = nil
    begin
      conn = Net::HTTP.start(@host, @port)
      refresh_authorized_header(conn, 'initalize request failed')
    rescue
      $log.error "failed to connect hoop server: #{@host} port #{@port}"
      raise
    ensure
      # Always release the probe connection, also when authentication fails.
      conn.finish if conn && conn.started?
    end
    $log.info "connected hoop server: #{@host} port #{@port}"
  end

  def shutdown
    super
  end

  # Serializes a record as JSON.
  def record_to_string(record)
    record.to_json
  end

  # Class-level formatter: time, tag and JSON record joined by the separator.
  # The formatter mixin's #configure installs a per-instance +format+ which
  # takes precedence over this method on configured instances.
  def format(tag, time, record)
    time_str = @timef.format(time)
    time_str + @f_separator + tag + @f_separator + record_to_string(record) + @line_end
  end

  # Expands the path pattern with the time parsed from the chunk key.
  def path_format(chunk_key)
    Time.strptime(chunk_key, @time_slice_format).strftime(@path)
  end

  # Appends +data+ to the HDFS file at +path+.
  #
  # * 401: the authentication cookie is refreshed and the append repeated.
  # * 404: the file is created with the data instead.
  # * 500: the whole request is retried, up to 3 times, after a short pause.
  #
  # path::    absolute HDFS path
  # data::    request body
  # retries:: number of retries already performed (internal)
  #
  # Returns the final Net::HTTPResponse; a non-200/201 result is logged as a
  # warning. Raises StandardError after 3 failed retries on 500, and
  # Fluent::ConfigError when the cookie cannot be refreshed.
  def send_data(path, data, retries=0)
    conn = Net::HTTP.start(@host, @port)
    begin
      conn.read_timeout = 5
      res = conn.request_put(path + "?op=append", data, @authorized_header)
      if res.code == '401'
        refresh_authorized_header(conn, 'Failed to update authorized cookie')
        res = conn.request_put(path + "?op=append", data, @authorized_header)
      end
      if res.code == '404'
        res = conn.request_post(path + "?op=create&overwrite=false", data, @authorized_header)
      end
    ensure
      # Close the connection on every path, including exceptions, and before
      # sleeping for a retry.
      conn.finish if conn.started?
    end

    if res.code == '500'
      if retries >= 3
        raise StandardError, "failed to send_data with retry 3 times InternalServerError"
      end
      sleep 0.3 # yes, this is a magic number
      # The retried call logs its own outcome, so return its result directly.
      return send_data(path, data, retries + 1)
    end

    if res.code != '200' and res.code != '201'
      $log.warn "failed to write data to path: #{path}, code: #{res.code} #{res.message}"
    end
    res
  end

  # Writes one buffer chunk to the HDFS file derived from the chunk key.
  # Returns the HDFS path written to; communication errors are logged and
  # re-raised.
  def write(chunk)
    hdfs_path = path_format(chunk.key)
    begin
      send_data(hdfs_path, chunk.read)
    rescue
      $log.error "failed to communicate server, #{@host} port #{@port}, path: #{hdfs_path}"
      raise
    end
    hdfs_path
  end

  private

  # Requests the server status as the configured user and stores the returned
  # cookie in @authorized_header.
  #
  # conn::          started Net::HTTP connection
  # failure_label:: prefix of the error message when the request is rejected
  #
  # Raises Fluent::ConfigError when the response is not successful or carries
  # no Set-Cookie header.
  def refresh_authorized_header(conn, failure_label)
    # Escape the user name so special characters cannot corrupt the query.
    res = conn.request_get("/?op=status&user.name=#{URI.encode_www_form_component(@username)}")
    unless res.code.to_i < 300 and res['Set-Cookie']
      message = "#{failure_label}, code: #{res.code}, message: #{res.body}"
      $log.error message
      raise Fluent::ConfigError, message
    end
    @authorized_header = {'Cookie' => res['Set-Cookie'].split(';')[0], 'Content-Type' => 'application/octet-stream'}
  end
end