fluent-plugin-hoop 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +31 -0
- data/.gitmodules +3 -0
- data/AUTHORS +1 -0
- data/Gemfile +16 -0
- data/LICENSE.txt +13 -0
- data/README.rdoc +79 -0
- data/Rakefile +64 -0
- data/VERSION +1 -0
- data/fluent-plugin-hoop.gemspec +75 -0
- data/lib/fluent/plugin/out_hoop.rb +339 -0
- data/test/helper.rb +53 -0
- data/test/plugin/test_out_hoop.rb +441 -0
- data/test/plugin/test_out_hoop_realserver.rb +145 -0
- data/test/plugin/test_out_hoop_reconnect.rb +185 -0
- metadata +156 -0
data/.document
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# rcov generated
|
2
|
+
coverage
|
3
|
+
|
4
|
+
# rdoc generated
|
5
|
+
rdoc
|
6
|
+
|
7
|
+
# yard generated
|
8
|
+
doc
|
9
|
+
.yardoc
|
10
|
+
|
11
|
+
# bundler
|
12
|
+
.bundle
|
13
|
+
|
14
|
+
# jeweler generated
|
15
|
+
pkg
|
16
|
+
|
17
|
+
# For MacOS
|
18
|
+
.DS_Store
|
19
|
+
|
20
|
+
# For TextMate, emacs, vim
|
21
|
+
*.tmproj
|
22
|
+
tmtags
|
23
|
+
*~
|
24
|
+
\#*
|
25
|
+
.\#*
|
26
|
+
*.swp
|
27
|
+
|
28
|
+
# not to lock gems version, and for bundler
|
29
|
+
Gemfile.lock
|
30
|
+
vendor
|
31
|
+
vendor/fluentd
|
data/.gitmodules
ADDED
data/AUTHORS
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
TAGOMORI Satoshi <tagomoris _at_ gmail.com>
|
data/Gemfile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.4"
  gem "rcov", ">= 0"
end

# NOTE(review): rdoc is declared outside the :development group, and the
# generated gemspec lists it as a runtime dependency -- confirm that is intended.
gem "rdoc"
|
16
|
+
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2011 TAGOMORI Satoshi
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.rdoc
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
= Hoop plugin for Fluentd
|
2
|
+
|
3
|
+
== Component
|
4
|
+
|
5
|
+
=== HoopOutput
|
6
|
+
|
7
|
+
Stores Fluentd events as plain text on HDFS, via Hoop (HDFS http-fs).
|
8
|
+
|
9
|
+
Hoop was originally written at Cloudera and has been merged into the Apache Hadoop 0.23 tree. See:
|
10
|
+
|
11
|
+
[Apache Hadoop dev doc] https://github.com/apache/hadoop-common/blob/trunk/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/index.apt.vm
|
12
|
+
[Cloudera Hoop doc (obsolete)] http://cloudera.github.com/hoop/docs/latest/index.html
|
13
|
+
|
14
|
+
HoopOutput slices data by time (in the specified units) and stores it as plain text on HDFS. You can choose to:
|
15
|
+
|
16
|
+
- format whole data as serialized JSON, single attribute or separated multi attributes
|
17
|
+
- include time as line header, or not
|
18
|
+
- include tag as line header, or not
|
19
|
+
- change field separator (default: TAB)
|
20
|
+
|
21
|
+
== Configuration
|
22
|
+
|
23
|
+
=== HoopOutput
|
24
|
+
|
25
|
+
Minimal configuration (output: TAB separated time,tag,json-serialized-data and terminated with newline):
|
26
|
+
|
27
|
+
<match hoop.**>
|
28
|
+
type hoop
|
29
|
+
hoop_server hoop-server.local:14000
|
30
|
+
|
31
|
+
# %Y %m %d %H %M %S are available as conversion specifications in path on hdfs
|
32
|
+
# If '%Y%m%d' specified, logs are sliced into per-day files automatically.
|
33
|
+
path /hoop/log-%Y%m/log-%Y%m%d.log
|
34
|
+
|
35
|
+
# 'username' is used for pseudo authentication, see http://cloudera.github.com/hoop/docs/latest/HttpRestApi.html
|
36
|
+
username hoopuser
|
37
|
+
</match>
|
38
|
+
|
39
|
+
You will get output like below in hdfs file such as '/hoop/log-201112/log-20111231.log'
|
40
|
+
|
41
|
+
2011-12-31T13:14:15Z [TAB] hoop.foo.bar [TAB] {"field1":12345,"field2":"one two three four five","field3":"OK"} [terminated by newline]
|
42
|
+
2011-12-31T21:22:23Z [TAB] hoop.foo.val [TAB] {"field1":23456,"field2":"two three four five six","field3":"BAD"} [terminated by newline]
|
43
|
+
|
44
|
+
Single attribute with tag (prefix 'hoop.' removed), without time, separated by SPACE, and with no newline appended ('message' data is assumed to be already terminated with a newline):
|
45
|
+
|
46
|
+
<match hoop.**>
|
47
|
+
type hoop
|
48
|
+
hoop_server hoop-server.local:14000
|
49
|
+
path /hoop/log-%Y%m/log-%Y%m%d-%H.log
|
50
|
+
username hoopuser
|
51
|
+
|
52
|
+
output_include_time false
|
53
|
+
output_include_tag true
|
54
|
+
|
55
|
+
# If you want multiple attributes, specify them like 'attr:field1,field2,field3'
|
56
|
+
output_data_type attr:message
|
57
|
+
|
58
|
+
# field_separator allows 'SPACE', 'COMMA' and 'TAB'(default)
|
59
|
+
field_separator SPACE
|
60
|
+
|
61
|
+
# add_newline defaults to true
|
62
|
+
add_newline false
|
63
|
+
|
64
|
+
# tag 'hoop.foo.bar' is shortened to 'foo.bar'
|
65
|
+
remove_prefix hoop
|
66
|
+
|
67
|
+
# used when the tag consists only of the remove_prefix string, like 'hoop'
|
68
|
+
default_tag unknown
|
69
|
+
</match>
|
70
|
+
|
71
|
+
== TODO
|
72
|
+
|
73
|
+
- consider what to do next
|
74
|
+
- patches welcome!
|
75
|
+
|
76
|
+
== Copyright
|
77
|
+
|
78
|
+
Copyright:: Copyright (c) 2011- TAGOMORI Satoshi (tagomoris)
|
79
|
+
License:: Apache License, Version 2.0
|
data/Rakefile
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8

# Packaging, test, coverage and documentation tasks for fluent-plugin-hoop.

require 'rubygems'
require 'bundler'

# Activate the bundled gems. When Bundler reports a problem, print it together
# with a hint and exit with the status code carried by the error.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name = "fluent-plugin-hoop"
  spec.description = "Hoop (HDFS http-fs) plugin for Fluent event collector"
  spec.homepage = "http://github.com/tagomoris/fluent-plugin-hoop"
  spec.summary = spec.description
  # spec.version = File.read("VERSION").strip
  spec.authors = ["TAGOMORI Satoshi"]
  spec.email = "tagomoris@gmail.com"
  spec.has_rdoc = false
  # spec.license = "Apache License v2.0"

  # File lists are taken from what git tracks.
  spec.files = `git ls-files`.split("\n")
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables = `git ls-files -- bin/*`.split("\n").map { |bin_path| File.basename(bin_path) }
  spec.require_paths = ['lib']

  spec.add_dependency "fluentd", "~> 0.10.8"
  spec.add_development_dependency "rake", ">= 0.9.2"
  spec.add_development_dependency "simplecov", ">= 0.5.4"
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  # FLUENT_TEST_DEBUG is set whenever DEBUG is absent from the environment.
  ENV['FLUENT_TEST_DEBUG'] = 'TRUE' unless ENV['DEBUG']
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

require 'rcov/rcovtask'
Rcov::RcovTask.new do |t|
  t.libs << 'test'
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
  t.rcov_opts << '--exclude "gems/*"'
end

task :default => :test

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # The title carries the contents of the VERSION file when that file exists.
  version = if File.exist?('VERSION')
              File.read('VERSION')
            else
              ""
            end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "fluent-plugin-hoop #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
# -*- encoding: utf-8 -*-

Gem::Specification.new do |s|
  s.name = %q{fluent-plugin-hoop}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = [%q{TAGOMORI Satoshi}]
  s.date = %q{2011-12-26}
  s.description = %q{Hoop (HDFS http-fs) plugin for Fluent event collector}
  s.email = %q{tagomoris@gmail.com}
  s.extra_rdoc_files = ["LICENSE.txt", "README.rdoc"]
  s.files = [
    ".document",
    ".gitignore",
    ".gitmodules",
    "AUTHORS",
    "Gemfile",
    "LICENSE.txt",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "fluent-plugin-hoop.gemspec",
    "lib/fluent/plugin/out_hoop.rb",
    "test/helper.rb",
    "test/plugin/test_out_hoop.rb",
    "test/plugin/test_out_hoop_realserver.rb",
    "test/plugin/test_out_hoop_reconnect.rb"
  ]
  s.homepage = %q{http://github.com/tagomoris/fluent-plugin-hoop}
  s.require_paths = [%q{lib}]
  s.rubygems_version = %q{1.8.6}
  s.summary = %q{Hoop (HDFS http-fs) plugin for Fluent event collector}
  s.test_files = [
    %q{test/helper.rb},
    %q{test/plugin/test_out_hoop.rb},
    %q{test/plugin/test_out_hoop_realserver.rb},
    %q{test/plugin/test_out_hoop_reconnect.rb}
  ]

  # Dependencies in declaration order: [kind, gem name, version requirement].
  dependencies = [
    [:runtime,     %q<rdoc>,      ">= 0"],
    [:development, %q<shoulda>,   ">= 0"],
    [:development, %q<bundler>,   "~> 1.0.0"],
    [:development, %q<jeweler>,   "~> 1.6.4"],
    [:development, %q<rcov>,      ">= 0"],
    [:runtime,     %q<fluentd>,   "~> 0.10.8"],
    [:development, %q<rake>,      ">= 0.9.2"],
    [:development, %q<simplecov>, ">= 0.5.4"]
  ]

  # Runtime/development dependencies are only distinguished when the spec
  # responds to specification_version and RubyGems is at least 1.2.0;
  # otherwise everything is declared through add_dependency.
  typed_dependencies = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    typed_dependencies = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  dependencies.each do |kind, gem_name, requirement|
    if !typed_dependencies
      s.add_dependency(gem_name, [requirement])
    elsif kind == :runtime
      s.add_runtime_dependency(gem_name, [requirement])
    else
      s.add_development_dependency(gem_name, [requirement])
    end
  end
end
|
75
|
+
|
@@ -0,0 +1,339 @@
|
|
1
|
+
module FluentExt; end

# Mixin for Fluentd output plugins that write each event as one line of plain
# text: optional time, optional tag and the record, joined by a field separator.
#
# The including class must provide #configure(conf) through its ancestors
# (this module calls +super+), and +conf+ must answer #[] with string keys.
# Options read from +conf+:
#
# output_include_time:: boolean, default true
# output_include_tag::  boolean, default true
# output_data_type::    'json' (default), 'attr:NAME' or 'attr:N1,N2,...'
# field_separator::     'SPACE', 'COMMA', anything else means TAB
# add_newline::         boolean, default true
# remove_prefix::       tag prefix to strip (followed by '.')
# default_tag::         tag used when the whole tag equals remove_prefix
module FluentExt::PlainTextFormatterMixin
  # config_param :output_data_type, :string, :default => 'json' # or 'attr:field' or 'attr:field1,field2,field3(...)'

  attr_accessor :output_include_time, :output_include_tag, :output_data_type
  attr_accessor :add_newline, :field_separator
  attr_accessor :remove_prefix, :default_tag

  # Reads the formatting options from +conf+ and installs per-instance
  # #format and #stringify_record methods.
  #
  # Raises Fluent::ConfigError when default_tag is missing although it is
  # needed, or when output_data_type is not a supported specification.
  def configure(conf)
    super

    @output_include_time = Fluent::Config.bool_value(conf['output_include_time'])
    @output_include_time = true if @output_include_time.nil?

    @output_include_tag = Fluent::Config.bool_value(conf['output_include_tag'])
    @output_include_tag = true if @output_include_tag.nil?

    @output_data_type = conf['output_data_type']
    @output_data_type = 'json' if @output_data_type.nil?

    # Read the keyword from conf: the including class may not declare a
    # config_param for it, in which case the instance variable never holds the
    # configured value. The instance variable remains as a fallback for
    # classes that do declare one.
    @field_separator = case conf['field_separator'] || @field_separator
                       when 'SPACE' then ' '
                       when 'COMMA' then ','
                       else "\t"
                       end

    @add_newline = Fluent::Config.bool_value(conf['add_newline'])
    @add_newline = true if @add_newline.nil?

    @remove_prefix = conf['remove_prefix']
    if @remove_prefix
      @removed_prefix_string = @remove_prefix + '.'
      @removed_length = @removed_prefix_string.length
    end
    if @output_include_tag and @remove_prefix and @remove_prefix.length > 0
      # A tag equal to the prefix has nothing left after stripping, so a
      # replacement tag is mandatory.
      @default_tag = conf['default_tag']
      if @default_tag.nil? or @default_tag.length < 1
        raise Fluent::ConfigError, "Missing 'default_tag' with output_include_tag and remove_prefix."
      end
    end

    # default timezone: utc
    if (conf['localtime'].nil? and conf['utc'].nil?) or (not @localtime and not @utc)
      @utc = true
      @localtime = false
    end
    # mix-in default time formatter (or you can overwrite @timef on your own configure)
    @timef = @output_include_time ? Fluent::TimeFormatter.new(@time_format, @localtime) : nil

    @custom_attributes = []
    if @output_data_type == 'json'
      # nothing to parse: records are serialized with #to_json
    elsif (attr_spec = /^attr:(.*)$/.match(@output_data_type))
      @custom_attributes = attr_spec[1].split(',')
      if @custom_attributes.empty?
        raise Fluent::ConfigError, "Invalid attributes specification: '#{@output_data_type}', needs one or more attributes."
      end
    else
      raise Fluent::ConfigError, "Invalid output_data_type: '#{@output_data_type}'. specify 'json' or 'attr:ATTRIBUTE_NAME' or 'attr:ATTR1,ATTR2,...'"
    end

    # Defined on the singleton so that they win over a #format or
    # #stringify_record defined by the including class itself.
    define_singleton_method(:stringify_record) { |record| plain_text_stringify(record) }
    define_singleton_method(:format) { |tag, time, record| plain_text_format(tag, time, record) }
  end

  # Returns +record+ as a string: JSON when no attributes are configured,
  # otherwise the configured attributes joined by the field separator.
  def stringify_record(record)
    plain_text_stringify(record)
  end

  # Returns the output line for one event, built from the current settings.
  def format(tag, time, record)
    plain_text_format(tag, time, record)
  end

  private

  # Serializes +record+; an attribute whose value is nil or false is written as 'NULL'.
  def plain_text_stringify(record)
    return record.to_json if @custom_attributes.nil? or @custom_attributes.empty?

    @custom_attributes.map { |attr| (record[attr] || 'NULL').to_s }.join(@field_separator)
  end

  # Assembles "[time SEP][tag SEP]record[\n]" according to the configured flags.
  def plain_text_format(tag, time, record)
    line = ''
    line += @timef.format(time) + @field_separator if @output_include_time
    line += plain_text_tag(tag) + @field_separator if @output_include_tag
    line += stringify_record(record)
    line += "\n" if @add_newline
    line
  end

  # Strips "<remove_prefix>." from the front of +tag+; a tag that is exactly
  # the prefix becomes the default tag. Other tags are returned unchanged.
  def plain_text_tag(tag)
    return tag unless @remove_prefix

    has_prefix = tag[0, @removed_length] == @removed_prefix_string && tag.length > @removed_length
    return tag unless has_prefix or tag == @remove_prefix

    tag[@removed_length..-1] || @default_tag
  end
end
|
203
|
+
|
204
|
+
# Time-sliced Fluentd output registered as 'hoop'. Each buffered chunk is sent
# over HTTP to a Hoop (HDFS http-fs) server and appended to the HDFS file whose
# name is derived from the chunk key and the configured path pattern.
class Fluent::HoopOutput < Fluent::TimeSlicedOutput
  Fluent::Plugin.register_output('hoop', self)

  config_set_default :buffer_type, 'memory'
  config_set_default :time_slice_format, '%Y%m%d' # %Y%m%d%H
  # config_param :tag_format, :string, :default => 'all' # or 'last'(last.part.of.tag => tag) or 'none'

  config_param :hoop_server, :string # host:port
  config_param :path, :string # /path/pattern/to/hdfs/file can use %Y %m %d %H %M %S and %T(tag, not-supported-yet)
  config_param :username, :string # hoop pseudo username

  include FluentExt::PlainTextFormatterMixin
  config_set_default :output_include_time, true
  config_set_default :output_include_tag, true
  config_set_default :output_data_type, 'json'
  config_set_default :field_separator, "\t"
  config_set_default :add_newline, true
  config_set_default :remove_prefix, nil

  def initialize
    super
    require 'net/http'
    require 'time'
    require 'uri'
  end

  # Validates hoop_server and path and prepares connection settings.
  #
  # When the path pattern contains %S, %M or %H, time_slice_format in +conf+
  # is overwritten with the matching resolution before the parent classes
  # read it.
  #
  # Raises Fluent::ConfigError for a malformed hoop_server or a path that does
  # not start with '/'.
  def configure(conf)
    if conf['path']
      if conf['path'].index('%S')
        conf['time_slice_format'] = '%Y%m%d%H%M%S'
      elsif conf['path'].index('%M')
        conf['time_slice_format'] = '%Y%m%d%H%M'
      elsif conf['path'].index('%H')
        conf['time_slice_format'] = '%Y%m%d%H'
      end
    end

    super

    server = /\A([a-zA-Z0-9][-a-zA-Z0-9.]*):(\d+)\Z/.match(@hoop_server)
    unless server
      raise Fluent::ConfigError, "Invalid config value on hoop_server: '#{@hoop_server}', needs SERVER_NAME:PORT"
    end
    @host = server[1]
    @port = server[2].to_i
    unless @path.start_with?('/')
      raise Fluent::ConfigError, "Path on hdfs MUST starts with '/', but '#{@path}'"
    end
    @conn = nil
    @header = {'Content-Type' => 'application/octet-stream'}

    # @field_separator may hold either the config keyword or the separator
    # character itself, so both spellings are accepted here.
    @f_separator = case @field_separator
                   when 'SPACE', ' ' then ' '
                   when 'COMMA', ',' then ','
                   else "\t"
                   end
    # Line terminator used by the class-level #format below.
    @line_end = @add_newline ? "\n" : ''
  end

  # Checks that the Hoop server is reachable and obtains the authorization
  # cookie used by later requests. Any failure is logged and re-raised.
  def start
    super

    # okey, net/http has reconnect feature. see test_out_hoop_reconnect.rb
    begin
      # The block form closes the connection even when the request raises.
      Net::HTTP.start(@host, @port) do |conn|
        @authorized_header = fetch_authorized_header(conn, 'initalize request failed')
      end
    rescue
      $log.error "failed to connect hoop server: #{@host} port #{@port}"
      raise
    end
    $log.info "connected hoop server: #{@host} port #{@port}"
  end

  def shutdown
    super
  end

  # Returns +record+ serialized as JSON.
  def record_to_string(record)
    record.to_json
  end

  # Returns "time SEP tag SEP json" followed by the configured line end.
  #
  # NOTE: PlainTextFormatterMixin#configure defines a per-instance #format,
  # which takes precedence over this definition once configure has run.
  def format(tag, time, record)
    time_str = @timef.format(time)
    time_str + @f_separator + tag + @f_separator + record_to_string(record) + @line_end
  end

  # Expands the path pattern with the time parsed from +chunk_key+.
  def path_format(chunk_key)
    Time.strptime(chunk_key, @time_slice_format).strftime(@path)
  end

  # Appends +data+ to +path+ on the Hoop server and returns the final
  # Net::HTTPResponse.
  #
  # * 401: the authorization cookie is refreshed and the append is repeated.
  # * 404: the data is sent again with op=create (presumably the file does not
  #   exist yet).
  # * 500: the whole request is retried on a fresh connection, up to 3 retries
  #   (+retries+ counts the attempts already made); after that StandardError
  #   is raised.
  #
  # Any final status other than 200/201 is logged as a warning.
  def send_data(path, data, retries=0)
    res = Net::HTTP.start(@host, @port) do |conn|
      conn.read_timeout = 5
      response = conn.request_put(path + "?op=append", data, @authorized_header)
      if response.code == '401'
        @authorized_header = fetch_authorized_header(conn, 'Failed to update authorized cookie')
        response = conn.request_put(path + "?op=append", data, @authorized_header)
      end
      if response.code == '404'
        response = conn.request_post(path + "?op=create&overwrite=false", data, @authorized_header)
      end
      response
    end

    if res.code == '500'
      if retries >= 3
        raise StandardError, "failed to send_data with retry 3 times InternalServerError"
      end
      sleep 0.3 # yes, this is a magic number
      # The connection above is already closed; the retry opens its own and
      # reports its own outcome.
      return send_data(path, data, retries + 1)
    end

    if res.code != '200' and res.code != '201'
      $log.warn "failed to write data to path: #{path}, code: #{res.code} #{res.message}"
    end
    res
  end

  # Sends the content of +chunk+ to the HDFS path derived from its key and
  # returns that path. Errors are logged and re-raised.
  def write(chunk)
    hdfs_path = path_format(chunk.key)
    begin
      send_data(hdfs_path, chunk.read)
    rescue
      $log.error "failed to communicate server, #{@host} port #{@port}, path: #{hdfs_path}"
      raise
    end
    hdfs_path
  end

  private

  # Requests the server status as the configured user over +conn+ and returns
  # the request headers (cookie plus content type) for authorized calls.
  #
  # Logs and raises Fluent::ConfigError, both prefixed with +failure_message+,
  # when the status is 300 or above or no Set-Cookie header is returned.
  def fetch_authorized_header(conn, failure_message)
    # Escape the user name so that it cannot alter the query string.
    res = conn.request_get("/?op=status&user.name=#{URI.encode_www_form_component(@username)}")
    unless res.code.to_i < 300 and res['Set-Cookie']
      $log.error "#{failure_message}, code: #{res.code}, message: #{res.body}"
      raise Fluent::ConfigError, "#{failure_message}, code: #{res.code}, message: #{res.body}"
    end
    {'Cookie' => res['Set-Cookie'].split(';')[0], 'Content-Type' => 'application/octet-stream'}
  end
end
|