fluent-plugin-hoop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +31 -0
- data/.gitmodules +3 -0
- data/AUTHORS +1 -0
- data/Gemfile +16 -0
- data/LICENSE.txt +13 -0
- data/README.rdoc +79 -0
- data/Rakefile +64 -0
- data/VERSION +1 -0
- data/fluent-plugin-hoop.gemspec +75 -0
- data/lib/fluent/plugin/out_hoop.rb +339 -0
- data/test/helper.rb +53 -0
- data/test/plugin/test_out_hoop.rb +441 -0
- data/test/plugin/test_out_hoop_realserver.rb +145 -0
- data/test/plugin/test_out_hoop_reconnect.rb +185 -0
- metadata +156 -0
data/.document
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# rcov generated
|
2
|
+
coverage
|
3
|
+
|
4
|
+
# rdoc generated
|
5
|
+
rdoc
|
6
|
+
|
7
|
+
# yard generated
|
8
|
+
doc
|
9
|
+
.yardoc
|
10
|
+
|
11
|
+
# bundler
|
12
|
+
.bundle
|
13
|
+
|
14
|
+
# jeweler generated
|
15
|
+
pkg
|
16
|
+
|
17
|
+
# For MacOS
|
18
|
+
.DS_Store
|
19
|
+
|
20
|
+
# For TextMate, emacs, vim
|
21
|
+
*.tmproj
|
22
|
+
tmtags
|
23
|
+
*~
|
24
|
+
\#*
|
25
|
+
.\#*
|
26
|
+
*.swp
|
27
|
+
|
28
|
+
# not to lock gems version, and for bundler
|
29
|
+
Gemfile.lock
|
30
|
+
vendor
|
31
|
+
vendor/fluentd
|
data/.gitmodules
ADDED
data/AUTHORS
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
TAGOMORI Satoshi <tagomoris _at_ gmail.com>
|
data/Gemfile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.4"
  gem "rcov", ">= 0"
end

gem "rdoc"
|
16
|
+
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2011 TAGOMORI Satoshi
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.rdoc
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
= Hoop plugin for Fluentd
|
2
|
+
|
3
|
+
== Component
|
4
|
+
|
5
|
+
=== HoopOutput
|
6
|
+
|
7
|
+
Store fluent-event as plain text to HDFS, over Hoop (HDFS http-fs).
|
8
|
+
|
9
|
+
Hoop was originally written at Cloudera and has since been merged into the Apache Hadoop 0.23 tree. See:
|
10
|
+
|
11
|
+
[Apache Hadoop dev doc] https://github.com/apache/hadoop-common/blob/trunk/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/index.apt.vm
|
12
|
+
[Cloudera Hoop doc (obsolete)] http://cloudera.github.com/hoop/docs/latest/index.html
|
13
|
+
|
14
|
+
HoopOutput slices data by time (in the specified units) and stores it as plain text on HDFS. You can choose to:
|
15
|
+
|
16
|
+
- format whole data as serialized JSON, single attribute or separated multi attributes
|
17
|
+
- include time as line header, or not
|
18
|
+
- include tag as line header, or not
|
19
|
+
- change field separator (default: TAB)
|
20
|
+
|
21
|
+
== Configuration
|
22
|
+
|
23
|
+
=== HoopOutput
|
24
|
+
|
25
|
+
Minimal configuration (output: TAB separated time,tag,json-serialized-data and terminated with newline):
|
26
|
+
|
27
|
+
<match hoop.**>
|
28
|
+
type hoop
|
29
|
+
hoop_server hoop-server.local:14000
|
30
|
+
|
31
|
+
# %Y %m %d %H %M %S are available as conversion specifications in path on hdfs
|
32
|
+
# If '%Y%m%d' specified, logs are sliced into per-day files automatically.
|
33
|
+
path /hoop/log-%Y%m/log-%Y%m%d.log
|
34
|
+
|
35
|
+
# 'username' is used for pseudo authentication, see http://cloudera.github.com/hoop/docs/latest/HttpRestApi.html
|
36
|
+
username hoopuser
|
37
|
+
</match>
|
38
|
+
|
39
|
+
You will get output like below in hdfs file such as '/hoop/log-201112/log-20111231.log'
|
40
|
+
|
41
|
+
2011-12-31T13:14:15Z [TAB] hoop.foo.bar [TAB] {"field1":12345,"field2":"one two three four five","field3":"OK"} [terminated by newline]
|
42
|
+
2011-12-31T21:22:23Z [TAB] hoop.foo.val [TAB] {"field1":23456,"field2":"two three four five six","field3":"BAD"} [terminated by newline]
|
43
|
+
|
44
|
+
Single attribute with tag (prefix 'hoop.' removed), without time, separated by SPACE and not terminated by a newline ('message' data will be terminated with newline).
|
45
|
+
|
46
|
+
<match hoop.**>
|
47
|
+
type hoop
|
48
|
+
hoop_server hoop-server.local:14000
|
49
|
+
path /hoop/log-%Y%m/log-%Y%m%d-%H.log
|
50
|
+
username hoopuser
|
51
|
+
|
52
|
+
output_include_time false
|
53
|
+
output_include_tag true
|
54
|
+
|
55
|
+
# If you want multiple attribute, specify like 'attr:field1,field2,field3'
|
56
|
+
output_data_type attr:message
|
57
|
+
|
58
|
+
# field_separator allows 'SPACE', 'COMMA' and 'TAB'(default)
|
59
|
+
field_separator SPACE
|
60
|
+
|
61
|
+
# add_newline defaults to true
|
62
|
+
add_newline false
|
63
|
+
|
64
|
+
# tag 'hoop.foo.bar' is shortened to 'foo.bar'
|
65
|
+
remove_prefix hoop
|
66
|
+
|
67
|
+
# used when the tag is exactly the remove_prefix string, like 'hoop'
|
68
|
+
default_tag unknown
|
69
|
+
</match>
|
70
|
+
|
71
|
+
== TODO
|
72
|
+
|
73
|
+
- consider what to do next
|
74
|
+
- patches welcome!
|
75
|
+
|
76
|
+
== Copyright
|
77
|
+
|
78
|
+
Copyright:: Copyright (c) 2011- TAGOMORI Satoshi (tagomoris)
|
79
|
+
License:: Apache License, Version 2.0
|
data/Rakefile
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "fluent-plugin-hoop"
|
18
|
+
gem.description = "Hoop (HDFS http-fs) plugin for Fluent event collector"
|
19
|
+
gem.homepage = "http://github.com/tagomoris/fluent-plugin-hoop"
|
20
|
+
gem.summary = gem.description
|
21
|
+
# gem.version = File.read("VERSION").strip
|
22
|
+
gem.authors = ["TAGOMORI Satoshi"]
|
23
|
+
gem.email = "tagomoris@gmail.com"
|
24
|
+
gem.has_rdoc = false
|
25
|
+
# gem.license = "Apache License v2.0"
|
26
|
+
gem.files = `git ls-files`.split("\n")
|
27
|
+
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
28
|
+
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
29
|
+
gem.require_paths = ['lib']
|
30
|
+
gem.add_dependency "fluentd", "~> 0.10.8"
|
31
|
+
gem.add_development_dependency "rake", ">= 0.9.2"
|
32
|
+
gem.add_development_dependency "simplecov", ">= 0.5.4"
|
33
|
+
end
|
34
|
+
Jeweler::RubygemsDotOrgTasks.new
|
35
|
+
|
36
|
+
require 'rake/testtask'
|
37
|
+
Rake::TestTask.new(:test) do |test|
|
38
|
+
unless ENV['DEBUG']
|
39
|
+
ENV['FLUENT_TEST_DEBUG'] = 'TRUE'
|
40
|
+
end
|
41
|
+
test.libs << 'lib' << 'test'
|
42
|
+
test.pattern = 'test/**/test_*.rb'
|
43
|
+
test.verbose = true
|
44
|
+
end
|
45
|
+
|
46
|
+
require 'rcov/rcovtask'
|
47
|
+
Rcov::RcovTask.new do |test|
|
48
|
+
test.libs << 'test'
|
49
|
+
test.pattern = 'test/**/test_*.rb'
|
50
|
+
test.verbose = true
|
51
|
+
test.rcov_opts << '--exclude "gems/*"'
|
52
|
+
end
|
53
|
+
|
54
|
+
task :default => :test
|
55
|
+
|
56
|
+
require 'rdoc/task'
|
57
|
+
Rake::RDocTask.new do |rdoc|
|
58
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
59
|
+
|
60
|
+
rdoc.rdoc_dir = 'rdoc'
|
61
|
+
rdoc.title = "fluent-plugin-hoop #{version}"
|
62
|
+
rdoc.rdoc_files.include('README*')
|
63
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
64
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{fluent-plugin-hoop}
|
8
|
+
s.version = "0.1.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = [%q{TAGOMORI Satoshi}]
|
12
|
+
s.date = %q{2011-12-26}
|
13
|
+
s.description = %q{Hoop (HDFS http-fs) plugin for Fluent event collector}
|
14
|
+
s.email = %q{tagomoris@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
".gitmodules",
|
23
|
+
"AUTHORS",
|
24
|
+
"Gemfile",
|
25
|
+
"LICENSE.txt",
|
26
|
+
"README.rdoc",
|
27
|
+
"Rakefile",
|
28
|
+
"VERSION",
|
29
|
+
"fluent-plugin-hoop.gemspec",
|
30
|
+
"lib/fluent/plugin/out_hoop.rb",
|
31
|
+
"test/helper.rb",
|
32
|
+
"test/plugin/test_out_hoop.rb",
|
33
|
+
"test/plugin/test_out_hoop_realserver.rb",
|
34
|
+
"test/plugin/test_out_hoop_reconnect.rb"
|
35
|
+
]
|
36
|
+
s.homepage = %q{http://github.com/tagomoris/fluent-plugin-hoop}
|
37
|
+
s.require_paths = [%q{lib}]
|
38
|
+
s.rubygems_version = %q{1.8.6}
|
39
|
+
s.summary = %q{Hoop (HDFS http-fs) plugin for Fluent event collector}
|
40
|
+
s.test_files = [%q{test/helper.rb}, %q{test/plugin/test_out_hoop.rb}, %q{test/plugin/test_out_hoop_realserver.rb}, %q{test/plugin/test_out_hoop_reconnect.rb}]
|
41
|
+
|
42
|
+
if s.respond_to? :specification_version then
|
43
|
+
s.specification_version = 3
|
44
|
+
|
45
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
46
|
+
s.add_runtime_dependency(%q<rdoc>, [">= 0"])
|
47
|
+
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
48
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
49
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
50
|
+
s.add_development_dependency(%q<rcov>, [">= 0"])
|
51
|
+
s.add_runtime_dependency(%q<fluentd>, ["~> 0.10.8"])
|
52
|
+
s.add_development_dependency(%q<rake>, [">= 0.9.2"])
|
53
|
+
s.add_development_dependency(%q<simplecov>, [">= 0.5.4"])
|
54
|
+
else
|
55
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
56
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
57
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
58
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
59
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
60
|
+
s.add_dependency(%q<fluentd>, ["~> 0.10.8"])
|
61
|
+
s.add_dependency(%q<rake>, [">= 0.9.2"])
|
62
|
+
s.add_dependency(%q<simplecov>, [">= 0.5.4"])
|
63
|
+
end
|
64
|
+
else
|
65
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
66
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
67
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
68
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
69
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
70
|
+
s.add_dependency(%q<fluentd>, ["~> 0.10.8"])
|
71
|
+
s.add_dependency(%q<rake>, [">= 0.9.2"])
|
72
|
+
s.add_dependency(%q<simplecov>, [">= 0.5.4"])
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
@@ -0,0 +1,339 @@
|
|
1
|
+
module FluentExt; end
|
2
|
+
module FluentExt::PlainTextFormatterMixin
|
3
|
+
# config_param :output_data_type, :string, :default => 'json' # or 'attr:field' or 'attr:field1,field2,field3(...)'
|
4
|
+
|
5
|
+
attr_accessor :output_include_time, :output_include_tag, :output_data_type
|
6
|
+
attr_accessor :add_newline, :field_separator
|
7
|
+
attr_accessor :remove_prefix, :default_tag
|
8
|
+
|
9
|
+
# Reads the plain-text formatting options from +conf+ and installs the
# per-instance +stringify_record+ and +format+ methods that implement them.
#
# Keys read from +conf+: output_include_time, output_include_tag,
# output_data_type, field_separator, add_newline, remove_prefix,
# default_tag, localtime and utc.
#
# Raises Fluent::ConfigError when output_data_type is neither 'json' nor an
# 'attr:' list with at least one attribute, or when a non-empty
# remove_prefix is combined with output_include_tag but default_tag is
# missing or empty.
def configure(conf)
  super

  @output_include_time = Fluent::Config.bool_value(conf['output_include_time'])
  @output_include_time = true if @output_include_time.nil?

  @output_include_tag = Fluent::Config.bool_value(conf['output_include_tag'])
  @output_include_tag = true if @output_include_tag.nil?

  @output_data_type = conf['output_data_type']
  @output_data_type = 'json' if @output_data_type.nil?

  # Take the separator keyword from conf like every other option in this
  # method; fall back to a value the including class may already have stored.
  @field_separator = case conf['field_separator'] || @field_separator
                     when 'SPACE' then ' '
                     when 'COMMA' then ','
                     else "\t"
                     end

  @add_newline = Fluent::Config.bool_value(conf['add_newline'])
  @add_newline = true if @add_newline.nil?

  @remove_prefix = conf['remove_prefix']
  if @remove_prefix
    @removed_prefix_string = @remove_prefix + '.'
    @removed_length = @removed_prefix_string.length
  end
  if @output_include_tag and @remove_prefix and @remove_prefix.length > 0
    @default_tag = conf['default_tag']
    if @default_tag.nil? or @default_tag.length < 1
      raise Fluent::ConfigError, "Missing 'default_tag' with output_include_tag and remove_prefix."
    end
  end

  # default timezone: utc
  if conf['localtime'].nil? and conf['utc'].nil?
    @utc = true
    @localtime = false
  elsif not @localtime and not @utc
    @utc = true
    @localtime = false
  end
  # mix-in default time formatter (or you can overwrite @timef on your own configure)
  @timef = @output_include_time ? Fluent::TimeFormatter.new(@time_format, @localtime) : nil

  @custom_attributes = []
  if @output_data_type == 'json'
    define_singleton_method(:stringify_record) { |record| record.to_json }
  elsif @output_data_type =~ /^attr:(.*)$/
    @custom_attributes = $1.split(',')
    if @custom_attributes.empty?
      raise Fluent::ConfigError, "Invalid attributes specification: '#{@output_data_type}', needs one or more attributes."
    end
    # Missing (nil/false) attributes are written as the literal string NULL.
    define_singleton_method(:stringify_record) do |record|
      @custom_attributes.map { |attr| (record[attr] || 'NULL').to_s }.join(@field_separator)
    end
  else
    raise Fluent::ConfigError, "Invalid output_data_type: '#{@output_data_type}'. specify 'json' or 'attr:ATTRIBUTE_NAME' or 'attr:ATTR1,ATTR2,...'"
  end

  # The line layout is fixed at configure time: capture the switches once
  # and install a single per-instance formatter built from them.
  include_time = @output_include_time
  include_tag = @output_include_tag
  strip_prefix = include_tag && @remove_prefix
  line_end = @add_newline ? "\n" : ''
  define_singleton_method(:format) do |tag, time, record|
    if strip_prefix &&
       (tag == @remove_prefix ||
        (tag[0, @removed_length] == @removed_prefix_string && tag.length > @removed_length))
      # A tag equal to the bare prefix has nothing left after stripping.
      tag = tag[@removed_length..-1] || @default_tag
    end
    line = ''
    line += @timef.format(time) + @field_separator if include_time
    line += tag + @field_separator if include_tag
    line + stringify_record(record) + line_end
  end
end
|
180
|
+
|
181
|
+
# Default record serializer: the whole record as JSON.
def stringify_record(record)
  record.to_json
end

# Fallback line formatter, used when no per-instance +format+ has been
# installed. Builds "[time SEP][tag SEP]record[newline]" from the instance
# settings.
#
# The prefix is stripped only when @remove_prefix is set; without that guard
# the slice on a nil @removed_length raised TypeError. A newline is appended
# unless @add_newline has explicitly been set to false.
def format(tag, time, record)
  if @remove_prefix &&
     (tag == @remove_prefix ||
      (tag[0, @removed_length] == @removed_prefix_string && tag.length > @removed_length))
    # A tag equal to the bare prefix has nothing left after stripping.
    tag = tag[@removed_length..-1] || @default_tag
  end
  time_str = @output_include_time ? @timef.format(time) + @field_separator : ''
  tag_str = @output_include_tag ? tag + @field_separator : ''
  line_end = @add_newline == false ? '' : "\n"
  time_str + tag_str + stringify_record(record) + line_end
end
|
201
|
+
|
202
|
+
end
|
203
|
+
|
204
|
+
class Fluent::HoopOutput < Fluent::TimeSlicedOutput
|
205
|
+
Fluent::Plugin.register_output('hoop', self)
|
206
|
+
|
207
|
+
config_set_default :buffer_type, 'memory'
|
208
|
+
config_set_default :time_slice_format, '%Y%m%d' # %Y%m%d%H
|
209
|
+
# config_param :tag_format, :string, :default => 'all' # or 'last'(last.part.of.tag => tag) or 'none'
|
210
|
+
|
211
|
+
config_param :hoop_server, :string # host:port
|
212
|
+
config_param :path, :string # /path/pattern/to/hdfs/file can use %Y %m %d %H %M %S and %T(tag, not-supported-yet)
|
213
|
+
config_param :username, :string # hoop pseudo username
|
214
|
+
|
215
|
+
include FluentExt::PlainTextFormatterMixin
|
216
|
+
config_set_default :output_include_time, true
|
217
|
+
config_set_default :output_include_tag, true
|
218
|
+
config_set_default :output_data_type, 'json'
|
219
|
+
config_set_default :field_separator, "\t"
|
220
|
+
config_set_default :add_newline, true
|
221
|
+
config_set_default :remove_prefix, nil
|
222
|
+
|
223
|
+
# Runs the inherited initializer, then loads the standard libraries this
# plugin uses (net/http for the Hoop requests, time for Time.strptime).
def initialize
  super
  %w[net/http time].each { |lib| require lib }
end
|
228
|
+
|
229
|
+
# Validates the Hoop-specific settings and derives @host, @port and the
# time slice format.
#
# The finest time unit used in +path+ (%S, %M or %H) selects the matching
# time_slice_format, written into +conf+ before the inherited configure runs.
#
# Raises Fluent::ConfigError when hoop_server is not "SERVER_NAME:PORT" or
# when path does not start with '/'.
def configure(conf)
  if conf['path']
    if conf['path'].index('%S')
      conf['time_slice_format'] = '%Y%m%d%H%M%S'
    elsif conf['path'].index('%M')
      conf['time_slice_format'] = '%Y%m%d%H%M'
    elsif conf['path'].index('%H')
      conf['time_slice_format'] = '%Y%m%d%H'
    end
  end

  super

  # \z (not \Z) so that a value with a trailing newline is rejected.
  unless /\A([a-zA-Z0-9][-a-zA-Z0-9.]*):(\d+)\z/ =~ @hoop_server
    raise Fluent::ConfigError, "Invalid config value on hoop_server: '#{@hoop_server}', needs SERVER_NAME:PORT"
  end
  @host = $1
  @port = $2.to_i
  unless @path.start_with?('/')
    raise Fluent::ConfigError, "Path on hdfs MUST starts with '/', but '#{@path}'"
  end
  @conn = nil
  @header = {'Content-Type' => 'application/octet-stream'}

  # Use the keyword from conf: by this point @field_separator may already
  # hold the literal separator character instead of SPACE/COMMA.
  @f_separator = case conf['field_separator'] || @field_separator
                 when 'SPACE' then ' '
                 when 'COMMA' then ','
                 else "\t"
                 end
end
|
259
|
+
|
260
|
+
# Starts the plugin and performs the initial pseudo-authentication request
# against the Hoop server, storing the returned cookie in @authorized_header
# for later writes.
#
# Raises Fluent::ConfigError when the status request does not return a
# status below 300 together with a Set-Cookie header; connection-level
# errors are logged and re-raised unchanged.
def start
  super

  begin
    # The connection attempt is inside the rescue so real connect failures
    # are logged too; the block form closes the connection even when the
    # request raises. Later writes open their own connections.
    Net::HTTP.start(@host, @port) do |conn|
      res = conn.request_get("/?op=status&user.name=#{@username}")
      if res.code.to_i < 300 and res['Set-Cookie']
        # Only the first name=value pair of Set-Cookie is kept.
        @authorized_header = {'Cookie' => res['Set-Cookie'].split(';')[0], 'Content-Type' => 'application/octet-stream'}
      else
        $log.error "initalize request failed, code: #{res.code}, message: #{res.body}"
        raise Fluent::ConfigError, "initalize request failed, code: #{res.code}, message: #{res.body}"
      end
    end
  rescue
    $log.error "failed to connect hoop server: #{@host} port #{@port}"
    raise
  end
  $log.info "connected hoop server: #{@host} port #{@port}"
end
|
280
|
+
|
281
|
+
# Stops the plugin. Nothing Hoop-specific is torn down here; this only
# delegates to the inherited shutdown.
def shutdown
  super()
end
|
284
|
+
|
285
|
+
# Serializes a record as JSON.
def record_to_string(record)
  record.to_json
end

# Class-level formatter: "time SEP tag SEP json-record" plus a line ending.
#
# @line_end is not assigned anywhere in this file, so concatenating it
# raised TypeError. It is still honored when set; otherwise a newline is
# appended unless @add_newline has explicitly been set to false.
def format(tag, time, record)
  line_end = @line_end || (@add_newline == false ? '' : "\n")
  @timef.format(time) + @f_separator + tag + @f_separator + record_to_string(record) + line_end
end
|
293
|
+
|
294
|
+
# Expands the configured path template for one chunk: +chunk_key+ is parsed
# with @time_slice_format and the resulting time is rendered through @path.
def path_format(chunk_key)
  slice_time = Time.strptime(chunk_key, @time_slice_format)
  slice_time.strftime(@path)
end
|
297
|
+
|
298
|
+
# Appends +data+ to the HDFS file at +path+ through the Hoop server and
# returns the final HTTP response.
#
# Flow:
# * 401 on append: request a fresh auth cookie, then append again.
# * 404 on append: the file does not exist yet, so create it with the data.
# * 500: wait briefly and retry the whole operation, up to 3 retries.
#
# +retries+ counts retries already performed and is only passed by the
# recursive retry call.
#
# Raises Fluent::ConfigError when the auth cookie cannot be refreshed, and
# StandardError once the 500 retries are exhausted. Any other final status
# than 200/201 is logged as a warning and the response is still returned.
def send_data(path, data, retries=0)
  # The block form closes the connection even when a request raises.
  res = Net::HTTP.start(@host, @port) do |conn|
    conn.read_timeout = 5
    response = conn.request_put(path + "?op=append", data, @authorized_header)
    if response.code == '401'
      response = conn.request_get("/?op=status&user.name=#{@username}")
      if response.code.to_i < 300 and response['Set-Cookie']
        @authorized_header = {'Cookie' => response['Set-Cookie'].split(';')[0], 'Content-Type' => 'application/octet-stream'}
      else
        $log.error "Failed to update authorized cookie, code: #{response.code}, message: #{response.body}"
        raise Fluent::ConfigError, "Failed to update authorized cookie, code: #{response.code}, message: #{response.body}"
      end
      # Retry against this method's own +path+ argument (the original used
      # an undefined local here and raised NameError).
      response = conn.request_put(path + "?op=append", data, @authorized_header)
    end
    if response.code == '404'
      response = conn.request_post(path + "?op=create&overwrite=false", data, @authorized_header)
    end
    if response.code == '500'
      if retries >= 3
        raise StandardError, "failed to send_data with retry 3 times InternalServerError"
      end
      sleep 0.3 # yes, this is a magic number
      response = send_data(path, data, retries + 1)
    end
    response
  end
  if res.code != '200' and res.code != '201'
    $log.warn "failed to write data to path: #{path}, code: #{res.code} #{res.message}"
  end
  res
end
|
328
|
+
|
329
|
+
# Writes one buffered chunk: the chunk key is turned into the target HDFS
# path and the chunk content is sent there. Returns that path. A failure
# while sending is logged and re-raised.
def write(chunk)
  path_format(chunk.key).tap do |target|
    begin
      send_data(target, chunk.read)
    rescue
      $log.error "failed to communicate server, #{@host} port #{@port}, path: #{target}"
      raise
    end
  end
end
|
339
|
+
end
|