fluent-plugin-hbase 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ .idea
2
+ Gemfile.lock
3
+ *~
4
+ *.iml
5
+ *.gem
data/AUTHORS ADDED
@@ -0,0 +1 @@
1
+ KUOKA Yusuke <kuoka _at_ furyu.jp>
data/ChangeLog ADDED
@@ -0,0 +1,6 @@
1
+
2
+
3
+ Release 0.1.0 - 2012/11/19
4
+
5
+ * First release
6
+
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "http://rubygems.org"
2
+
3
+ gemspec
data/README.rdoc ADDED
@@ -0,0 +1,97 @@
1
+ = HBase output plugin for Fluent event collector
2
+
3
+ == Overview
4
+
5
+ The *HBase* output plugin buffers event logs in a local file and periodically puts them into HBase.
6
+
7
+ == Installation
8
+
9
+ Simply use RubyGems:
10
+
11
+ gem install fluent-plugin-hbase
12
+
13
+ == Configuration
14
+
15
+ <match pattern>
16
+ type hbase
17
+
18
+ tag_column_name HBASE_COLUMN
19
+ time_column_name HBASE_COLUMN
20
+ fields_to_columns_mapping MAPPING_FROM_JSON_FIELDS_TO_HBASE_COLUMNS
21
+ hbase_host YOUR_HBASE_HOST
22
+ hbase_port YOUR_HBASE_PORT
23
+ hbase_table YOUR_HBASE_TABLE_NAME
24
+ </match>
25
+
26
+ [tag_column_name (required)] The HBase column to save the tag attached to each Fluentd event log,
27
+ in the format "[Column family name]:[Column name]".
28
+ For example, to save tags to the column "c" in the column family "cf", use "cf:c".
29
+
30
+ [time_column_name (required)] The HBase column to save the time each Fluentd event log was sent,
31
+ in the format "[Column family name]:[Column name]".
32
+ For example, to save the time to the column "t" in the column family "cf", use "cf:t".
33
+
34
+ [fields_to_columns_mapping (required)] The mapping from JSON fields to HBase columns,
35
+ in the format "[JSON_FIELD1]=>[HBASE_COLUMN1],[JSON_FIELD2]=>[HBASE_COLUMN2],...".
36
+
37
+ Each JSON_FIELD is formatted as field names separated by dot(.)s, e.g. "a.b.c".
38
+ Each HBASE_COLUMN is formatted as a triple of a column family name, a colon (:), and a column name, e.g. "cf:c".
39
+
40
+ [hbase_host (required)] HBase host
41
+
42
+ [hbase_port (required)] HBase port
43
+
44
+ [hbase_table (required)] HBase table name
45
+
46
+ See example/fluent.conf for a configuration that should work.
47
+
48
+ You can also test the configuration by running a fluentd instance:
49
+
50
+ fluentd -c example/fluent.conf --plugin lib/fluent/plugin
51
+
52
+ == Prerequisites
53
+
54
+ You must setup your own Hadoop and HBase clusters and open appropriate ports to enable
55
+ the plugin to access HBase via HBase Thrift Server.
56
+
57
+ The plugin is tested solely on the system with:
58
+
59
+ - Hadoop 1.0.4
60
+ - HBase 0.94.0
61
+ - Java 1.6.0_37
62
+ - Mac OS X 10.8 Mountain Lion
63
+
64
+ for now.
65
+
66
+ Please let me know if you find the plugin to work in any other environments.
67
+
68
+ == Running
69
+
70
+ To make the plugin work, you need running instances of:
71
+
72
+ - Hadoop
73
+ - HBase
74
+ - HBase Thrift Server w/ the compact (buffered) protocol (not the framed protocol)
75
+
76
+ The procedure may be:
77
+
78
+ 1. Start Hadoop
79
+
80
+ $ start-all.sh
81
+
82
+ 2. Start HBase
83
+
84
+ $ start-hbase.sh
85
+
86
+ 3. Start HBase Thrift Server with the compact protocol
87
+
88
+ Use the thread pool server, as it is the only server that supports the compact protocol:
89
+
90
+ $ hbase thrift start -threadpool
91
+
92
+ 4. Run Fluentd
93
+
94
+ == Copyright
95
+
96
+ Copyright:: Copyright (c) 2012 FURYU CORPORATION
97
+ License:: Apache License, Version 2.0
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+
2
+ require 'bundler'
3
+ Bundler::GemHelper.install_tasks
4
+
5
+ require 'rake/testtask'
6
+
7
+ Rake::TestTask.new(:test) do |test|
8
+ test.libs << 'lib' << 'test'
9
+ test.test_files = FileList['test/*.rb']
10
+ test.verbose = true
11
+ end
12
+
13
+ task :default => [:build]
14
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,97 @@
1
+
2
+ ## built-in TCP input
3
+ ## $ echo <json> | fluent-cat <tag>
4
+ <source>
5
+ type forward
6
+ </source>
7
+
8
+ ## built-in UNIX socket input
9
+ #<source>
10
+ # type unix
11
+ #</source>
12
+
13
+ # HTTP input
14
+ # http://localhost:8888/<tag>?json=<json>
15
+ <source>
16
+ type http
17
+ port 8888
18
+ </source>
19
+
20
+ ## File input
21
+ ## read apache logs with tag=apache.access
22
+ #<source>
23
+ # type tail
24
+ # format apache
25
+ # path /var/log/httpd-access.log
26
+ # tag apache.access
27
+ #</source>
28
+
29
+ # Listen DRb for debug
30
+ <source>
31
+ type debug_agent
32
+ port 24230
33
+ </source>
34
+
35
+
36
+ ## match tag=apache.access and write to file
37
+ #<match apache.access>
38
+ # type file
39
+ # path /var/log/fluent/access
40
+ #</match>
41
+
42
+ ## match tag=debug.** and dump to console
43
+ <match debug.**>
44
+ type stdout
45
+ </match>
46
+
47
+ ## match tag=system.** and forward to another fluent server
48
+ #<match system.**>
49
+ # type forward
50
+ # host 192.168.0.11
51
+ # <secondary>
52
+ # host 192.168.0.12
53
+ # </secondary>
54
+ #</match>
55
+
56
+ ## match tag=myapp.** and forward and write to file
57
+ #<match myapp.**>
58
+ # type copy
59
+ # <store>
60
+ # type forward
61
+ # host 192.168.0.13
62
+ # buffer_type file
63
+ # buffer_path /var/log/fluent/myapp-forward
64
+ # retry_limit 50
65
+ # flush_interval 10s
66
+ # </store>
67
+ # <store>
68
+ # type file
69
+ # path /var/log/fluent/myapp
70
+ # </store>
71
+ #</match>
72
+
73
+ ## match fluent's internal events
74
+ #<match fluent.**>
75
+ # type null
76
+ #</match>
77
+
78
+ ## match not matched logs and write to file
79
+ #<match **>
80
+ # type file
81
+ # path /var/log/fluent/else
82
+ # compress gz
83
+ #</match>
84
+
85
+ <match **>
86
+ type hbase
87
+
88
+ # Just to write to HBase rapidly
89
+ flush_interval 1
90
+
91
+ tag_column_name event:tag
92
+ time_column_name event:time
93
+ fields_to_columns_mapping foo=>event:foo,iam.nested=>event:nested
94
+ hbase_host localhost
95
+ hbase_port 9090
96
+ hbase_table events
97
+ </match>
@@ -0,0 +1,3 @@
1
+ #!/bin/sh
2
+
3
+ fluentd -c example/fluent.conf --plugin lib/fluent/plugin
@@ -0,0 +1,22 @@
1
+ # encoding: utf-8
2
+ $:.push File.expand_path('../lib', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.name = "fluent-plugin-hbase"
6
+ gem.description = "HBase output plugin for Fluent event collector"
7
+ gem.homepage = "https://github.com/Furyu/fluent-plugin-hbase"
8
+ gem.summary = gem.description
9
+ gem.version = File.read("VERSION").strip
10
+ gem.authors = ["KUOKA Yusuke"]
11
+ gem.email = "kuoka@furyu.jp"
12
+ gem.has_rdoc = false
13
+ #gem.platform = Gem::Platform::RUBY
14
+ gem.files = `git ls-files`.split("\n")
15
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
16
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
17
+ gem.require_paths = ['lib']
18
+
19
+ gem.add_dependency "fluentd", "~> 0.10.0"
20
+ gem.add_dependency "massive_record", "~> 0.2.2"
21
+ gem.add_development_dependency "rake", ">= 0.9.2"
22
+ end
@@ -0,0 +1,99 @@
1
module Fluent

  # Buffered output plugin that stores Fluentd events as rows of an HBase
  # table, using MassiveRecord's wrapper classes.
  #
  # Each event becomes one row keyed by a random UUID. The event tag and time
  # are stored in the configured tag/time columns (when set), and selected
  # fields of the record are copied to HBase columns according to
  # +fields_to_columns_mapping+.
  class HBaseOutput < Fluent::BufferedOutput
    Fluent::Plugin.register_output('hbase', self)

    def initialize
      super
      require 'massive_record'
      # #write generates row ids with SecureRandom.uuid; the library must be
      # loaded explicitly because nothing else in this file requires it.
      require 'securerandom'
    end

    # "column_family:column" that receives the event tag (nil: tag is not stored).
    config_param :tag_column_name, :string, :default => nil
    # "column_family:column" that receives the event time (nil: time is not stored).
    config_param :time_column_name, :string, :default => nil
    # "FIELD1=>CF:COL1,FIELD2=>CF:COL2,..." where FIELD is a dot-separated
    # path into the record, e.g. "a.b.c".
    config_param :fields_to_columns_mapping, :string
    config_param :hbase_host, :string, :default => 'localhost'
    config_param :hbase_port, :integer, :default => 9090
    config_param :hbase_table, :string

    # Parses +fields_to_columns_mapping+ into:
    #   @fields_to_columns - ordered list of [field_path, column] pairs
    #   @mapping           - the same pairs as a Hash
    #
    # Raises Fluent::ConfigError when an entry is not of the form
    # "field=>column". Such entries used to be silently mis-paired with
    # their neighbours or to fail later with an unrelated error.
    def configure(conf)
      super

      @fields_to_columns = @fields_to_columns_mapping.split(",").map { |src_to_dst|
        field, column = src_to_dst.split("=>", 2)

        if field.nil? || field.empty? || column.nil? || column.empty?
          raise Fluent::ConfigError,
                "Invalid entry '#{src_to_dst}' in 'fields_to_columns_mapping': " \
                "each entry must be in the format of \"field=>column_family:column\""
        end

        [field, column]
      }
      @mapping = Hash[@fields_to_columns]
    end

    # Creates the connection and table wrappers, and creates the table with
    # every referenced column family when it does not exist yet.
    #
    # Raises RuntimeError if a configured column is not in the format
    # "column_family:column" (checked only when the table has to be created).
    def start
      super

      @conn = MassiveRecord::Wrapper::Connection.new(:host => @hbase_host, :port => @hbase_port)
      @table = MassiveRecord::Wrapper::Table.new(@conn, @hbase_table.intern)

      return if @table.exists?

      # The tag/time columns are optional (nil by default), so drop unset ones.
      columns = [@tag_column_name, @time_column_name].compact + @mapping.values
      # Several columns usually share a family; declare each family only once.
      column_families = columns.map { |column| column_family_of(column) }.uniq

      @table.create_column_families(column_families)
      @table.save
    end

    # Serializes one event into a msgpack'ed Hash of
    # "column_family:column" => value.
    #
    # Fields missing from the record are stored as nil.
    def format(tag, time, record)
      row_values = {}

      row_values[@tag_column_name] = tag unless @tag_column_name.nil?
      row_values[@time_column_name] = time unless @time_column_name.nil?

      @fields_to_columns.each { |field, column|
        row_values[column] = lookup_field(record, field)
      }

      row_values.to_msgpack
    end

    # Saves every buffered event of +chunk+ as a new HBase row.
    def write(chunk)
      chunk.msgpack_each { |row_values|
        event = {}

        row_values.each { |column_family_and_column, value|
          # Split on the first ':' only, so a qualifier may itself contain ':'.
          column_family, column = column_family_and_column.split(":", 2)

          (event[column_family.intern] ||= {})[column] = value
        }

        row = MassiveRecord::Wrapper::Row.new
        row.id = SecureRandom.uuid
        row.values = event
        row.table = @table
        row.save
      }
    end

    private

    # Returns the column family of "column_family:column" as a Symbol.
    # Raises RuntimeError when either part is missing.
    def column_family_of(column_family_with_column)
      column_family, column = column_family_with_column.split(":", 2)

      if column_family.nil? || column_family.empty? || column.nil? || column.empty?
        raise <<MESSAGE
Unexpected format for column name: #{column_family_with_column}
Each column given in 'tag_column_name', 'time_column_name' and on the
destination side of the 'fields_to_columns_mapping' option
must be specified in the format of "column_family:column".
Are you sure you included ':' in column names?
MESSAGE
      end

      column_family.intern
    end

    # Follows the dot-separated +field+ path through nested Hashes of +record+.
    # Returns nil as soon as a path component is missing or the current value
    # is not a Hash (String#[] and Array#[] would otherwise return a substring
    # or raise TypeError).
    def lookup_field(record, field)
      field.split(".").inject(record) { |value, key|
        return nil unless value.is_a?(Hash)

        value[key]
      }
    end

  end

end
data/test/out_hbase.rb ADDED
@@ -0,0 +1,134 @@
1
+ require 'fluent/test'
2
+ require 'fluent/plugin/out_hbase'
3
+
4
# Tests for Fluent::HBaseOutput, run through Fluentd's buffered-output test driver.
class HBaseOutputTest < Test::Unit::TestCase
  CONFIG = %[
    tag_column_name event:tag
    time_column_name event:time
    fields_to_columns_mapping foo=>event:foo,iam.nested=>event:nested
    hbase_host localhost
    hbase_port 9090
    hbase_table events
    buffer_type memory
  ]

  def setup
    Fluent::Test.setup
  end

  # Builds a test driver around HBaseOutput with #start and #write overridden.
  def create_driver(conf = CONFIG)
    driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::HBaseOutput) do
      # We don't want to connect the HBase instance while testing
      # NOTE(review): this override only delegates to super, so whatever the
      # parent #start does still runs -- confirm that no connection is made.
      def start
        super
      end

      # prevents writes to the HBase instance while testing;
      # the raw chunk contents are returned instead
      def write(chunk)
        chunk.read
      end
    end

    driver.configure(conf)
  end

  def test_configure
    plugin = create_driver.instance

    assert_equal 'event:tag', plugin.tag_column_name
    assert_equal 'event:time', plugin.time_column_name
    assert_equal 'foo=>event:foo,iam.nested=>event:nested', plugin.fields_to_columns_mapping
    assert_equal 'localhost', plugin.hbase_host
    assert_equal 9090, plugin.hbase_port
    assert_equal 'events', plugin.hbase_table
  end

  def test_format
    d = create_driver
    time_in_int = sample_time

    emit_samples(d, time_in_int)

    d.expect_format expected_row(time_in_int, "foo1", "nested1")
    d.expect_format expected_row(time_in_int, "foo2", "nested2")

    d.run
  end

  def test_write
    d = create_driver
    time_in_int = sample_time

    emit_samples(d, time_in_int)

    expected1 = expected_row(time_in_int, "foo1", "nested1")
    expected2 = expected_row(time_in_int, "foo2", "nested2")

    # HBaseOutputTest#write returns chunk.read
    data = d.run

    assert_equal expected1 + expected2, data
  end

  private

  # Fixed event time shared by the format/write tests, as an Integer.
  def sample_time
    Time.parse("2011-01-02 13:14:15 UTC").to_i
  end

  # Emits the two sample records (foo1/nested1, then foo2/nested2) at +time+.
  def emit_samples(driver, time)
    [["foo1", "nested1"], ["foo2", "nested2"]].each do |foo, nested|
      driver.emit({ "foo" => foo, "iam" => { "nested" => nested } }, time)
    end
  end

  # The msgpack'ed row expected for one sample record.
  def expected_row(time, foo, nested)
    {
      "event:tag" => "test",
      "event:time" => time,
      "event:foo" => foo,
      "event:nested" => nested
    }.to_msgpack
  end

end
134
+
metadata ADDED
@@ -0,0 +1,111 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-hbase
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - KUOKA Yusuke
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: fluentd
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.10.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.10.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: massive_record
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 0.2.2
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 0.2.2
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 0.9.2
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.9.2
62
+ description: HBase output plugin for Fluent event collector
63
+ email: kuoka@furyu.jp
64
+ executables: []
65
+ extensions: []
66
+ extra_rdoc_files: []
67
+ files:
68
+ - .gitignore
69
+ - AUTHORS
70
+ - ChangeLog
71
+ - Gemfile
72
+ - README.rdoc
73
+ - Rakefile
74
+ - VERSION
75
+ - example/fluent.conf
76
+ - example/run-fluentd.sh
77
+ - fluent-plugin-hbase.gemspec
78
+ - lib/fluent/plugin/out_hbase.rb
79
+ - test/out_hbase.rb
80
+ homepage: https://github.com/Furyu/fluent-plugin-hbase
81
+ licenses: []
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ segments:
93
+ - 0
94
+ hash: 1566921651398845132
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ segments:
102
+ - 0
103
+ hash: 1566921651398845132
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 1.8.24
107
+ signing_key:
108
+ specification_version: 3
109
+ summary: HBase output plugin for Fluent event collector
110
+ test_files:
111
+ - test/out_hbase.rb