fluent-plugin-hbase 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ .idea
2
+ Gemfile.lock
3
+ *~
4
+ *.iml
5
+ *.gem
data/AUTHORS ADDED
@@ -0,0 +1 @@
1
+ KUOKA Yusuke <kuoka _at_ furyu.jp>
data/ChangeLog ADDED
@@ -0,0 +1,6 @@
1
+
2
+
3
+ Release 0.1.0 - 2012/11/19
4
+
5
+ * First release
6
+
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "http://rubygems.org"
2
+
3
+ gemspec
data/README.rdoc ADDED
@@ -0,0 +1,97 @@
1
+ = HBase output plugin for Fluent event collector
2
+
3
+ == Overview
4
+
5
+ *HBase* output plugin buffers event logs in a local file and puts them into HBase periodically.
6
+
7
+ == Installation
8
+
9
+ Simply use RubyGems:
10
+
11
+ gem install fluent-plugin-hbase
12
+
13
+ == Configuration
14
+
15
+ <match pattern>
16
+ type hbase
17
+
18
+ tag_column_name HBASE_COLUMN
19
+ time_column_name HBASE_COLUMN
20
+ fields_to_columns_mapping MAPPING_FROM_JSON_FIELDS_TO_HBASE_COLUMNS
21
+ hbase_host YOUR_HBASE_HOST
22
+ hbase_port YOUR_HBASE_PORT
23
+ hbase_table YOUR_HBASE_TABLE_NAME
24
+ </match>
25
+
26
+ [tag_column_name (required)] The HBase column to save the tag attached to each Fluentd event log,
27
+ in the format "[Column family name]:[Column name]".
28
+ For example, to save tags to the column "c" in the column family "cf", use "cf:c".
29
+
30
+ [time_column_name (required)] The HBase column to save the time each Fluentd event log was sent,
31
+ in the format "[Column family name]:[Column name]".
32
+ For example, to save the time to the column "t" in the column family "cf", use "cf:t".
33
+
34
+ [fields_to_columns_mapping (required)] The mapping from JSON fields to HBase columns,
35
+ in the format "[JSON_FIELD1]=>[HBASE_COLUMN1],[JSON_FIELD2]=>[HBASE_COLUMN2],...".
36
+
37
+ Each JSON_FIELD is formatted as field names separated by dot(.)s, e.g. "a.b.c".
38
+ Each HBASE_COLUMN is formatted as a triple of a column family name, a colon(:), and a column name, e.g. "cf:c".
39
+
40
+ [hbase_host (optional)] HBase host (default: localhost)
41
+
42
+ [hbase_port (optional)] HBase port (default: 9090)
43
+
44
+ [hbase_table (required)] HBase table name
45
+
46
+ See example/fluent.conf for a configuration that should work.
47
+
48
+ You can also test the configuration by running a fluentd instance:
49
+
50
+ fluentd -c example/fluent.conf --plugin lib/fluent/plugin
51
+
52
+ == Prerequisites
53
+
54
+ You must set up your own Hadoop and HBase clusters and open appropriate ports to enable
55
+ the plugin to access HBase via HBase Thrift Server.
56
+
57
+ The plugin is tested solely on the system with:
58
+
59
+ - Hadoop 1.0.4
60
+ - HBase 0.94.0
61
+ - Java 1.6.0_37
62
+ - Mac OS X 10.8 Mountain Lion
63
+
64
+ for now.
65
+
66
+ Please let me know if you find the plugin to work in any other environments.
67
+
68
+ == Running
69
+
70
+ To make the plugin work, you need running instances of:
71
+
72
+ - Hadoop
73
+ - HBase
74
+ - HBase Thrift Server w/ the compact (buffered) protocol (not the framed protocol)
75
+
76
+ The procedure may be:
77
+
78
+ 1. Start Hadoop
79
+
80
+ $ start-all.sh
81
+
82
+ 2. Start HBase
83
+
84
+ $ start-hbase.sh
85
+
86
+ 3. Start HBase Thrift Server with the compact protocol
87
+
88
+ Use the thread pool server as it is the only server that supports the compact protocol:
89
+
90
+ $ hbase thrift start -threadpool
91
+
92
+ 4. Run Fluentd
93
+
94
+ == Copyright
95
+
96
+ Copyright:: Copyright (c) 2012 FURYU CORPORATION
97
+ License:: Apache License, Version 2.0
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+
2
+ require 'bundler'
3
+ Bundler::GemHelper.install_tasks
4
+
5
+ require 'rake/testtask'
6
+
7
+ Rake::TestTask.new(:test) do |test|
8
+ test.libs << 'lib' << 'test'
9
+ test.test_files = FileList['test/*.rb']
10
+ test.verbose = true
11
+ end
12
+
13
+ task :default => [:build]
14
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,97 @@
1
+
2
+ ## built-in TCP input
3
+ ## $ echo <json> | fluent-cat <tag>
4
+ <source>
5
+ type forward
6
+ </source>
7
+
8
+ ## built-in UNIX socket input
9
+ #<source>
10
+ # type unix
11
+ #</source>
12
+
13
+ # HTTP input
14
+ # http://localhost:8888/<tag>?json=<json>
15
+ <source>
16
+ type http
17
+ port 8888
18
+ </source>
19
+
20
+ ## File input
21
+ ## read apache logs with tag=apache.access
22
+ #<source>
23
+ # type tail
24
+ # format apache
25
+ # path /var/log/httpd-access.log
26
+ # tag apache.access
27
+ #</source>
28
+
29
+ # Listen DRb for debug
30
+ <source>
31
+ type debug_agent
32
+ port 24230
33
+ </source>
34
+
35
+
36
+ ## match tag=apache.access and write to file
37
+ #<match apache.access>
38
+ # type file
39
+ # path /var/log/fluent/access
40
+ #</match>
41
+
42
+ ## match tag=debug.** and dump to console
43
+ <match debug.**>
44
+ type stdout
45
+ </match>
46
+
47
+ ## match tag=system.** and forward to another fluent server
48
+ #<match system.**>
49
+ # type forward
50
+ # host 192.168.0.11
51
+ # <secondary>
52
+ # host 192.168.0.12
53
+ # </secondary>
54
+ #</match>
55
+
56
+ ## match tag=myapp.** and forward and write to file
57
+ #<match myapp.**>
58
+ # type copy
59
+ # <store>
60
+ # type forward
61
+ # host 192.168.0.13
62
+ # buffer_type file
63
+ # buffer_path /var/log/fluent/myapp-forward
64
+ # retry_limit 50
65
+ # flush_interval 10s
66
+ # </store>
67
+ # <store>
68
+ # type file
69
+ # path /var/log/fluent/myapp
70
+ # </store>
71
+ #</match>
72
+
73
+ ## match fluent's internal events
74
+ #<match fluent.**>
75
+ # type null
76
+ #</match>
77
+
78
+ ## match not matched logs and write to file
79
+ #<match **>
80
+ # type file
81
+ # path /var/log/fluent/else
82
+ # compress gz
83
+ #</match>
84
+
85
+ <match **>
86
+ type hbase
87
+
88
+ # Just to write to HBase rapidly
89
+ flush_interval 1
90
+
91
+ tag_column_name event:tag
92
+ time_column_name event:time
93
+ fields_to_columns_mapping foo=>event:foo,iam.nested=>event:nested
94
+ hbase_host localhost
95
+ hbase_port 9090
96
+ hbase_table events
97
+ </match>
@@ -0,0 +1,3 @@
1
+ #!/bin/sh
2
+
3
+ fluentd -c example/fluent.conf --plugin lib/fluent/plugin
@@ -0,0 +1,22 @@
1
+ # encoding: utf-8
2
+ $:.push File.expand_path('../lib', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.name = "fluent-plugin-hbase"
6
+ gem.description = "HBase output plugin for Fluent event collector"
7
+ gem.homepage = "https://github.com/Furyu/fluent-plugin-hbase"
8
+ gem.summary = gem.description
9
+ gem.version = File.read("VERSION").strip
10
+ gem.authors = ["KUOKA Yusuke"]
11
+ gem.email = "kuoka@furyu.jp"
12
+ gem.has_rdoc = false
13
+ #gem.platform = Gem::Platform::RUBY
14
+ gem.files = `git ls-files`.split("\n")
15
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
16
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
17
+ gem.require_paths = ['lib']
18
+
19
+ gem.add_dependency "fluentd", "~> 0.10.0"
20
+ gem.add_dependency "massive_record", "~> 0.2.2"
21
+ gem.add_development_dependency "rake", ">= 0.9.2"
22
+ end
@@ -0,0 +1,99 @@
1
+ module Fluent
2
+
3
+ class HBaseOutput < Fluent::BufferedOutput
4
+ Fluent::Plugin.register_output('hbase', self)
5
+
6
+ def initialize
7
+ super
8
+ require 'massive_record'
9
+ end
10
+
11
+ config_param :tag_column_name, :string, :default => nil
12
+ config_param :time_column_name, :string, :default => nil
13
+ config_param :fields_to_columns_mapping, :string
14
+ config_param :hbase_host, :string, :default => 'localhost'
15
+ config_param :hbase_port, :integer, :default => 9090
16
+ config_param :hbase_table, :string
17
+
18
+ def configure(conf)
19
+ super
20
+
21
+ @fields_to_columns = @fields_to_columns_mapping.split(",").map { |src_to_dst|
22
+ src_to_dst.split("=>")
23
+ }
24
+ @mapping = Hash[*@fields_to_columns.flatten]
25
+ end
26
+
27
+ def start
28
+ super
29
+
30
+ @conn = MassiveRecord::Wrapper::Connection.new(:host => @hbase_host, :port => @hbase_port)
31
+ @table = MassiveRecord::Wrapper::Table.new(@conn, @hbase_table.intern)
32
+
33
+ unless @table.exists?
34
+ columns = [@tag_column_name, @time_column_name] + @mapping.values
35
+ column_families = columns.map {|column_family_with_column|
36
+ column_family, column = column_family_with_column.split(":")
37
+
38
+ if column.nil?
39
+ raise <<MESSAGE
40
+ Unexpected format for column name: #{column_family_with_column}
41
+ Each destination column in the 'fields_to_columns_mapping' option
42
+ must be specified in the format of \"column_family:column\".
43
+ Are you sure you included ':' in column names?
44
+ MESSAGE
45
+ end
46
+
47
+ column_family.intern
48
+ }
49
+
50
+ @table.create_column_families(column_families)
51
+ @table.save
52
+ end
53
+ end
54
+
55
+ def format(tag, time, record)
56
+ row_values = {}
57
+
58
+ row_values[@tag_column_name] = tag unless @tag_column_name.nil?
59
+ row_values[@time_column_name] = time unless @time_column_name.nil?
60
+
61
+ @fields_to_columns.each {|field,column|
62
+
63
+ next if field.nil? or column.nil?
64
+
65
+ components = field.split(".")
66
+ value = record
67
+ for c in components
68
+ value = value[c]
69
+
70
+ break if value.nil?
71
+ end
72
+
73
+ row_values[column] = value
74
+ }
75
+
76
+ row_values.to_msgpack
77
+ end
78
+
79
+ def write(chunk)
80
+ chunk.msgpack_each {|row_values|
81
+ event = {}
82
+
83
+ row_values.each {|column_family_and_column, value|
84
+ column_family, column = column_family_and_column.split(":")
85
+
86
+ (event[column_family.intern] ||= {}).update({column => value})
87
+ }
88
+
89
+ row = MassiveRecord::Wrapper::Row.new
90
+ row.id = SecureRandom.uuid
91
+ row.values = event
92
+ row.table = @table
93
+ row.save
94
+ }
95
+ end
96
+
97
+ end
98
+
99
+ end
data/test/out_hbase.rb ADDED
@@ -0,0 +1,134 @@
1
+ require 'fluent/test'
2
+ require 'fluent/plugin/out_hbase'
3
+
4
+ class HBaseOutputTest < Test::Unit::TestCase
5
+ def setup
6
+ Fluent::Test.setup
7
+ end
8
+
9
+ CONFIG = %[
10
+ tag_column_name event:tag
11
+ time_column_name event:time
12
+ fields_to_columns_mapping foo=>event:foo,iam.nested=>event:nested
13
+ hbase_host localhost
14
+ hbase_port 9090
15
+ hbase_table events
16
+ buffer_type memory
17
+ ]
18
+
19
+ def create_driver(conf = CONFIG)
20
+ Fluent::Test::BufferedOutputTestDriver.new(Fluent::HBaseOutput) do
21
+ # We don't want to connect the HBase instance while testing
22
+ def start
23
+ super
24
+ end
25
+
26
+ # prevents writes to the HBase instance while testing
27
+ def write(chunk)
28
+ chunk.read
29
+ end
30
+ end.configure(conf)
31
+ end
32
+
33
+ def test_configure
34
+ d = create_driver
35
+ assert_equal 'event:tag', d.instance.tag_column_name
36
+ assert_equal 'event:time', d.instance.time_column_name
37
+ assert_equal 'foo=>event:foo,iam.nested=>event:nested', d.instance.fields_to_columns_mapping
38
+ assert_equal 'localhost', d.instance.hbase_host
39
+ assert_equal 9090, d.instance.hbase_port
40
+ assert_equal 'events', d.instance.hbase_table
41
+ end
42
+
43
+ def test_format
44
+ d = create_driver
45
+
46
+ time_in_int = Time.parse("2011-01-02 13:14:15 UTC").to_i
47
+
48
+ d.emit(
49
+ {
50
+ "foo" => "foo1",
51
+ "iam" => {
52
+ "nested" => "nested1"
53
+ }
54
+ },
55
+ time_in_int
56
+ )
57
+
58
+ d.emit(
59
+ {
60
+ "foo" => "foo2",
61
+ "iam" => {
62
+ "nested" => "nested2"
63
+ }
64
+ },
65
+ time_in_int
66
+ )
67
+
68
+ expected1 = {
69
+ "event:tag" => "test",
70
+ "event:time" => time_in_int,
71
+ "event:foo" => "foo1",
72
+ "event:nested" => "nested1"
73
+ }.to_msgpack
74
+
75
+ expected2 = {
76
+ "event:tag" => "test",
77
+ "event:time" => time_in_int,
78
+ "event:foo" => "foo2",
79
+ "event:nested" => "nested2"
80
+ }.to_msgpack
81
+
82
+ d.expect_format expected1
83
+ d.expect_format expected2
84
+
85
+ d.run
86
+ end
87
+
88
+ def test_write
89
+ d = create_driver
90
+
91
+ time_in_int = Time.parse("2011-01-02 13:14:15 UTC").to_i
92
+
93
+ d.emit(
94
+ {
95
+ "foo" => "foo1",
96
+ "iam" => {
97
+ "nested" => "nested1"
98
+ }
99
+ },
100
+ time_in_int
101
+ )
102
+
103
+ d.emit(
104
+ {
105
+ "foo" => "foo2",
106
+ "iam" => {
107
+ "nested" => "nested2"
108
+ }
109
+ },
110
+ time_in_int
111
+ )
112
+
113
+ expected1 = {
114
+ "event:tag" => "test",
115
+ "event:time" => time_in_int,
116
+ "event:foo" => "foo1",
117
+ "event:nested" => "nested1"
118
+ }.to_msgpack
119
+
120
+ expected2 = {
121
+ "event:tag" => "test",
122
+ "event:time" => time_in_int,
123
+ "event:foo" => "foo2",
124
+ "event:nested" => "nested2"
125
+ }.to_msgpack
126
+
127
+ # HBaseOutputTest#write returns chunk.read
128
+ data = d.run
129
+
130
+ assert_equal expected1 + expected2, data
131
+ end
132
+
133
+ end
134
+
metadata ADDED
@@ -0,0 +1,111 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-hbase
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - KUOKA Yusuke
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: fluentd
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.10.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.10.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: massive_record
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 0.2.2
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 0.2.2
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 0.9.2
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.9.2
62
+ description: HBase output plugin for Fluent event collector
63
+ email: kuoka@furyu.jp
64
+ executables: []
65
+ extensions: []
66
+ extra_rdoc_files: []
67
+ files:
68
+ - .gitignore
69
+ - AUTHORS
70
+ - ChangeLog
71
+ - Gemfile
72
+ - README.rdoc
73
+ - Rakefile
74
+ - VERSION
75
+ - example/fluent.conf
76
+ - example/run-fluentd.sh
77
+ - fluent-plugin-hbase.gemspec
78
+ - lib/fluent/plugin/out_hbase.rb
79
+ - test/out_hbase.rb
80
+ homepage: https://github.com/Furyu/fluent-plugin-hbase
81
+ licenses: []
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ segments:
93
+ - 0
94
+ hash: 1566921651398845132
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ segments:
102
+ - 0
103
+ hash: 1566921651398845132
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 1.8.24
107
+ signing_key:
108
+ specification_version: 3
109
+ summary: HBase output plugin for Fluent event collector
110
+ test_files:
111
+ - test/out_hbase.rb