fluent-plugin-cassandra-cql 0.0.1 → 0.0.2
- data/README.md +12 -8
- data/VERSION +1 -1
- data/fluent-plugin-cassandra-cql.gemspec +4 -3
- data/lib/fluent/plugin/out_cassandra.rb +66 -25
- data/spec/cassandra_output_spec.rb +77 -57
- data/spec/spec_helper.rb +1 -1
- data/spec/support/helpers.rb +56 -0
- metadata +4 -3
data/README.md CHANGED
@@ -2,12 +2,11 @@
 
 Cassandra output plugin for Fluentd.
 
-Implemented using the cassandra-cql gem and targets CQL 3.0.0
+Implemented using the cassandra-cql gem and targets [CQL 3.0.0](http://www.datastax.com/docs/1.1/references/cql/index)
 and Cassandra 1.1.x
 
 # Raison d'être
-Currently, there's another Fluentd Cassandra plugin
-here](https://github.com/tomitakazutaka/fluent-plugin-cassandra)
+Currently, there's another [Fluentd Cassandra plugin](https://github.com/tomitakazutaka/fluent-plugin-cassandra)
 
 It's implemented via the Twitter Cassandra gem, which:
 
@@ -30,7 +29,8 @@ via RubyGems
 
     # create table (column family)
     CREATE TABLE events (id varchar, ts bigint, payload text, PRIMARY KEY (id, ts)) WITH CLUSTERING ORDER BY (ts DESC);
-
+
+    # NOTE: schema definition should match that specified in the Fluentd.conf configuration file
 
 ## Fluentd.conf Configuration
     <match cassandra.**>
@@ -38,17 +38,21 @@ via RubyGems
       host 127.0.0.1          # cassandra hostname.
       port 9160               # cassandra thrft port.
       keyspace FluentdLoggers # cassandra keyspace
-      columnfamily
+      columnfamily spec_events # cassandra column family
       ttl 60                  # cassandra ttl *optional => default is 0*
+      schema                  # cassandra column family schema *hash where keys => column names and values => data types*
+      data_keys               # comma delimited string of the fluentd hash's keys
+      pop_data_keys           # keep or pop key/values from the fluentd hash when storing it as json
     </match>
 
 # Tests
 
     rake rspec
 
-NOTE: requires that cassandra be installed on the machine running the
+NOTE: requires that cassandra be installed on the machine running the
+test as well as a keyspace named "FluentdLoggers" and a column family
+named "spec_events"
 
 # TODOs
     1) make host and port configurable for tests
-    2)
-    3) add rake task to generate keyspace and columnfamily
+    2) add rake task to generate keyspace and columnfamily
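For orientation, a fully filled-in version of the configuration block documented in the README above might look like the sketch below. All values are illustrative (mostly borrowed from the plugin's spec); the `type cassandra` line is an assumption based on the plugin registering itself under the name `cassandra`, and the `schema` hash is expected to list the same columns as the CREATE TABLE statement.

    <match cassandra.**>
      type cassandra
      host 127.0.0.1
      port 9160
      keyspace FluentdLoggers
      columnfamily events
      ttl 60
      schema {:id => :string, :ts => :bigint, :payload => :string}
      data_keys tag,time
      pop_data_keys true
    </match>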
data/VERSION CHANGED
@@ -1 +1 @@
-0.0.1
+0.0.2
data/fluent-plugin-cassandra-cql.gemspec CHANGED
@@ -5,11 +5,11 @@
 
 Gem::Specification.new do |s|
   s.name = "fluent-plugin-cassandra-cql"
-  s.version = "0.0.1"
+  s.version = "0.0.2"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["obie quelland"]
-  s.date = "2012-11-
+  s.date = "2012-11-11"
   s.description = "Fluent output plugin for Cassandra via CQL version 3.0.0"
   s.email = "quelland@gmail.com"
   s.extra_rdoc_files = [
@@ -28,7 +28,8 @@ Gem::Specification.new do |s|
     "lib/fluent/plugin/out_cassandra.rb",
     "spec/cassandra_output_spec.rb",
     "spec/spec.opts",
-    "spec/spec_helper.rb"
+    "spec/spec_helper.rb",
+    "spec/support/helpers.rb"
   ]
   s.homepage = "http://github.com/obieq/fluent-plugin-cassandra-cql"
   s.licenses = ["MIT"]
data/lib/fluent/plugin/out_cassandra.rb CHANGED
@@ -6,33 +6,42 @@ module Fluent
 
   class CassandraOutput < BufferedOutput
     Fluent::Plugin.register_output('cassandra', self)
-    include SetTimeKeyMixin
-    include SetTagKeyMixin
 
-    config_param :host,
-    config_param :port,
-    config_param :keyspace,
-    config_param :columnfamily,
-    config_param :ttl,
+    config_param :host, :string
+    config_param :port, :integer
+    config_param :keyspace, :string
+    config_param :columnfamily, :string
+    config_param :ttl, :integer, :default => 0
+    config_param :schema, :string
+    config_param :data_keys, :string
+
+    # remove keys from the fluentd json event as they're processed
+    # for individual columns?
+    config_param :pop_data_keys, :bool, :default => true
 
     def connection
-      @connection ||= get_connection
+      @connection ||= get_connection(self.host, self.port, self.keyspace)
     end
 
-    #config_set_default :include_time_key, true
-    #config_set_default :include_tag_key, true
-    #config_set_default :time_format, "%Y%m%d%H%M%S"
-
     def configure(conf)
       super
 
-
-      raise ConfigError, "'
-      raise ConfigError, "'
-      raise ConfigError, "'
-
-
-
+      # perform validations
+      raise ConfigError, "'Host' is required by Cassandra output (ex: localhost, 127.0.0.1, ec2-54-242-141-252.compute-1.amazonaws.com" if self.host.nil?
+      raise ConfigError, "'Port' is required by Cassandra output (ex: 9160)" if self.port.nil?
+      raise ConfigError, "'Keyspace' is required by Cassandra output (ex: FluentdLoggers)" if self.keyspace.nil?
+      raise ConfigError, "'ColumnFamily' is required by Cassandra output (ex: events)" if self.columnfamily.nil?
+      raise ConfigError, "'Schema' is required by Cassandra output (ex: id,ts,payload)" if self.schema.nil?
+      raise ConfigError, "'Schema' must contain at least two column names (ex: id,ts,payload)" if self.schema.split(',').count < 2
+      raise ConfigError, "'DataKeys' is required by Cassandra output (ex: tag,created_at,data)" if self.data_keys.nil?
+
+      # convert schema from string to hash
+      # NOTE: ok to use eval b/c this isn't a user
+      # supplied string
+      self.schema = eval(self.schema)
+
+      # convert data keys from string to array
+      self.data_keys = self.data_keys.split(',')
     end
 
     def start
@@ -50,17 +59,49 @@ module Fluent
 
     def write(chunk)
       chunk.msgpack_each { |record|
-
-
-        "
+        values = build_insert_values_string(self.schema.keys, self.data_keys, record, self.pop_data_keys)
+        cql = "INSERT INTO #{self.columnfamily} (#{self.schema.keys.join(',')}) " +
+              "VALUES (#{values}) " +
+              "USING TTL #{self.ttl}"
+        @connection.execute(cql)
       }
     end
 
     private
 
-    def get_connection
-      connection_string = "#{
-      ::CassandraCQL::Database.new(connection_string, {:keyspace => "\"#{
+    def get_connection(host, port, keyspace)
+      connection_string = "#{host}:#{port}"
+      ::CassandraCQL::Database.new(connection_string, {:keyspace => "\"#{keyspace}\"", :cql_version => "3.0.0"})
+    end
+
+    def build_insert_values_string(schema_keys, data_keys, record, pop_data_keys)
+      values = data_keys.map.with_index do |key, index|
+        if pop_data_keys
+          schema[schema_keys[index]] == :string ? "'#{record.delete(key)}'" : record.delete(key)
+        else
+          schema[schema_keys[index]] == :string ? "'#{record[key]}'" : record[key]
+        end
+      end
+
+      # if we have one more schema key than data keys,
+      # we can then infer that we should store the event
+      # as a string representation of the corresponding
+      # json object in the last schema column
+      if schema_keys.count == data_keys.count + 1
+        values << if record.count > 0
+          "'#{record.to_json}'"
+        else
+          # by this point, the extra schema column has been
+          # added to insert cql statement, so we must put
+          # something in it
+          # TODO: detect this scenario earlier and don't
+          # specify the column name/value at all
+          # when constructing the cql stmt
+          "''"
        end
+      end
+
+      return values.join(',')
     end
 
   end
data/spec/cassandra_output_spec.rb CHANGED
@@ -1,24 +1,34 @@
 require 'spec_helper'
 Fluent::Test.setup
 
+SPEC_COLUMN_FAMILY = "spec_events"
+DATA_KEYS = "tag,time"
+
 CONFIG = %[
   host 127.0.0.1
   port 9160
   keyspace FluentdLoggers
-  columnfamily
+  columnfamily #{SPEC_COLUMN_FAMILY}
+  ttl 0
+  schema {:id => :string, :ts => :bigint, :payload => :string}
+  data_keys #{DATA_KEYS}
+  pop_data_keys true
 ]
 
 describe Fluent::CassandraOutput do
+  include Helpers
+
   let(:driver) { Fluent::Test::BufferedOutputTestDriver.new(Fluent::CassandraOutput, 'test') }
 
   after(:each) do
     d = Fluent::Test::BufferedOutputTestDriver.new(Fluent::CassandraOutput, 'test')
     d.configure(CONFIG)
-    d.instance.connection.execute("TRUNCATE
+    d.instance.connection.execute("TRUNCATE #{SPEC_COLUMN_FAMILY}")
   end
 
-  def
-
+  def set_config_value(config, config_name, value)
+    search_text = config.split("\n").map {|text| text if text.strip!.to_s.start_with? config_name.to_s}.compact![0]
+    config.gsub(search_text, "#{config_name} #{value}")
   end
 
   context 'configuring' do
@@ -29,18 +39,17 @@ describe Fluent::CassandraOutput do
       driver.instance.host.should eq('127.0.0.1')
       driver.instance.port.should eq(9160)
       driver.instance.keyspace.should eq('FluentdLoggers')
-      driver.instance.columnfamily.should eq(
+      driver.instance.columnfamily.should eq(SPEC_COLUMN_FAMILY)
       driver.instance.ttl.should eq(0)
     end
 
     it 'should configure ttl' do
      ttl = 20
-      driver.configure(
+      driver.configure(set_config_value(CONFIG, :ttl, ttl))
       driver.instance.ttl.should eq(ttl)
     end
 
     describe 'exceptions' do
-
      it 'should raise an exception if host is not configured' do
        expect { driver.configure(CONFIG.gsub("host", "invalid_config_name")) }.to raise_error Fluent::ConfigError
      end
@@ -56,12 +65,11 @@ describe Fluent::CassandraOutput do
     it 'should raise an exception if columnfamily is not configured' do
       expect { driver.configure(CONFIG.gsub("columnfamily", "invalid_config_name")) }.to raise_error Fluent::ConfigError
     end
-
    end
 
-  end
+  end # context configuring
 
-  context '
+  context 'logging' do
 
    it 'should start' do
      driver.configure(CONFIG)
@@ -84,55 +92,67 @@ describe Fluent::CassandraOutput do
       driver.run
     end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    context 'writing' do
+      context 'as json' do
+
+        describe 'pop no data keys' do
+          it 'should store json in last column' do
+            driver.configure(set_config_value(CONFIG, :pop_data_keys, false))
+            write(driver, SPEC_COLUMN_FAMILY, false)
+          end
+        end
+
+        describe 'pop some data keys' do
+          it 'should store json in last last column' do
+            driver.configure(set_config_value(CONFIG, :pop_data_keys, true))
+            write(driver, SPEC_COLUMN_FAMILY, false)
+          end
+        end
+
+        describe 'pop all data keys' do
+          it 'should store empty string in last column' do
+            driver.configure(CONFIG)
+            write(driver, SPEC_COLUMN_FAMILY, true)
+          end
+        end
+
+      end # context as json
+
+      context 'as columns' do # no need to test popping of keys b/c it makes no difference
+
+        it 'should write' do
+          config = set_config_value(CONFIG, :data_keys, DATA_KEYS + ',a')
+          config = set_config_value(CONFIG, :pop_data_keys, false)
+          driver.configure(config)
+          write(driver, SPEC_COLUMN_FAMILY, false)
+        end
+
+      end # context as columns
+
+      it 'should not locate event after ttl has expired' do
+        time = Time.now.to_i
+        tag = "ttl_test"
+        ttl = 1 # set ttl to 1 second
+
+        driver.configure(set_config_value(CONFIG, :ttl, ttl))
+        driver.emit({'tag' => tag, 'time' => time, 'a' => 1})
+        driver.run
+
+        # verify record... should return in less than one sec if hitting
+        # cassandra running on localhost
+        events = driver.instance.connection.execute("SELECT * FROM #{SPEC_COLUMN_FAMILY} where ts = #{time}")
+        events.rows.should eq(1)
+
+        # now, sleep long enough for the event to be expired from cassandra
+        sleep(ttl + 1)
+
+        # re-query and verify that no events were returned
+        events = driver.instance.connection.execute("SELECT * FROM #{SPEC_COLUMN_FAMILY} where ts = #{time}")
+        events.rows.should eq(0)
      end
-    end
 
-
-      time = Time.now.to_i
-      tag = "ttl_test"
-      ttl = 1 # set ttl to 1 second
+    end # context writing
 
-
-      driver.emit({'tag' => tag, 'time' => time, 'a' => 1})
-      driver.run
-
-      # verify record... should return in less than one sec if hitting
-      # cassandra running on localhost
-      events = driver.instance.connection.execute("SELECT * FROM events where ts = #{time}")
-      events.rows.should eq(1)
-
-      # now, sleep long enough for the event to be expired from cassandra
-      sleep(ttl)
-
-      # re-query and verify that no events were returned
-      events = driver.instance.connection.execute("SELECT * FROM events where ts = #{time}")
-      events.rows.should eq(0)
-    end
-
-  end
+  end # context logging
 
 end # CassandraOutput
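Most of the spec variations above are driven through the `set_config_value` helper, which rewrites a single line of the heredoc-style CONFIG string. A minimal usage sketch, with CONFIG trimmed to two settings for brevity:

    # helper copied from the spec above
    def set_config_value(config, config_name, value)
      search_text = config.split("\n").map {|text| text if text.strip!.to_s.start_with? config_name.to_s}.compact![0]
      config.gsub(search_text, "#{config_name} #{value}")
    end

    config = %[
      host 127.0.0.1
      ttl 0
    ]

    puts set_config_value(config, :ttl, 20)
    # the "ttl 0" line is rewritten to "ttl 20"; every other line is left untouched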
data/spec/spec_helper.rb CHANGED

data/spec/support/helpers.rb ADDED
@@ -0,0 +1,56 @@
+module Helpers
+
+  def write(driver, column_family_name, tag_and_time_only)
+    tag1 = "test1"
+    tag2 = "test2"
+    time1 = Time.now.to_i
+    time2 = Time.now.to_i + 2
+
+    record1 = {'tag' => tag1, 'time' => time1}
+    record2 = {'tag' => tag2, 'time' => time2}
+
+    unless tag_and_time_only
+      record1.merge!({'a' => 10, 'b' => 'Tesla'})
+      record2.merge!({'a' => 20, 'b' => 'Edison'})
+    end
+
+    # store both records in an array
+    records = [record1, record2]
+
+    driver.emit(records[0])
+    driver.emit(records[1])
+    driver.run # persists to cassandra
+
+    # query cassandra to verify data was correctly persisted
+    row_num = records.count # non-zero based index
+    events = driver.instance.connection.execute("SELECT * FROM #{column_family_name}")
+    events.rows.should eq(records.count)
+    events.fetch do | event | # events should be sorted desc by tag, then time
+      row_num -= 1 # zero-based index
+
+      record = records[row_num]
+      db_hash = event.to_hash
+
+      # need to take in account that we've popped both tag and time
+      # from the payload data when we saved it
+      if driver.instance.pop_data_keys
+        db_hash['id'].should eq(record.delete('tag'))
+        db_hash['ts'].should eq(record.delete('time'))
+      else
+        db_hash['id'].should eq(record['tag'])
+        db_hash['ts'].should eq(record['time'])
+      end
+
+      if driver.instance.schema.keys.count == driver.instance.data_keys.count + 1 # store as json
+        if record.count > 0
+          db_hash['payload'].should eq(record.to_json)
+        else
+          db_hash['payload'].should eq('')
+        end
+      else
+        db_hash['payload'].should eq(record[record.keys[db_hash.keys.index('payload')]])
+      end
+    end
+  end
+
+end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-cassandra-cql
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-11-
+date: 2012-11-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -143,6 +143,7 @@ files:
 - spec/cassandra_output_spec.rb
 - spec/spec.opts
 - spec/spec_helper.rb
+- spec/support/helpers.rb
 homepage: http://github.com/obieq/fluent-plugin-cassandra-cql
 licenses:
 - MIT
@@ -158,7 +159,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -
+      hash: -4467205590141374709
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
 requirements: