fluent-plugin-cassandra-cql 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +12 -8
- data/VERSION +1 -1
- data/fluent-plugin-cassandra-cql.gemspec +4 -3
- data/lib/fluent/plugin/out_cassandra.rb +66 -25
- data/spec/cassandra_output_spec.rb +77 -57
- data/spec/spec_helper.rb +1 -1
- data/spec/support/helpers.rb +56 -0
- metadata +4 -3
data/README.md
CHANGED
@@ -2,12 +2,11 @@
 
 Cassandra output plugin for Fluentd.
 
-Implemented using the cassandra-cql gem and targets CQL 3.0.0
+Implemented using the cassandra-cql gem and targets [CQL 3.0.0](http://www.datastax.com/docs/1.1/references/cql/index)
 and Cassandra 1.1.x
 
 # Raison d'être
-Currently, there's another Fluentd Cassandra plugin
-here](https://github.com/tomitakazutaka/fluent-plugin-cassandra)
+Currently, there's another [Fluentd Cassandra plugin](https://github.com/tomitakazutaka/fluent-plugin-cassandra)
 
 It's implemented via the Twitter Cassandra gem, which:
 
@@ -30,7 +29,8 @@ via RubyGems
 
 # create table (column family)
 CREATE TABLE events (id varchar, ts bigint, payload text, PRIMARY KEY (id, ts)) WITH CLUSTERING ORDER BY (ts DESC);
-
+
+# NOTE: schema definition should match that specified in the Fluentd.conf configuration file
 
 ## Fluentd.conf Configuration
 <match cassandra.**>
@@ -38,17 +38,21 @@ via RubyGems
 host 127.0.0.1 # cassandra hostname.
 port 9160 # cassandra thrft port.
 keyspace FluentdLoggers # cassandra keyspace
-columnfamily
+columnfamily spec_events # cassandra column family
 ttl 60 # cassandra ttl *optional => default is 0*
+schema # cassandra column family schema *hash where keys => column names and values => data types*
+data_keys # comma delimited string of the fluentd hash's keys
+pop_data_keys # keep or pop key/values from the fluentd hash when storing it as json
 </match>
 
 # Tests
 
 rake rspec
 
-NOTE: requires that cassandra be installed on the machine running the
+NOTE: requires that cassandra be installed on the machine running the
+test as well as a keyspace named "FluentdLoggers" and a column family
+named "spec_events"
 
 # TODOs
 1) make host and port configurable for tests
-2)
-3) add rake task to generate keyspace and columnfamily
+2) add rake task to generate keyspace and columnfamily
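For readers updating their configuration, the new `schema`, `data_keys`, and `pop_data_keys` parameters documented above appear with concrete values in the bundled spec's CONFIG later in this diff. A filled-in `<match>` section along those lines might look as follows; the values come from the spec rather than the README, and the `type cassandra` line is assumed from the plugin's registered name since it falls outside the hunk shown above:

    <match cassandra.**>
      type cassandra
      host 127.0.0.1             # cassandra hostname
      port 9160                  # cassandra thrift port
      keyspace FluentdLoggers    # cassandra keyspace
      columnfamily spec_events   # cassandra column family
      ttl 60                     # optional, defaults to 0
      schema {:id => :string, :ts => :bigint, :payload => :string}
      data_keys tag,time
      pop_data_keys true
    </match>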
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.1
+0.0.2
data/fluent-plugin-cassandra-cql.gemspec
CHANGED
@@ -5,11 +5,11 @@
 
 Gem::Specification.new do |s|
   s.name = "fluent-plugin-cassandra-cql"
-  s.version = "0.0.1"
+  s.version = "0.0.2"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["obie quelland"]
-  s.date = "2012-11-
+  s.date = "2012-11-11"
   s.description = "Fluent output plugin for Cassandra via CQL version 3.0.0"
   s.email = "quelland@gmail.com"
   s.extra_rdoc_files = [
@@ -28,7 +28,8 @@ Gem::Specification.new do |s|
     "lib/fluent/plugin/out_cassandra.rb",
     "spec/cassandra_output_spec.rb",
     "spec/spec.opts",
-    "spec/spec_helper.rb"
+    "spec/spec_helper.rb",
+    "spec/support/helpers.rb"
   ]
   s.homepage = "http://github.com/obieq/fluent-plugin-cassandra-cql"
   s.licenses = ["MIT"]
data/lib/fluent/plugin/out_cassandra.rb
CHANGED
@@ -6,33 +6,42 @@ module Fluent
 
   class CassandraOutput < BufferedOutput
     Fluent::Plugin.register_output('cassandra', self)
-    include SetTimeKeyMixin
-    include SetTagKeyMixin
 
-    config_param :host,
-    config_param :port,
-    config_param :keyspace,
-    config_param :columnfamily,
-    config_param :ttl,
+    config_param :host, :string
+    config_param :port, :integer
+    config_param :keyspace, :string
+    config_param :columnfamily, :string
+    config_param :ttl, :integer, :default => 0
+    config_param :schema, :string
+    config_param :data_keys, :string
+
+    # remove keys from the fluentd json event as they're processed
+    # for individual columns?
+    config_param :pop_data_keys, :bool, :default => true
 
     def connection
-      @connection ||= get_connection
+      @connection ||= get_connection(self.host, self.port, self.keyspace)
     end
 
-    #config_set_default :include_time_key, true
-    #config_set_default :include_tag_key, true
-    #config_set_default :time_format, "%Y%m%d%H%M%S"
-
     def configure(conf)
       super
 
-
-      raise ConfigError, "'
-      raise ConfigError, "'
-      raise ConfigError, "'
-
-
-
+      # perform validations
+      raise ConfigError, "'Host' is required by Cassandra output (ex: localhost, 127.0.0.1, ec2-54-242-141-252.compute-1.amazonaws.com" if self.host.nil?
+      raise ConfigError, "'Port' is required by Cassandra output (ex: 9160)" if self.port.nil?
+      raise ConfigError, "'Keyspace' is required by Cassandra output (ex: FluentdLoggers)" if self.keyspace.nil?
+      raise ConfigError, "'ColumnFamily' is required by Cassandra output (ex: events)" if self.columnfamily.nil?
+      raise ConfigError, "'Schema' is required by Cassandra output (ex: id,ts,payload)" if self.schema.nil?
+      raise ConfigError, "'Schema' must contain at least two column names (ex: id,ts,payload)" if self.schema.split(',').count < 2
+      raise ConfigError, "'DataKeys' is required by Cassandra output (ex: tag,created_at,data)" if self.data_keys.nil?
+
+      # convert schema from string to hash
+      # NOTE: ok to use eval b/c this isn't this isn't a user
+      # supplied string
+      self.schema = eval(self.schema)
+
+      # convert data keys from string to array
+      self.data_keys = self.data_keys.split(',')
     end
 
     def start
@@ -50,17 +59,49 @@ module Fluent
 
     def write(chunk)
       chunk.msgpack_each { |record|
-
-
-        "
+        values = build_insert_values_string(self.schema.keys, self.data_keys, record, self.pop_data_keys)
+        cql = "INSERT INTO #{self.columnfamily} (#{self.schema.keys.join(',')}) " +
+              "VALUES (#{values}) " +
+              "USING TTL #{self.ttl}"
+        @connection.execute(cql)
       }
     end
 
     private
 
-    def get_connection
-      connection_string = "#{
-      ::CassandraCQL::Database.new(connection_string, {:keyspace => "\"#{
+    def get_connection(host, port, keyspace)
+      connection_string = "#{host}:#{port}"
+      ::CassandraCQL::Database.new(connection_string, {:keyspace => "\"#{keyspace}\"", :cql_version => "3.0.0"})
+    end
+
+    def build_insert_values_string(schema_keys, data_keys, record, pop_data_keys)
+      values = data_keys.map.with_index do |key, index|
+        if pop_data_keys
+          schema[schema_keys[index]] == :string ? "'#{record.delete(key)}'" : record.delete(key)
+        else
+          schema[schema_keys[index]] == :string ? "'#{record[key]}'" : record[key]
+        end
+      end
+
+      # if we have one more schema key than data keys,
+      # we can then infer that we should store the event
+      # as a string representation of the corresponding
+      # json object in the last schema column
+      if schema_keys.count == data_keys.count + 1
+        values << if record.count > 0
+          "'#{record.to_json}'"
+        else
+          # by this point, the extra schema column has been
+          # added to insert cql statement, so we must put
+          # something in it
+          # TODO: detect this scenario earlier and don't
+          # specify the column name/value at all
+          # when constructing the cql stmt
+          "''"
+        end
+      end
+
+      return values.join(',')
    end
 
  end
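To illustrate the new write path: for each buffered record, `build_insert_values_string` maps `data_keys` onto the schema columns (quoting `:string` columns), and when there is exactly one more schema column than data keys it serializes whatever remains of the record into that last column as JSON. Below is a minimal standalone sketch of that logic, using the schema and data keys from the bundled spec and a made-up sample record; it is a simplified re-implementation for illustration, not the plugin code itself:

    require 'json'

    schema    = {:id => :string, :ts => :bigint, :payload => :string}
    data_keys = ['tag', 'time']
    record    = {'tag' => 'test1', 'time' => 1352600000, 'a' => 10, 'b' => 'Tesla'}

    # mirrors build_insert_values_string with pop_data_keys enabled:
    # quote :string columns, delete consumed keys from the record
    values = data_keys.map.with_index do |key, index|
      schema[schema.keys[index]] == :string ? "'#{record.delete(key)}'" : record.delete(key)
    end

    # one extra schema column => whatever is left of the record is stored as JSON
    values << "'#{record.to_json}'" if schema.keys.count == data_keys.count + 1

    cql = "INSERT INTO spec_events (#{schema.keys.join(',')}) " +
          "VALUES (#{values.join(',')}) USING TTL 0"
    puts cql
    # => INSERT INTO spec_events (id,ts,payload) VALUES ('test1',1352600000,'{"a":10,"b":"Tesla"}') USING TTL 0

With `pop_data_keys true` (the default), `tag` and `time` are removed from the record before serialization, so they are not duplicated inside the JSON payload column.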
data/spec/cassandra_output_spec.rb
CHANGED
@@ -1,24 +1,34 @@
 require 'spec_helper'
 Fluent::Test.setup
 
+SPEC_COLUMN_FAMILY = "spec_events"
+DATA_KEYS = "tag,time"
+
 CONFIG = %[
   host 127.0.0.1
   port 9160
   keyspace FluentdLoggers
-  columnfamily
+  columnfamily #{SPEC_COLUMN_FAMILY}
+  ttl 0
+  schema {:id => :string, :ts => :bigint, :payload => :string}
+  data_keys #{DATA_KEYS}
+  pop_data_keys true
 ]
 
 describe Fluent::CassandraOutput do
+  include Helpers
+
   let(:driver) { Fluent::Test::BufferedOutputTestDriver.new(Fluent::CassandraOutput, 'test') }
 
   after(:each) do
     d = Fluent::Test::BufferedOutputTestDriver.new(Fluent::CassandraOutput, 'test')
     d.configure(CONFIG)
-    d.instance.connection.execute("TRUNCATE
+    d.instance.connection.execute("TRUNCATE #{SPEC_COLUMN_FAMILY}")
   end
 
-  def
-
+  def set_config_value(config, config_name, value)
+    search_text = config.split("\n").map {|text| text if text.strip!.to_s.start_with? config_name.to_s}.compact![0]
+    config.gsub(search_text, "#{config_name} #{value}")
   end
 
   context 'configuring' do
@@ -29,18 +39,17 @@ describe Fluent::CassandraOutput do
       driver.instance.host.should eq('127.0.0.1')
       driver.instance.port.should eq(9160)
       driver.instance.keyspace.should eq('FluentdLoggers')
-      driver.instance.columnfamily.should eq(
+      driver.instance.columnfamily.should eq(SPEC_COLUMN_FAMILY)
       driver.instance.ttl.should eq(0)
     end
 
     it 'should configure ttl' do
       ttl = 20
-      driver.configure(
+      driver.configure(set_config_value(CONFIG, :ttl, ttl))
       driver.instance.ttl.should eq(ttl)
     end
 
     describe 'exceptions' do
-
      it 'should raise an exception if host is not configured' do
        expect { driver.configure(CONFIG.gsub("host", "invalid_config_name")) }.to raise_error Fluent::ConfigError
      end
@@ -56,12 +65,11 @@ describe Fluent::CassandraOutput do
      it 'should raise an exception if columnfamily is not configured' do
        expect { driver.configure(CONFIG.gsub("columnfamily", "invalid_config_name")) }.to raise_error Fluent::ConfigError
      end
-
    end
 
-  end
+  end # context configuring
 
-  context '
+  context 'logging' do
 
    it 'should start' do
      driver.configure(CONFIG)
@@ -84,55 +92,67 @@ describe Fluent::CassandraOutput do
      driver.run
    end
 
-  [old lines 87-110 removed; their content is truncated in this diff view]
+    context 'writing' do
+      context 'as json' do
+
+        describe 'pop no data keys' do
+          it 'should store json in last column' do
+            driver.configure(set_config_value(CONFIG, :pop_data_keys, false))
+            write(driver, SPEC_COLUMN_FAMILY, false)
+          end
+        end
+
+        describe 'pop some data keys' do
+          it 'should store json in last last column' do
+            driver.configure(set_config_value(CONFIG, :pop_data_keys, true))
+            write(driver, SPEC_COLUMN_FAMILY, false)
+          end
+        end
+
+        describe 'pop all data keys' do
+          it 'should store empty string in last column' do
+            driver.configure(CONFIG)
+            write(driver, SPEC_COLUMN_FAMILY, true)
+          end
+        end
+
+      end # context as json
+
+      context 'as columns' do # no need to test popping of keys b/c it makes no difference
+
+        it 'should write' do
+          config = set_config_value(CONFIG, :data_keys, DATA_KEYS + ',a')
+          config = set_config_value(CONFIG, :pop_data_keys, false)
+          driver.configure(config)
+          write(driver, SPEC_COLUMN_FAMILY, false)
+        end
+
+      end # context as columns
+
+      it 'should not locate event after ttl has expired' do
+        time = Time.now.to_i
+        tag = "ttl_test"
+        ttl = 1 # set ttl to 1 second
+
+        driver.configure(set_config_value(CONFIG, :ttl, ttl))
+        driver.emit({'tag' => tag, 'time' => time, 'a' => 1})
+        driver.run
+
+        # verify record... should return in less than one sec if hitting
+        # cassandra running on localhost
+        events = driver.instance.connection.execute("SELECT * FROM #{SPEC_COLUMN_FAMILY} where ts = #{time}")
+        events.rows.should eq(1)
+
+        # now, sleep long enough for the event to be expired from cassandra
+        sleep(ttl + 1)
+
+        # re-query and verify that no events were returned
+        events = driver.instance.connection.execute("SELECT * FROM #{SPEC_COLUMN_FAMILY} where ts = #{time}")
+        events.rows.should eq(0)
      end
-    end
 
-
-      time = Time.now.to_i
-      tag = "ttl_test"
-      ttl = 1 # set ttl to 1 second
+    end # context writing
 
-
-      driver.emit({'tag' => tag, 'time' => time, 'a' => 1})
-      driver.run
-
-      # verify record... should return in less than one sec if hitting
-      # cassandra running on localhost
-      events = driver.instance.connection.execute("SELECT * FROM events where ts = #{time}")
-      events.rows.should eq(1)
-
-      # now, sleep long enough for the event to be expired from cassandra
-      sleep(ttl)
-
-      # re-query and verify that no events were returned
-      events = driver.instance.connection.execute("SELECT * FROM events where ts = #{time}")
-      events.rows.should eq(0)
-    end
-
-  end
+  end # context logging
 
 end # CassandraOutput
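The spec's `set_config_value` helper above rewrites a single line of the heredoc-style CONFIG string so each example can override one setting without duplicating the whole block. A small standalone illustration with a trimmed-down config (not part of the gem itself):

    config = %[
      host 127.0.0.1
      ttl 0
    ]

    # same logic as the spec helper: find the line that begins with the
    # setting name, then substitute "<name> <new value>" for it
    def set_config_value(config, config_name, value)
      search_text = config.split("\n").map {|text| text if text.strip!.to_s.start_with? config_name.to_s}.compact![0]
      config.gsub(search_text, "#{config_name} #{value}")
    end

    puts set_config_value(config, :ttl, 20)
    # the "ttl 0" line becomes "ttl 20"; every other line is left untouched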
data/spec/spec_helper.rb
CHANGED

data/spec/support/helpers.rb
ADDED
@@ -0,0 +1,56 @@
+module Helpers
+
+  def write(driver, column_family_name, tag_and_time_only)
+    tag1 = "test1"
+    tag2 = "test2"
+    time1 = Time.now.to_i
+    time2 = Time.now.to_i + 2
+
+    record1 = {'tag' => tag1, 'time' => time1}
+    record2 = {'tag' => tag2, 'time' => time2}
+
+    unless tag_and_time_only
+      record1.merge!({'a' => 10, 'b' => 'Tesla'})
+      record2.merge!({'a' => 20, 'b' => 'Edison'})
+    end
+
+    # store both records in an array
+    records = [record1, record2]
+
+    driver.emit(records[0])
+    driver.emit(records[1])
+    driver.run # persists to cassandra
+
+    # query cassandra to verify data was correctly persisted
+    row_num = records.count # non-zero based index
+    events = driver.instance.connection.execute("SELECT * FROM #{column_family_name}")
+    events.rows.should eq(records.count)
+    events.fetch do | event | # events should be sorted desc by tag, then time
+      row_num -= 1 # zero-based index
+
+      record = records[row_num]
+      db_hash = event.to_hash
+
+      # need to take in account that we've popped both tag and time
+      # from the payload data when we saved it
+      if driver.instance.pop_data_keys
+        db_hash['id'].should eq(record.delete('tag'))
+        db_hash['ts'].should eq(record.delete('time'))
+      else
+        db_hash['id'].should eq(record['tag'])
+        db_hash['ts'].should eq(record['time'])
+      end
+
+      if driver.instance.schema.keys.count == driver.instance.data_keys.count + 1 # store as json
+        if record.count > 0
+          db_hash['payload'].should eq(record.to_json)
+        else
+          db_hash['payload'].should eq('')
+        end
+      else
+        db_hash['payload'].should eq(record[record.keys[db_hash.keys.index('payload')]])
+      end
+    end
+  end
+
+end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-cassandra-cql
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-11-
+date: 2012-11-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -143,6 +143,7 @@ files:
 - spec/cassandra_output_spec.rb
 - spec/spec.opts
 - spec/spec_helper.rb
+- spec/support/helpers.rb
 homepage: http://github.com/obieq/fluent-plugin-cassandra-cql
 licenses:
 - MIT
@@ -158,7 +159,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
     segments:
     - 0
-    hash: -
+    hash: -4467205590141374709
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements: