logstash-output-cassandra 0.9.0
- checksums.yaml +7 -0
- data/CONTRIBUTORS +7 -0
- data/Gemfile +4 -0
- data/LICENSE +218 -0
- data/README.md +148 -0
- data/lib/logstash/outputs/cassandra.rb +164 -0
- data/lib/logstash/outputs/cassandra/backoff_retry_policy.rb +65 -0
- data/lib/logstash/outputs/cassandra/buffer.rb +125 -0
- data/lib/logstash/outputs/cassandra/event_parser.rb +161 -0
- data/lib/logstash/outputs/cassandra/safe_submitter.rb +118 -0
- data/logstash-output-cassandra.gemspec +35 -0
- data/spec/cassandra_spec_helper.rb +14 -0
- data/spec/integration/outputs/cassandra_spec.rb +115 -0
- data/spec/integration/outputs/integration_helper.rb +91 -0
- data/spec/unit/outputs/backoff_retry_policy_spec.rb +131 -0
- data/spec/unit/outputs/buffer_spec.rb +119 -0
- data/spec/unit/outputs/cassandra_spec.rb +5 -0
- data/spec/unit/outputs/event_parser_spec.rb +304 -0
- data/spec/unit/outputs/safe_submitter_spec.rb +201 -0
- metadata +266 -0
data/lib/logstash/outputs/cassandra/backoff_retry_policy.rb (new file, @@ -0,0 +1,65 @@):

```ruby
# encoding: utf-8
require 'cassandra'

module Cassandra
  module Retry
    module Policies
      # This is a version of the default retry policy (https://github.com/datastax/ruby-driver/blob/v2.1.5/lib/cassandra/retry/policies/default.rb)
      # with backoff retry configuration options
      class Backoff
        include ::Cassandra::Retry::Policy

        def initialize(opts)
          @logger = opts['logger']
          @backoff_type = opts['backoff_type']
          @backoff_size = opts['backoff_size']
          @retry_limit = opts['retry_limit']
        end

        def read_timeout(statement, consistency, required, received, retrieved, retries)
          retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required,
                               :received => received, :retrieved => retrieved, :retries => retries })
        end

        def write_timeout(statement, consistency, type, required, received, retries)
          retry_with_backoff({ :statement => statement, :consistency => consistency, :type => type,
                               :required => required, :received => received, :retries => retries })
        end

        def unavailable(statement, consistency, required, alive, retries)
          retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required,
                               :alive => alive, :retries => retries })
        end

        def retry_with_backoff(opts)
          if @retry_limit > -1 && opts[:retries] > @retry_limit
            @logger.error('backoff retries exhausted', :opts => opts)
            return reraise
          end

          @logger.error('activating backoff wait', :opts => opts)
          backoff_wait_before_next_retry(opts[:retries])

          try_again(opts[:consistency])
        end

        private

        def backoff_wait_before_next_retry(retries)
          backoff_wait_time = calculate_backoff_wait_time(retries)
          Kernel::sleep(backoff_wait_time)
        end

        def calculate_backoff_wait_time(retries)
          case @backoff_type
          when '**'
            return @backoff_size ** retries
          when '*'
            return @backoff_size * retries
          else
            raise ArgumentError, "unknown backoff type #{@backoff_type}"
          end
        end
      end
    end
  end
end
```
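For reference, a minimal sketch of how this policy behaves when driven directly; the logger and option values here are illustrative, not defaults shipped with the gem:

```ruby
require 'logger'
require 'logstash/outputs/cassandra/backoff_retry_policy'

# Illustrative settings: exponential backoff, capped at 10 retries.
policy = ::Cassandra::Retry::Policies::Backoff.new(
  'logger'       => Logger.new($stderr),
  'backoff_type' => '**', # wait = backoff_size ** retries
  'backoff_size' => 2,
  'retry_limit'  => 10
)

# Sleeps 2**3 = 8 seconds, then returns a try-again decision at :quorum.
decision = policy.retry_with_backoff(:retries => 3, :consistency => :quorum)
```

Once `opts[:retries]` exceeds `retry_limit`, `retry_with_backoff` logs "backoff retries exhausted" and reraises instead of sleeping.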
data/lib/logstash/outputs/cassandra/buffer.rb (new file, @@ -0,0 +1,125 @@):

```ruby
# encoding: utf-8
require "concurrent"
java_import java.util.concurrent.locks.ReentrantLock

module LogStash; module Outputs; module Cassandra
  class Buffer
    def initialize(logger, max_size, flush_interval, &block)
      @logger = logger
      # You need to acquire this for anything modifying state generally
      @operations_mutex = Mutex.new
      @operations_lock = java.util.concurrent.locks.ReentrantLock.new

      @stopping = Concurrent::AtomicBoolean.new(false)
      @max_size = max_size
      @submit_proc = block

      @buffer = []

      @last_flush = Time.now
      @flush_interval = flush_interval
      @flush_thread = spawn_interval_flusher
    end

    def push(item)
      synchronize do |buffer|
        push_unsafe(item)
      end
    end
    alias_method :<<, :push

    # Push multiple items onto the buffer in a single operation
    def push_multi(items)
      raise ArgumentError, "push multi takes an array!, not an #{items.class}!" unless items.is_a?(Array)
      synchronize do |buffer|
        items.each {|item| push_unsafe(item) }
      end
    end

    def flush
      synchronize { flush_unsafe }
    end

    def stop(do_flush=true, wait_complete=true)
      return if stopping?
      @stopping.make_true

      # No need to acquire a lock in this case
      return if !do_flush && !wait_complete

      synchronize do
        flush_unsafe if do_flush
        @flush_thread.join if wait_complete
      end
    end

    def contents
      synchronize {|buffer| buffer}
    end

    # For externally operating on the buffer contents
    # this takes a block and will yield the internal buffer and executes
    # the block in a synchronized block from the internal mutex
    def synchronize
      @operations_mutex.synchronize { yield(@buffer) }
    end

    # These methods are private for various reasons, chief among them threadsafety!
    # Many require the @operations_mutex to be locked to be safe
    private

    def push_unsafe(item)
      @buffer << item
      if @buffer.size >= @max_size
        flush_unsafe
      end
    end

    def spawn_interval_flusher
      Thread.new do
        loop do
          sleep 0.2
          break if stopping?
          synchronize { interval_flush }
        end
      end
    end

    def interval_flush
      if last_flush_seconds_ago >= @flush_interval
        begin
          @logger.debug? && @logger.debug("Flushing buffer at interval",
                                          :instance => self.inspect,
                                          :interval => @flush_interval)
          flush_unsafe
        rescue StandardError => e
          @logger.warn("Error flushing buffer at interval!",
                       :instance => self.inspect,
                       :message => e.message,
                       :class => e.class.name,
                       :backtrace => e.backtrace
          )
        rescue Exception => e
          @logger.warn("Exception flushing buffer at interval!", :error => e.message, :class => e.class.name)
        end
      end
    end

    def flush_unsafe
      if @buffer.size > 0
        @submit_proc.call(@buffer)
        @buffer.clear
      end

      @last_flush = Time.now # This must always be set to ensure correct timer behavior
    end

    def last_flush_seconds_ago
      Time.now - @last_flush
    end

    def stopping?
      @stopping.true?
    end
  end
end end end
```
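A minimal usage sketch, assuming a JRuby runtime (the `java_import` above ties this class to JRuby, which Logstash runs on); the logger, sizes, and payloads are illustrative:

```ruby
require 'logger'
require 'logstash/outputs/cassandra/buffer'

# Flush whenever 5 items accumulate or 1 second has passed, whichever
# comes first; the block receives the batched items.
buffer = LogStash::Outputs::Cassandra::Buffer.new(Logger.new($stderr), 5, 1) do |actions|
  puts "submitting #{actions.size} actions" # the plugin calls its submitter here
end

10.times { |i| buffer << { 'table' => 'logs', 'data' => { 'n' => i } } }
buffer.stop # flushes any remainder, then joins the interval flusher thread
```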
data/lib/logstash/outputs/cassandra/event_parser.rb (new file, @@ -0,0 +1,161 @@):

```ruby
# encoding: utf-8
require 'time'
require 'set'
require 'cassandra'

module LogStash; module Outputs; module Cassandra
  # Responsible for accepting events from the pipeline and returning actions for the SafeSubmitter
  class EventParser
    def initialize(options)
      @logger = options['logger']
      @table = options['table']
      @filter_transform_event_key = options['filter_transform_event_key']
      assert_filter_transform_structure(options['filter_transform']) if options['filter_transform']
      @filter_transform = options['filter_transform']
      @hints = options['hints']
      @ignore_bad_values = options['ignore_bad_values']
    end

    def parse(event)
      action = {}
      action['table'] = event.sprintf(@table)
      filter_transform = get_filter_transform(event)
      if filter_transform
        action['data'] = {}
        filter_transform.each { |filter|
          add_event_value_from_filter_to_action(event, filter, action)
        }
      else
        add_event_data_using_configured_hints(event, action)
      end

      @logger.debug('event parsed to action', :action => action)
      action
    end

    private

    def get_filter_transform(event)
      filter_transform = nil
      if @filter_transform_event_key
        filter_transform = event[@filter_transform_event_key]
        assert_filter_transform_structure(filter_transform)
      elsif @filter_transform.length > 0
        filter_transform = @filter_transform
      end
      filter_transform
    end

    def assert_filter_transform_structure(filter_transform)
      filter_transform.each { |item|
        if !item.has_key?('event_key') || !item.has_key?('column_name')
          raise ArgumentError, "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}"
        end
      }
    end

    def add_event_value_from_filter_to_action(event, filter, action)
      event_data = event.sprintf(filter['event_key'])
      unless filter.fetch('expansion_only', false)
        event_data = event[event_data]
      end
      if filter.has_key?('cassandra_type')
        cassandra_type = event.sprintf(filter['cassandra_type'])
        event_data = convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type)
      end
      column_name = event.sprintf(filter['column_name'])
      action['data'][column_name] = event_data
    end

    def add_event_data_using_configured_hints(event, action)
      action_data = event.to_hash
      # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly
      action_data.reject!{|key| %r{^@} =~ key}
      @hints.each do |event_key, cassandra_type|
        if action_data.has_key?(event_key)
          action_data[event_key] = convert_value_to_cassandra_type_or_default_if_configured(action_data[event_key], cassandra_type)
        end
      end
      action['data'] = action_data
    end

    def convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type)
      typed_event_data = nil
      begin
        typed_event_data = convert_value_to_cassandra_type(event_data, cassandra_type)
      rescue Exception => e
        error_message = "Cannot convert value (`#{event_data}`) to `#{cassandra_type}` type"
        if @ignore_bad_values
          case cassandra_type
          when 'float', 'int', 'varint', 'bigint', 'double', 'counter', 'timestamp'
            typed_event_data = convert_value_to_cassandra_type(0, cassandra_type)
          when 'timeuuid'
            typed_event_data = convert_value_to_cassandra_type('00000000-0000-0000-0000-000000000000', cassandra_type)
          when 'inet'
            typed_event_data = convert_value_to_cassandra_type('0.0.0.0', cassandra_type)
          when /^set<.*>$/
            typed_event_data = convert_value_to_cassandra_type([], cassandra_type)
          else
            raise ArgumentError, "unable to provide a default value for type #{cassandra_type}"
          end
          @logger.warn(error_message, :exception => e, :backtrace => e.backtrace)
        else
          @logger.error(error_message, :exception => e, :backtrace => e.backtrace)
          raise error_message
        end
      end
      typed_event_data
    end

    def convert_value_to_cassandra_type(event_data, cassandra_type)
      case cassandra_type
      when 'timestamp'
        converted_value = event_data
        if converted_value.is_a?(Numeric)
          converted_value = Time.at(converted_value)
        elsif converted_value.respond_to?(:to_s)
          converted_value = Time::parse(event_data.to_s)
        end
        return ::Cassandra::Types::Timestamp.new(converted_value)
      when 'inet'
        return ::Cassandra::Types::Inet.new(event_data)
      when 'float'
        return ::Cassandra::Types::Float.new(event_data)
      when 'varchar'
        return ::Cassandra::Types::Varchar.new(event_data)
      when 'text'
        return ::Cassandra::Types::Text.new(event_data)
      when 'blob'
        return ::Cassandra::Types::Blob.new(event_data)
      when 'ascii'
        return ::Cassandra::Types::Ascii.new(event_data)
      when 'bigint'
        return ::Cassandra::Types::Bigint.new(event_data)
      when 'counter'
        return ::Cassandra::Types::Counter.new(event_data)
      when 'int'
        return ::Cassandra::Types::Int.new(event_data)
      when 'varint'
        return ::Cassandra::Types::Varint.new(event_data)
      when 'boolean'
        return ::Cassandra::Types::Boolean.new(event_data)
      when 'decimal'
        return ::Cassandra::Types::Decimal.new(event_data)
      when 'double'
        return ::Cassandra::Types::Double.new(event_data)
      when 'timeuuid'
        return ::Cassandra::Types::Timeuuid.new(event_data)
      when /^set<(.*)>$/
        # convert each value, then add all to a set
        converted_items = ::Set.new
        set_type = $1
        event_data.each { |item|
          converted_item = convert_value_to_cassandra_type(item, set_type)
          converted_items.add(converted_item)
        }
        return converted_items
      else
        raise ArgumentError, "Unknown cassandra_type #{cassandra_type}"
      end
    end
  end
end end end
```
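To make the parser's contract concrete, here is a hypothetical `filter_transform` and the action shape `parse` would produce for it; the table name, fields, and event are invented for illustration:

```ruby
# Copy the event's "host" field verbatim, and coerce "@timestamp" into a
# Cassandra timestamp column named "ts".
filter_transform = [
  { 'event_key' => 'host',       'column_name' => 'host' },
  { 'event_key' => '@timestamp', 'column_name' => 'ts', 'cassandra_type' => 'timestamp' }
]

# With table => "logs" and an event like
#   { "host" => "web-1", "@timestamp" => "2016-01-01T00:00:00Z" }
# EventParser#parse returns:
#   { 'table' => 'logs',
#     'data'  => { 'host' => 'web-1',
#                  'ts'   => a ::Cassandra::Types::Timestamp for that instant } }
```

When no filter transform is configured, the hints path instead takes the whole event hash (minus `@`-prefixed fields) and type-converts only the keys listed in `hints`.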
data/lib/logstash/outputs/cassandra/safe_submitter.rb (new file, @@ -0,0 +1,118 @@):

```ruby
# encoding: utf-8
require 'thread'
require 'cassandra'
require 'logstash/outputs/cassandra/backoff_retry_policy'

module LogStash; module Outputs; module Cassandra
  # Responsible for submitting parsed actions to cassandra (with or without a retry mechanism)
  class SafeSubmitter
    def initialize(options)
      @statement_cache = {}
      @logger = options['logger']
      setup_cassandra_session(options)
    end

    def submit(actions)
      queries = prepare_queries(actions)
      execute_queries_with_retries(queries)
    end

    private

    def setup_cassandra_session(options)
      @retry_policy = get_retry_policy(options['retry_policy'])
      @consistency = options['consistency'].to_sym
      cluster = options['cassandra'].cluster(
        username: options['username'],
        password: options['password'],
        protocol_version: options['protocol_version'],
        hosts: options['hosts'],
        port: options['port'],
        consistency: @consistency,
        timeout: options['request_timeout'],
        retry_policy: @retry_policy,
        logger: options['logger']
      )
      @session = cluster.connect(options['keyspace'])
    end

    def get_retry_policy(retry_policy)
      case retry_policy['type']
      when 'default'
        return ::Cassandra::Retry::Policies::Default.new
      when 'downgrading_consistency'
        return ::Cassandra::Retry::Policies::DowngradingConsistency.new
      when 'failthrough'
        return ::Cassandra::Retry::Policies::Fallthrough.new
      when 'backoff'
        return ::Cassandra::Retry::Policies::Backoff.new({
          'backoff_type' => retry_policy['backoff_type'], 'backoff_size' => retry_policy['backoff_size'],
          'retry_limit' => retry_policy['retry_limit'], 'logger' => @logger
        })
      else
        raise ArgumentError, "unknown retry policy type: #{retry_policy['type']}"
      end
    end

    def prepare_queries(actions)
      remaining_queries = Queue.new
      actions.each do |action|
        begin
          query = get_query(action)
          remaining_queries << { :query => query, :arguments => action['data'].values }
        rescue Exception => e
          @logger.error('Failed to prepare query', :action => action, :exception => e, :backtrace => e.backtrace)
        end
      end
      remaining_queries
    end

    def get_query(action)
      @logger.debug('generating query for action', :action => action)
      action_data = action['data']
      query =
        "INSERT INTO #{action['table']} (#{action_data.keys.join(', ')})
        VALUES (#{('?' * action_data.keys.count).split(//) * ', '})"
      unless @statement_cache.has_key?(query)
        @logger.debug('preparing new query', :query => query)
        @statement_cache[query] = @session.prepare(query)
      end
      @statement_cache[query]
    end

    def execute_queries_with_retries(queries)
      retries = 0
      while queries.length > 0
        execute_queries(queries, retries)
        retries += 1
      end
    end

    def execute_queries(queries, retries)
      futures = []
      while queries.length > 0
        query = queries.pop
        begin
          future = execute_async(query, retries, queries)
          futures << future
        rescue Exception => e
          @logger.error('Failed to send query', :query => query, :exception => e, :backtrace => e.backtrace)
        end
      end
      futures.each(&:join)
    end

    def execute_async(query, retries, queries)
      future = @session.execute_async(query[:query], arguments: query[:arguments])
      future.on_failure { |error|
        @logger.error('Failed to execute query', :query => query, :error => error)
        if @retry_policy.is_a?(::Cassandra::Retry::Policies::Backoff)
          decision = @retry_policy.retry_with_backoff({ :retries => retries, :consistency => @consistency })
          if decision.is_a?(::Cassandra::Retry::Decisions::Retry)
            queries << query
          end
        end
      }
      future
    end
  end
end end end
```
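The statement cache keys on the generated CQL text, so every action with the same table and column set reuses one prepared statement. The string built in `get_query` is easiest to read by example (table and columns invented for illustration):

```ruby
action = { 'table' => 'logs', 'data' => { 'host' => 'web-1', 'n' => 1 } }

# ('?' * 2)         => "??"
# "??".split(//)    => ["?", "?"]
# ["?", "?"] * ', ' => "?, ?"   (Array#* with a String argument joins)
#
# so get_query(action) prepares:
#   INSERT INTO logs (host, n)
#   VALUES (?, ?)
```

Failed queries are pushed back onto the queue by the `on_failure` callback (when the backoff policy decides to retry), which is what keeps the `while queries.length > 0` loop in `execute_queries_with_retries` running until everything succeeds or the retry limit reraises.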