logstash-output-cassandra 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CONTRIBUTORS +7 -0
- data/Gemfile +4 -0
- data/LICENSE +218 -0
- data/README.md +148 -0
- data/lib/logstash/outputs/cassandra.rb +164 -0
- data/lib/logstash/outputs/cassandra/backoff_retry_policy.rb +65 -0
- data/lib/logstash/outputs/cassandra/buffer.rb +125 -0
- data/lib/logstash/outputs/cassandra/event_parser.rb +161 -0
- data/lib/logstash/outputs/cassandra/safe_submitter.rb +118 -0
- data/logstash-output-cassandra.gemspec +35 -0
- data/spec/cassandra_spec_helper.rb +14 -0
- data/spec/integration/outputs/cassandra_spec.rb +115 -0
- data/spec/integration/outputs/integration_helper.rb +91 -0
- data/spec/unit/outputs/backoff_retry_policy_spec.rb +131 -0
- data/spec/unit/outputs/buffer_spec.rb +119 -0
- data/spec/unit/outputs/cassandra_spec.rb +5 -0
- data/spec/unit/outputs/event_parser_spec.rb +304 -0
- data/spec/unit/outputs/safe_submitter_spec.rb +201 -0
- metadata +266 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'cassandra'
|
3
|
+
|
4
|
+
module Cassandra
  module Retry
    module Policies
      # A variant of the driver's default retry policy
      # (https://github.com/datastax/ruby-driver/blob/v2.1.5/lib/cassandra/retry/policies/default.rb)
      # that sleeps for a configurable backoff before retrying, and re-raises
      # once a configurable retry limit has been exceeded.
      class Backoff
        include ::Cassandra::Retry::Policy

        # opts is a string-keyed hash:
        #   'logger'       - logger used to report backoff waits / exhaustion
        #   'backoff_type' - '**' (exponential) or '*' (linear)
        #   'backoff_size' - base factor for the wait-time calculation
        #   'retry_limit'  - maximum retry count; -1 disables the limit
        def initialize(opts)
          @logger       = opts['logger']
          @backoff_type = opts['backoff_type']
          @backoff_size = opts['backoff_size']
          @retry_limit  = opts['retry_limit']
        end

        def read_timeout(statement, consistency, required, received, retrieved, retries)
          context = {
            :statement => statement, :consistency => consistency, :required => required,
            :received => received, :retrieved => retrieved, :retries => retries
          }
          retry_with_backoff(context)
        end

        def write_timeout(statement, consistency, type, required, received, retries)
          context = {
            :statement => statement, :consistency => consistency, :type => type,
            :required => required, :received => received, :retries => retries
          }
          retry_with_backoff(context)
        end

        def unavailable(statement, consistency, required, alive, retries)
          context = {
            :statement => statement, :consistency => consistency, :required => required,
            :alive => alive, :retries => retries
          }
          retry_with_backoff(context)
        end

        # Core decision: give up (reraise) once the limit is exhausted,
        # otherwise sleep for the computed backoff and retry at the same
        # consistency level. `reraise` and `try_again` come from
        # ::Cassandra::Retry::Policy.
        def retry_with_backoff(opts)
          limited = @retry_limit > -1
          if limited && opts[:retries] > @retry_limit
            @logger.error('backoff retries exhausted', :opts => opts)
            return reraise
          end

          @logger.error('activating backoff wait', :opts => opts)
          backoff_wait_before_next_retry(opts[:retries])

          try_again(opts[:consistency])
        end

        private

        def backoff_wait_before_next_retry(retries)
          Kernel::sleep(calculate_backoff_wait_time(retries))
        end

        # '**' => size ** retries (exponential), '*' => size * retries (linear)
        def calculate_backoff_wait_time(retries)
          return @backoff_size ** retries if @backoff_type == '**'
          return @backoff_size * retries if @backoff_type == '*'
          raise ArgumentError, "unknown backoff type #{@backoff_type}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "concurrent"
|
3
|
+
java_import java.util.concurrent.locks.ReentrantLock
|
4
|
+
|
5
|
+
module LogStash; module Outputs; module Cassandra
  # A size- and time-bounded batching buffer.
  # Items accumulate via #push / #push_multi and are handed to the supplied
  # block (as one array) when either the buffer reaches max_size or
  # flush_interval seconds have elapsed since the last flush. A background
  # thread performs the interval-based flushing.
  class Buffer
    # logger         - logger used to report flush errors
    # max_size       - flush as soon as this many items are buffered
    # flush_interval - background flush period, in seconds
    # block          - called with the (non-empty) buffered items on each flush
    def initialize(logger, max_size, flush_interval, &block)
      @logger = logger
      # You need to acquire this for anything modifying state generally
      @operations_mutex = Mutex.new
      # NOTE: the original also created an unused JRuby-only
      # java.util.concurrent.locks.ReentrantLock here; it was never read
      # anywhere in this class and has been removed as dead code.

      @stopping = Concurrent::AtomicBoolean.new(false)
      @max_size = max_size
      @submit_proc = block

      @buffer = []

      @last_flush = Time.now
      @flush_interval = flush_interval
      @flush_thread = spawn_interval_flusher
    end

    def push(item)
      synchronize do
        push_unsafe(item)
      end
    end
    alias_method :<<, :push

    # Push multiple items onto the buffer in a single operation
    def push_multi(items)
      raise ArgumentError, "push multi takes an array!, not an #{items.class}!" unless items.is_a?(Array)
      synchronize do
        items.each { |item| push_unsafe(item) }
      end
    end

    def flush
      synchronize { flush_unsafe }
    end

    # Stops the buffer: optionally flushes remaining items and waits for the
    # background flusher thread to exit. Idempotent.
    def stop(do_flush=true, wait_complete=true)
      return if stopping?
      @stopping.make_true

      # No need to acquire a lock in this case
      return if !do_flush && !wait_complete

      synchronize { flush_unsafe } if do_flush
      # Join OUTSIDE the mutex: the flusher thread may already be blocked
      # acquiring @operations_mutex (it checks stopping? *before* it
      # synchronizes), so joining it while holding the mutex could deadlock.
      @flush_thread.join if wait_complete
    end

    def contents
      synchronize {|buffer| buffer}
    end

    # For externally operating on the buffer contents
    # this takes a block and will yield the internal buffer and executes
    # the block in a synchronized block from the internal mutex
    def synchronize
      @operations_mutex.synchronize { yield(@buffer) }
    end

    # These methods are private for various reasons, chief among them threadsafety!
    # Many require the @operations_mutex to be locked to be safe
    private

    # Must be called with @operations_mutex held.
    def push_unsafe(item)
      @buffer << item
      if @buffer.size >= @max_size
        flush_unsafe
      end
    end

    # Background thread: wakes every 0.2s and triggers an interval flush
    # until the buffer is stopped.
    def spawn_interval_flusher
      Thread.new do
        loop do
          sleep 0.2
          break if stopping?
          synchronize { interval_flush }
        end
      end
    end

    # Flushes if @flush_interval has elapsed; swallows and logs any error so
    # the flusher thread stays alive. Must be called with the mutex held.
    def interval_flush
      if last_flush_seconds_ago >= @flush_interval
        begin
          @logger.debug? && @logger.debug("Flushing buffer at interval",
                                          :instance => self.inspect,
                                          :interval => @flush_interval)
          flush_unsafe
        rescue StandardError => e
          @logger.warn("Error flushing buffer at interval!",
                       :instance => self.inspect,
                       :message => e.message,
                       :class => e.class.name,
                       :backtrace => e.backtrace
          )
        rescue Exception => e
          # Intentionally broad: keep the flusher thread alive even on
          # non-StandardError failures from @submit_proc.
          @logger.warn("Exception flushing buffer at interval!", :error => e.message, :class => e.class.name)
        end
      end
    end

    # Must be called with @operations_mutex held.
    def flush_unsafe
      if @buffer.size > 0
        @submit_proc.call(@buffer)
        @buffer.clear
      end

      @last_flush = Time.now # This must always be set to ensure correct timer behavior
    end

    def last_flush_seconds_ago
      Time.now - @last_flush
    end

    def stopping?
      @stopping.true?
    end
  end
end end end
|
@@ -0,0 +1,161 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'time'
|
3
|
+
require 'cassandra'
|
4
|
+
|
5
|
+
module LogStash; module Outputs; module Cassandra
  # Responsible for accepting events from the pipeline and returning actions for the SafeSubmitter
  class EventParser
    # options is a string-keyed hash:
    #   'logger'                     - logger for debug/error reporting
    #   'table'                      - target table name (sprintf-expanded per event)
    #   'filter_transform_event_key' - event key holding a per-event filter_transform (optional)
    #   'filter_transform'           - static array of {'event_key','column_name',...} mappings
    #   'hints'                      - { event_key => cassandra_type } coercion hints
    #   'ignore_bad_values'          - substitute type defaults instead of failing on bad values
    def initialize(options)
      @logger = options['logger']
      @table = options['table']
      @filter_transform_event_key = options['filter_transform_event_key']
      assert_filter_transform_structure(options['filter_transform']) if options['filter_transform']
      @filter_transform = options['filter_transform']
      @hints = options['hints']
      @ignore_bad_values = options['ignore_bad_values']
    end

    # Builds the action for one event:
    #   'table' => expanded table name
    #   'data'  => { column_name => value }, taken from the filter_transform
    #              when one applies, otherwise from the raw event with
    #              hint-based type coercion.
    def parse(event)
      action = {}
      action['table'] = event.sprintf(@table)
      filter_transform = get_filter_transform(event)
      if filter_transform
        action['data'] = {}
        filter_transform.each { |filter|
          add_event_value_from_filter_to_action(event, filter, action)
        }
      else
        add_event_data_using_configured_hints(event, action)
      end

      @logger.debug('event parsed to action', :action => action)
      action
    end

    private

    # Per-event filter_transform (from the configured event key) wins over the
    # static one; returns nil when neither is configured.
    def get_filter_transform(event)
      filter_transform = nil
      if @filter_transform_event_key
        filter_transform = event[@filter_transform_event_key]
        assert_filter_transform_structure(filter_transform)
      elsif @filter_transform && @filter_transform.length > 0
        # nil-guard added: @filter_transform is nil when the option was not set
        filter_transform = @filter_transform
      end
      filter_transform
    end

    # Every filter item must name both the source event key and target column.
    def assert_filter_transform_structure(filter_transform)
      filter_transform.each { |item|
        if !item.has_key?('event_key') || !item.has_key?('column_name')
          raise ArgumentError, "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}"
        end
      }
    end

    # Resolves one filter mapping into action['data'][column_name].
    # 'expansion_only' => true means the sprintf-expanded key IS the value;
    # otherwise it is used to look the value up on the event.
    def add_event_value_from_filter_to_action(event, filter, action)
      event_data = event.sprintf(filter['event_key'])
      unless filter.fetch('expansion_only', false)
        event_data = event[event_data]
      end
      if filter.has_key?('cassandra_type')
        cassandra_type = event.sprintf(filter['cassandra_type'])
        event_data = convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type)
      end
      column_name = event.sprintf(filter['column_name'])
      action['data'][column_name] = event_data
    end

    # Uses the whole event as row data, dropping @-prefixed metadata fields
    # and coercing any hinted keys to their configured cassandra types.
    def add_event_data_using_configured_hints(event, action)
      action_data = event.to_hash
      # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly
      action_data.reject!{|key| %r{^@} =~ key}
      @hints.each do |event_key, cassandra_type|
        if action_data.has_key?(event_key)
          action_data[event_key] = convert_value_to_cassandra_type_or_default_if_configured(action_data[event_key], cassandra_type)
        end
      end
      action['data'] = action_data
    end

    # Converts event_data to cassandra_type; on failure either substitutes a
    # type-appropriate default (when ignore_bad_values is set) or re-raises.
    def convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type)
      typed_event_data = nil
      begin
        typed_event_data = convert_value_to_cassandra_type(event_data, cassandra_type)
      rescue StandardError => e
        # Narrowed from `rescue Exception`: conversion failures (ArgumentError,
        # TypeError, parse errors) are StandardError subclasses.
        error_message = "Cannot convert value (`#{event_data}`) to `#{cassandra_type}` type"
        if @ignore_bad_values
          case cassandra_type
          when 'float', 'int', 'varint', 'bigint', 'double', 'counter', 'timestamp'
            typed_event_data = convert_value_to_cassandra_type(0, cassandra_type)
          when 'timeuuid'
            typed_event_data = convert_value_to_cassandra_type('00000000-0000-0000-0000-000000000000', cassandra_type)
          when 'inet'
            typed_event_data = convert_value_to_cassandra_type('0.0.0.0', cassandra_type)
          when /^set<.*>$/
            typed_event_data = convert_value_to_cassandra_type([], cassandra_type)
          else
            # was interpolating the bad value here instead of the type name
            raise ArgumentError, "unable to provide a default value for type #{cassandra_type}"
          end
          @logger.warn(error_message, :exception => e, :backtrace => e.backtrace)
        else
          @logger.error(error_message, :exception => e, :backtrace => e.backtrace)
          raise error_message
        end
      end
      typed_event_data
    end

    # Maps a ruby value to the matching ::Cassandra::Types wrapper.
    # Raises on unknown type names.
    def convert_value_to_cassandra_type(event_data, cassandra_type)
      case cassandra_type
      when 'timestamp'
        converted_value = event_data
        if converted_value.is_a?(Numeric)
          converted_value = Time.at(converted_value)
        elsif converted_value.respond_to?(:to_s)
          converted_value = Time::parse(event_data.to_s)
        end
        return ::Cassandra::Types::Timestamp.new(converted_value)
      when 'inet'
        return ::Cassandra::Types::Inet.new(event_data)
      when 'float'
        return ::Cassandra::Types::Float.new(event_data)
      when 'varchar'
        return ::Cassandra::Types::Varchar.new(event_data)
      when 'text'
        return ::Cassandra::Types::Text.new(event_data)
      when 'blob'
        return ::Cassandra::Types::Blob.new(event_data)
      when 'ascii'
        return ::Cassandra::Types::Ascii.new(event_data)
      when 'bigint'
        return ::Cassandra::Types::Bigint.new(event_data)
      when 'counter'
        return ::Cassandra::Types::Counter.new(event_data)
      when 'int'
        return ::Cassandra::Types::Int.new(event_data)
      when 'varint'
        return ::Cassandra::Types::Varint.new(event_data)
      when 'boolean'
        return ::Cassandra::Types::Boolean.new(event_data)
      when 'decimal'
        return ::Cassandra::Types::Decimal.new(event_data)
      when 'double'
        return ::Cassandra::Types::Double.new(event_data)
      when 'timeuuid'
        return ::Cassandra::Types::Timeuuid.new(event_data)
      when /^set<(.*)>$/
        # convert each value
        # then add all to an array and convert to set
        converted_items = ::Set.new
        set_type = $1
        event_data.each { |item|
          converted_item = convert_value_to_cassandra_type(item, set_type)
          converted_items.add(converted_item)
        }
        return converted_items
      else
        # was `#{name}` — undefined in this scope, raising NameError instead
        # of the intended diagnostic
        raise "Unknown cassandra_type #{cassandra_type}"
      end
    end
  end
end end end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'thread'
|
3
|
+
require 'cassandra'
|
4
|
+
require 'logstash/outputs/cassandra/backoff_retry_policy'
|
5
|
+
|
6
|
+
module LogStash; module Outputs; module Cassandra
  # Responsible for submitting parsed actions to cassandra (with or without a retry mechanism)
  class SafeSubmitter
    # options is a string-keyed hash carrying the plugin configuration
    # (logger, cassandra driver module, hosts/port/credentials, keyspace,
    # consistency, request_timeout, retry_policy).
    def initialize(options)
      # query string => prepared statement; statements are reused across submits
      @statement_cache = {}
      @logger = options['logger']
      setup_cassandra_session(options)
    end

    # Converts the given actions into prepared INSERTs and executes them,
    # retrying failed ones according to the configured retry policy.
    def submit(actions)
      queries = prepare_queries(actions)
      execute_queries_with_retries(queries)
    end

    private

    # Connects a session to the configured cluster/keyspace.
    # options['cassandra'] is the driver module (injected for testability);
    # its .cluster builds the cluster object.
    def setup_cassandra_session(options)
      @retry_policy = get_retry_policy(options['retry_policy'])
      @consistency = options['consistency'].to_sym
      cluster = options['cassandra'].cluster(
        username: options['username'],
        password: options['password'],
        protocol_version: options['protocol_version'],
        hosts: options['hosts'],
        port: options['port'],
        consistency: @consistency,
        timeout: options['request_timeout'],
        retry_policy: @retry_policy,
        logger: options['logger']
      )
      @session = cluster.connect(options['keyspace'])
    end

    # Maps the retry_policy config hash to a driver policy instance.
    # NOTE(review): the config value 'failthrough' maps to the driver's
    # Fallthrough policy — the spelling difference appears intentional
    # (it is the plugin's documented option name).
    def get_retry_policy(retry_policy)
      case retry_policy['type']
      when 'default'
        return ::Cassandra::Retry::Policies::Default.new
      when 'downgrading_consistency'
        return ::Cassandra::Retry::Policies::DowngradingConsistency.new
      when 'failthrough'
        return ::Cassandra::Retry::Policies::Fallthrough.new
      when 'backoff'
        return ::Cassandra::Retry::Policies::Backoff.new({
          'backoff_type' => retry_policy['backoff_type'], 'backoff_size' => retry_policy['backoff_size'],
          'retry_limit' => retry_policy['retry_limit'], 'logger' => @logger
        })
      else
        raise ArgumentError, "unknown retry policy type: #{retry_policy['type']}"
      end
    end

    # Builds a thread-safe work queue of { :query => prepared_stmt,
    # :arguments => values } entries. Actions that fail to prepare are
    # logged and dropped (best effort — intentionally not re-raised).
    def prepare_queries(actions)
      remaining_queries = Queue.new
      actions.each do |action|
        begin
          query = get_query(action)
          remaining_queries << { :query => query, :arguments => action['data'].values }
        rescue Exception => e
          @logger.error('Failed to prepare query', :action => action, :exception => e, :backtrace => e.backtrace)
        end
      end
      remaining_queries
    end

    # Returns a (cached) prepared INSERT for the action's table/columns.
    # The bound-value order relies on action['data'] hash ordering matching
    # .keys / .values — guaranteed insertion-ordered in Ruby hashes.
    def get_query(action)
      @logger.debug('generating query for action', :action => action)
      action_data = action['data']
      query =
        "INSERT INTO #{action['table']} (#{action_data.keys.join(', ')})
        VALUES (#{('?' * action_data.keys.count).split(//) * ', '})"
      unless @statement_cache.has_key?(query)
        @logger.debug('preparing new query', :query => query)
        @statement_cache[query] = @session.prepare(query)
      end
      @statement_cache[query]
    end

    # Keeps executing until the queue stays empty; each pass counts as one
    # retry round. Failed queries are re-enqueued by execute_async's
    # on_failure callback (only under the Backoff policy), so with other
    # policies this loop runs exactly once.
    def execute_queries_with_retries(queries)
      retries = 0
      while queries.length > 0
        execute_queries(queries, retries)
        retries += 1
      end
    end

    # Drains the queue, firing each query asynchronously, then waits for
    # all futures. Send failures are logged and the query is dropped.
    def execute_queries(queries, retries)
      futures = []
      while queries.length > 0
        query = queries.pop
        begin
          future = execute_async(query, retries, queries)
          futures << future
        rescue Exception => e
          @logger.error('Failed to send query', :query => query, :exception => e, :backtrace => e.backtrace)
        end
      end
      futures.each(&:join)
    end

    # Executes one query asynchronously. On failure (callback runs on a
    # driver thread — Queue#<< keeps the re-enqueue thread-safe), asks the
    # Backoff policy whether to retry; a Retry decision pushes the query
    # back for the next round in execute_queries_with_retries.
    # NOTE(review): retry_with_backoff also sleeps on the driver's callback
    # thread while deciding — confirm this is acceptable under load.
    def execute_async(query, retries, queries)
      future = @session.execute_async(query[:query], arguments: query[:arguments])
      future.on_failure { |error|
        @logger.error('Failed to execute query', :query => query, :error => error)
        if @retry_policy.is_a?(::Cassandra::Retry::Policies::Backoff)
          decision = @retry_policy.retry_with_backoff({ :retries => retries, :consistency => @consistency })
          if decision.is_a?(::Cassandra::Retry::Decisions::Retry)
            queries << query
          end
        end
      }
      future
    end
  end
end end end
|