logstash-output-cassandra 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
# encoding: utf-8
require 'cassandra'

module Cassandra
  module Retry
    module Policies
      # A retry policy modelled on the driver's default policy
      # (https://github.com/datastax/ruby-driver/blob/v2.1.5/lib/cassandra/retry/policies/default.rb)
      # extended with configurable backoff between retry attempts.
      class Backoff
        include ::Cassandra::Retry::Policy

        # opts keys:
        #   'logger'       - logger used to report retry activity
        #   'backoff_type' - '**' for exponential backoff, '*' for linear
        #   'backoff_size' - base (exponential) / factor (linear) for the wait time
        #   'retry_limit'  - maximum number of retries; -1 means unlimited
        def initialize(opts)
          @logger       = opts['logger']
          @backoff_type = opts['backoff_type']
          @backoff_size = opts['backoff_size']
          @retry_limit  = opts['retry_limit']
        end

        def read_timeout(statement, consistency, required, received, retrieved, retries)
          retry_with_backoff(:statement => statement, :consistency => consistency,
                             :required => required, :received => received,
                             :retrieved => retrieved, :retries => retries)
        end

        def write_timeout(statement, consistency, type, required, received, retries)
          retry_with_backoff(:statement => statement, :consistency => consistency,
                             :type => type, :required => required,
                             :received => received, :retries => retries)
        end

        def unavailable(statement, consistency, required, alive, retries)
          retry_with_backoff(:statement => statement, :consistency => consistency,
                             :required => required, :alive => alive, :retries => retries)
        end

        # Decides between giving up (reraise) and retrying after a backoff wait.
        # opts[:retries] is the number of attempts made so far; opts[:consistency]
        # is the consistency level to retry at.
        def retry_with_backoff(opts)
          if @retry_limit > -1 && opts[:retries] > @retry_limit
            @logger.error('backoff retries exhausted', :opts => opts)
            return reraise
          end

          @logger.error('activating backoff wait', :opts => opts)
          backoff_wait_before_next_retry(opts[:retries])
          try_again(opts[:consistency])
        end

        private

        # Sleeps for the computed backoff interval before the next attempt.
        def backoff_wait_before_next_retry(retries)
          Kernel::sleep(calculate_backoff_wait_time(retries))
        end

        # '**' => @backoff_size ** retries (exponential)
        # '*'  => @backoff_size *  retries (linear)
        def calculate_backoff_wait_time(retries)
          case @backoff_type
          when '**' then @backoff_size ** retries
          when '*'  then @backoff_size * retries
          else raise ArgumentError, "unknown backoff type #{@backoff_type}"
          end
        end
      end
    end
  end
end
# encoding: utf-8
require "concurrent"
java_import java.util.concurrent.locks.ReentrantLock

module LogStash; module Outputs; module Cassandra
  # An in-memory buffer that accumulates items and hands them to a submit
  # callback (the block given to #initialize) either when max_size items have
  # accumulated or when flush_interval seconds have passed since the last
  # flush (driven by a background flusher thread).
  class Buffer
    def initialize(logger, max_size, flush_interval, &block)
      @logger = logger
      # You need to acquire this for anything modifying state generally
      @operations_mutex = Mutex.new
      # NOTE(review): a java ReentrantLock used to be created here but was
      # never used anywhere; it has been removed. The java_import above is
      # retained in case other code depends on the class being imported.

      @stopping = Concurrent::AtomicBoolean.new(false)
      @max_size = max_size
      @submit_proc = block

      @buffer = []

      @last_flush = Time.now
      @flush_interval = flush_interval
      @flush_thread = spawn_interval_flusher
    end

    def push(item)
      synchronize do |buffer|
        push_unsafe(item)
      end
    end
    alias_method :<<, :push

    # Push multiple items onto the buffer in a single operation
    def push_multi(items)
      raise ArgumentError, "push multi takes an array!, not an #{items.class}!" unless items.is_a?(Array)
      synchronize do |buffer|
        items.each {|item| push_unsafe(item) }
      end
    end

    def flush
      synchronize { flush_unsafe }
    end

    # Stops the interval flusher; optionally performs a final flush and waits
    # for the flusher thread to exit.
    def stop(do_flush=true, wait_complete=true)
      return if stopping?
      @stopping.make_true

      # No need to acquire a lock in this case
      return if !do_flush && !wait_complete

      synchronize { flush_unsafe } if do_flush
      # BUGFIX: join OUTSIDE the mutex. The flusher thread may have passed its
      # stopping? check just before make_true and be blocked waiting to enter
      # synchronize; joining it while holding @operations_mutex deadlocked
      # permanently in the previous version.
      @flush_thread.join if wait_complete
    end

    def contents
      # NOTE(review): this returns the internal array itself, not a copy, so
      # callers share mutable state with the buffer — confirm before changing.
      synchronize {|buffer| buffer}
    end

    # For externally operating on the buffer contents
    # this takes a block and will yield the internal buffer and executes
    # the block in a synchronized block from the internal mutex
    def synchronize
      @operations_mutex.synchronize { yield(@buffer) }
    end

    # These methods are private for various reasons, chief among them threadsafety!
    # Many require the @operations_mutex to be locked to be safe
    private

    def push_unsafe(item)
      @buffer << item
      flush_unsafe if @buffer.size >= @max_size
    end

    # Background thread: wakes every 200ms and triggers an interval flush
    # until stop is requested.
    def spawn_interval_flusher
      Thread.new do
        loop do
          sleep 0.2
          break if stopping?
          synchronize { interval_flush }
        end
      end
    end

    def interval_flush
      if last_flush_seconds_ago >= @flush_interval
        begin
          @logger.debug? && @logger.debug("Flushing buffer at interval",
                                          :instance => self.inspect,
                                          :interval => @flush_interval)
          flush_unsafe
        rescue StandardError => e
          @logger.warn("Error flushing buffer at interval!",
                       :instance => self.inspect,
                       :message => e.message,
                       :class => e.class.name,
                       :backtrace => e.backtrace
          )
        rescue Exception => e
          # Deliberately broader than StandardError so a non-StandardError
          # (e.g. a Java-level error under JRuby) cannot kill the flusher thread.
          @logger.warn("Exception flushing buffer at interval!", :error => e.message, :class => e.class.name)
        end
      end
    end

    def flush_unsafe
      if @buffer.size > 0
        # The callback receives the internal array, which is cleared right
        # after; callers must consume it synchronously (SafeSubmitter does).
        @submit_proc.call(@buffer)
        @buffer.clear
      end

      @last_flush = Time.now # This must always be set to ensure correct timer behavior
    end

    def last_flush_seconds_ago
      Time.now - @last_flush
    end

    def stopping?
      @stopping.true?
    end
  end
end end end
# encoding: utf-8
require 'time'
require 'cassandra'

module LogStash; module Outputs; module Cassandra
  # Responsible for accepting events from the pipeline and returning actions for the SafeSubmitter
  class EventParser
    def initialize(options)
      @logger = options['logger']
      @table = options['table']
      @filter_transform_event_key = options['filter_transform_event_key']
      assert_filter_transform_structure(options['filter_transform']) if options['filter_transform']
      @filter_transform = options['filter_transform']
      @hints = options['hints']
      @ignore_bad_values = options['ignore_bad_values']
    end

    # Parses a logstash event into an action hash with 'table' (sprintf'd
    # table name) and 'data' (column_name => value) keys.
    def parse(event)
      action = {}
      action['table'] = event.sprintf(@table)
      filter_transform = get_filter_transform(event)
      if filter_transform
        action['data'] = {}
        filter_transform.each { |filter|
          add_event_value_from_filter_to_action(event, filter, action)
        }
      else
        add_event_data_using_configured_hints(event, action)
      end

      @logger.debug('event parsed to action', :action => action)
      action
    end

    private
    # Returns the filter transform taken from the event (when an event key is
    # configured) or the statically configured one; nil when neither applies.
    def get_filter_transform(event)
      filter_transform = nil
      if @filter_transform_event_key
        filter_transform = event[@filter_transform_event_key]
        assert_filter_transform_structure(filter_transform)
      elsif @filter_transform && @filter_transform.length > 0
        # BUGFIX: nil guard added — 'filter_transform' may be absent from the
        # options, in which case @filter_transform is nil and .length crashed.
        filter_transform = @filter_transform
      end
      filter_transform
    end

    # Validates that every filter item has the two mandatory keys.
    def assert_filter_transform_structure(filter_transform)
      filter_transform.each { |item|
        if !item.has_key?('event_key') || !item.has_key?('column_name')
          raise ArgumentError, "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}"
        end
      }
    end

    # Resolves one filter item against the event and stores the (optionally
    # type-converted) value under the configured column name.
    def add_event_value_from_filter_to_action(event, filter, action)
      event_data = event.sprintf(filter['event_key'])
      unless filter.fetch('expansion_only', false)
        event_data = event[event_data]
      end
      if filter.has_key?('cassandra_type')
        cassandra_type = event.sprintf(filter['cassandra_type'])
        event_data = convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type)
      end
      column_name = event.sprintf(filter['column_name'])
      action['data'][column_name] = event_data
    end

    # Uses the whole event as column data, applying configured type hints.
    def add_event_data_using_configured_hints(event, action)
      action_data = event.to_hash
      # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly
      action_data.reject!{|key| %r{^@} =~ key}
      @hints.each do |event_key, cassandra_type|
        if action_data.has_key?(event_key)
          action_data[event_key] = convert_value_to_cassandra_type_or_default_if_configured(action_data[event_key], cassandra_type)
        end
      end
      action['data'] = action_data
    end

    # Converts event_data to the requested cassandra type; on failure either
    # substitutes a type-appropriate default (when ignore_bad_values is set)
    # or logs and re-raises.
    def convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type)
      typed_event_data = nil
      begin
        typed_event_data = convert_value_to_cassandra_type(event_data, cassandra_type)
      rescue Exception => e
        # NOTE(review): rescue Exception kept deliberately broad — conversion
        # may raise non-StandardError under JRuby; confirm before narrowing.
        error_message = "Cannot convert value (`#{event_data}`) to `#{cassandra_type}` type"
        if @ignore_bad_values
          case cassandra_type
          when 'float', 'int', 'varint', 'bigint', 'double', 'counter', 'timestamp'
            typed_event_data = convert_value_to_cassandra_type(0, cassandra_type)
          when 'timeuuid'
            typed_event_data = convert_value_to_cassandra_type('00000000-0000-0000-0000-000000000000', cassandra_type)
          when 'inet'
            typed_event_data = convert_value_to_cassandra_type('0.0.0.0', cassandra_type)
          when /^set<.*>$/
            typed_event_data = convert_value_to_cassandra_type([], cassandra_type)
          else
            # BUGFIX: interpolated the data value where the type was meant
            raise ArgumentError, "unable to provide a default value for type #{cassandra_type}"
          end
          @logger.warn(error_message, :exception => e, :backtrace => e.backtrace)
        else
          @logger.error(error_message, :exception => e, :backtrace => e.backtrace)
          raise error_message
        end
      end
      typed_event_data
    end

    # Maps a config type name to a driver type instance wrapping event_data.
    def convert_value_to_cassandra_type(event_data, cassandra_type)
      case cassandra_type
      when 'timestamp'
        converted_value = event_data
        if converted_value.is_a?(Numeric)
          converted_value = Time.at(converted_value)
        elsif converted_value.respond_to?(:to_s)
          # everything responds to to_s, so non-numeric values are always parsed
          converted_value = Time::parse(event_data.to_s)
        end
        return ::Cassandra::Types::Timestamp.new(converted_value)
      when 'inet'
        return ::Cassandra::Types::Inet.new(event_data)
      when 'float'
        return ::Cassandra::Types::Float.new(event_data)
      when 'varchar'
        return ::Cassandra::Types::Varchar.new(event_data)
      when 'text'
        return ::Cassandra::Types::Text.new(event_data)
      when 'blob'
        return ::Cassandra::Types::Blob.new(event_data)
      when 'ascii'
        return ::Cassandra::Types::Ascii.new(event_data)
      when 'bigint'
        return ::Cassandra::Types::Bigint.new(event_data)
      when 'counter'
        return ::Cassandra::Types::Counter.new(event_data)
      when 'int'
        return ::Cassandra::Types::Int.new(event_data)
      when 'varint'
        return ::Cassandra::Types::Varint.new(event_data)
      when 'boolean'
        return ::Cassandra::Types::Boolean.new(event_data)
      when 'decimal'
        return ::Cassandra::Types::Decimal.new(event_data)
      when 'double'
        return ::Cassandra::Types::Double.new(event_data)
      when 'timeuuid'
        return ::Cassandra::Types::Timeuuid.new(event_data)
      when /^set<(.*)>$/
        # convert each value
        # then add all to an array and convert to set
        converted_items = ::Set.new
        set_type = $1
        event_data.each { |item|
          converted_item = convert_value_to_cassandra_type(item, set_type)
          converted_items.add(converted_item)
        }
        return converted_items
      else
        # BUGFIX: referenced undefined `name` (NameError) instead of the type
        raise "Unknown cassandra_type #{cassandra_type}"
      end
    end
  end
end end end
# encoding: utf-8
require 'thread'
require 'cassandra'
require 'logstash/outputs/cassandra/backoff_retry_policy'

module LogStash; module Outputs; module Cassandra
  # Responsible for submitting parsed actions to cassandra (with or without a retry mechanism)
  class SafeSubmitter
    def initialize(options)
      @statement_cache = {}
      @logger = options['logger']
      setup_cassandra_session(options)
    end

    # Prepares CQL statements for a batch of actions and executes them,
    # retrying failed queries according to the configured retry policy.
    def submit(actions)
      execute_queries_with_retries(prepare_queries(actions))
    end

    private
    # Builds the cluster connection and session from the plugin options.
    def setup_cassandra_session(options)
      @retry_policy = get_retry_policy(options['retry_policy'])
      @consistency = options['consistency'].to_sym
      cluster = options['cassandra'].cluster(
        username: options['username'],
        password: options['password'],
        protocol_version: options['protocol_version'],
        hosts: options['hosts'],
        port: options['port'],
        consistency: @consistency,
        timeout: options['request_timeout'],
        retry_policy: @retry_policy,
        logger: options['logger']
      )
      @session = cluster.connect(options['keyspace'])
    end

    # Maps the configured retry policy name to a driver policy instance.
    # NOTE: 'failthrough' (sic) is the established config value for the
    # driver's Fallthrough policy and is kept for backward compatibility.
    def get_retry_policy(retry_policy)
      case retry_policy['type']
      when 'default'
        ::Cassandra::Retry::Policies::Default.new
      when 'downgrading_consistency'
        ::Cassandra::Retry::Policies::DowngradingConsistency.new
      when 'failthrough'
        ::Cassandra::Retry::Policies::Fallthrough.new
      when 'backoff'
        ::Cassandra::Retry::Policies::Backoff.new(
          'backoff_type' => retry_policy['backoff_type'],
          'backoff_size' => retry_policy['backoff_size'],
          'retry_limit'  => retry_policy['retry_limit'],
          'logger'       => @logger
        )
      else
        raise ArgumentError, "unknown retry policy type: #{retry_policy['type']}"
      end
    end

    # Turns each action into a { :query, :arguments } pair on a thread-safe
    # queue; actions whose statement fails to prepare are logged and skipped.
    def prepare_queries(actions)
      pending = Queue.new
      actions.each do |action|
        begin
          statement = get_query(action)
          pending << { :query => statement, :arguments => action['data'].values }
        rescue Exception => e
          @logger.error('Failed to prepare query', :action => action, :exception => e, :backtrace => e.backtrace)
        end
      end
      pending
    end

    # Returns a prepared INSERT statement for the action, caching by CQL text.
    def get_query(action)
      @logger.debug('generating query for action', :action => action)
      columns = action['data'].keys
      query =
        "INSERT INTO #{action['table']} (#{columns.join(', ')})
        VALUES (#{Array.new(columns.count, '?').join(', ')})"
      unless @statement_cache.has_key?(query)
        @logger.debug('preparing new query', :query => query)
        @statement_cache[query] = @session.prepare(query)
      end
      @statement_cache[query]
    end

    # Keeps executing rounds of queries until nothing is left to retry;
    # the round counter feeds the backoff policy's retry accounting.
    def execute_queries_with_retries(queries)
      attempt = 0
      until queries.empty?
        execute_queries(queries, attempt)
        attempt += 1
      end
    end

    # Drains the queue, firing each query asynchronously, then waits for all
    # futures; failed sends are logged (failed executions may be re-queued by
    # the failure callback in execute_async).
    def execute_queries(queries, retries)
      futures = []
      until queries.empty?
        query = queries.pop
        begin
          futures << execute_async(query, retries, queries)
        rescue Exception => e
          @logger.error('Failed to send query', :query => query, :exception => e, :backtrace => e.backtrace)
        end
      end
      futures.each(&:join)
    end

    # Executes one query asynchronously; on failure, consults the backoff
    # policy and pushes the query back onto the queue if a retry is decided.
    def execute_async(query, retries, queries)
      future = @session.execute_async(query[:query], arguments: query[:arguments])
      future.on_failure { |error|
        @logger.error('Failed to execute query', :query => query, :error => error)
        if @retry_policy.is_a?(::Cassandra::Retry::Policies::Backoff)
          decision = @retry_policy.retry_with_backoff(:retries => retries, :consistency => @consistency)
          queries << query if decision.is_a?(::Cassandra::Retry::Decisions::Retry)
        end
      }
      future
    end
  end
end end end