logstash-output-cassandra 0.9.0

@@ -0,0 +1,65 @@
+ # encoding: utf-8
+ require 'cassandra'
+
+ module Cassandra
+   module Retry
+     module Policies
+       # This is a version of the default retry policy (https://github.com/datastax/ruby-driver/blob/v2.1.5/lib/cassandra/retry/policies/default.rb)
+       # with backoff retry configuration options
+       class Backoff
+         include ::Cassandra::Retry::Policy
+
+         def initialize(opts)
+           @logger = opts['logger']
+           @backoff_type = opts['backoff_type']
+           @backoff_size = opts['backoff_size']
+           @retry_limit = opts['retry_limit']
+         end
+
+         def read_timeout(statement, consistency, required, received, retrieved, retries)
+           retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required,
+                                :received => received, :retrieved => retrieved, :retries => retries })
+         end
+
+         def write_timeout(statement, consistency, type, required, received, retries)
+           retry_with_backoff({ :statement => statement, :consistency => consistency, :type => type,
+                                :required => required, :received => received, :retries => retries })
+         end
+
+         def unavailable(statement, consistency, required, alive, retries)
+           retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required,
+                                :alive => alive, :retries => retries })
+         end
+
+         def retry_with_backoff(opts)
+           if @retry_limit > -1 && opts[:retries] > @retry_limit
+             @logger.error('backoff retries exhausted', :opts => opts)
+             return reraise
+           end
+
+           @logger.error('activating backoff wait', :opts => opts)
+           backoff_wait_before_next_retry(opts[:retries])
+
+           try_again(opts[:consistency])
+         end
+
+         private
+         def backoff_wait_before_next_retry(retries)
+           backoff_wait_time = calculate_backoff_wait_time(retries)
+           Kernel::sleep(backoff_wait_time)
+         end
+
+         def calculate_backoff_wait_time(retries)
+           case @backoff_type
+           when '**'
+             return @backoff_size ** retries
+           when '*'
+             return @backoff_size * retries
+           else
+             raise ArgumentError, "unknown backoff type #{@backoff_type}"
+           end
+         end
+       end
+     end
+   end
+ end
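
For context, a minimal usage sketch (not part of the diff): the option keys mirror the initializer above, `logger` is assumed to be a Logstash-style logger that accepts a message plus a details hash, and the sizes are illustrative.

  policy = ::Cassandra::Retry::Policies::Backoff.new(
    'logger' => logger,        # assumed: responds to #error(message, details)
    'backoff_type' => '**',    # exponential wait: backoff_size ** retries
    'backoff_size' => 2,
    'retry_limit' => 5
  )
  # With retries = 3 the policy sleeps 2 ** 3 = 8 seconds, then retries at the same
  # consistency; once retries exceeds retry_limit it reraises the original error.

The driver invokes read_timeout, write_timeout and unavailable on its own; user code only constructs the policy and hands it to the cluster (as the SafeSubmitter hunk below does).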
@@ -0,0 +1,125 @@
+ # encoding: utf-8
+ require "concurrent"
+ java_import java.util.concurrent.locks.ReentrantLock
+
+ module LogStash; module Outputs; module Cassandra
+   class Buffer
+     def initialize(logger, max_size, flush_interval, &block)
+       @logger = logger
+       # You need to acquire this for anything modifying state generally
+       @operations_mutex = Mutex.new
+       @operations_lock = java.util.concurrent.locks.ReentrantLock.new
+
+       @stopping = Concurrent::AtomicBoolean.new(false)
+       @max_size = max_size
+       @submit_proc = block
+
+       @buffer = []
+
+       @last_flush = Time.now
+       @flush_interval = flush_interval
+       @flush_thread = spawn_interval_flusher
+     end
+
+     def push(item)
+       synchronize do |buffer|
+         push_unsafe(item)
+       end
+     end
+     alias_method :<<, :push
+
+     # Push multiple items onto the buffer in a single operation
+     def push_multi(items)
+       raise ArgumentError, "push_multi takes an Array, not a #{items.class}!" unless items.is_a?(Array)
+       synchronize do |buffer|
+         items.each {|item| push_unsafe(item) }
+       end
+     end
+
+     def flush
+       synchronize { flush_unsafe }
+     end
+
+     def stop(do_flush = true, wait_complete = true)
+       return if stopping?
+       @stopping.make_true
+
+       # No need to acquire a lock in this case
+       return if !do_flush && !wait_complete
+
+       synchronize do
+         flush_unsafe if do_flush
+         @flush_thread.join if wait_complete
+       end
+     end
+
+     def contents
+       synchronize {|buffer| buffer}
+     end
+
+     # For externally operating on the buffer contents
+     # this takes a block and will yield the internal buffer and executes
+     # the block in a synchronized block from the internal mutex
+     def synchronize
+       @operations_mutex.synchronize { yield(@buffer) }
+     end
+
+     # These methods are private for various reasons, chief among them thread safety!
+     # Many require the @operations_mutex to be locked to be safe
+     private
+
+     def push_unsafe(item)
+       @buffer << item
+       if @buffer.size >= @max_size
+         flush_unsafe
+       end
+     end
+
+     def spawn_interval_flusher
+       Thread.new do
+         loop do
+           sleep 0.2
+           break if stopping?
+           synchronize { interval_flush }
+         end
+       end
+     end
+
+     def interval_flush
+       if last_flush_seconds_ago >= @flush_interval
+         begin
+           @logger.debug? && @logger.debug("Flushing buffer at interval",
+                                           :instance => self.inspect,
+                                           :interval => @flush_interval)
+           flush_unsafe
+         rescue StandardError => e
+           @logger.warn("Error flushing buffer at interval!",
+                        :instance => self.inspect,
+                        :message => e.message,
+                        :class => e.class.name,
+                        :backtrace => e.backtrace
+           )
+         rescue Exception => e
+           @logger.warn("Exception flushing buffer at interval!", :error => e.message, :class => e.class.name)
+         end
+       end
+     end
+
+     def flush_unsafe
+       if @buffer.size > 0
+         @submit_proc.call(@buffer)
+         @buffer.clear
+       end
+
+       @last_flush = Time.now # This must always be set to ensure correct timer behavior
+     end
+
+     def last_flush_seconds_ago
+       Time.now - @last_flush
+     end
+
+     def stopping?
+       @stopping.true?
+     end
+   end
+ end end end
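
A rough usage sketch of the buffer (again not part of the diff; `logger`, `submitter` and `action` are assumed variables, and the max size and interval are made up):

  buffer = LogStash::Outputs::Cassandra::Buffer.new(logger, 100, 5) do |actions|
    # assumed downstream call; in the plugin this is where a batch of actions is handed off
    submitter.submit(actions)
  end
  buffer << action   # flushes automatically once 100 items accumulate
  buffer.flush       # or force a flush; the background thread also flushes every 5 seconds
  buffer.stop        # final flush, then waits for the flusher thread to exit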
@@ -0,0 +1,161 @@
+ # encoding: utf-8
+ require 'time'
+ require 'cassandra'
+
+ module LogStash; module Outputs; module Cassandra
+   # Responsible for accepting events from the pipeline and returning actions for the SafeSubmitter
+   class EventParser
+     def initialize(options)
+       @logger = options['logger']
+       @table = options['table']
+       @filter_transform_event_key = options['filter_transform_event_key']
+       assert_filter_transform_structure(options['filter_transform']) if options['filter_transform']
+       @filter_transform = options['filter_transform']
+       @hints = options['hints']
+       @ignore_bad_values = options['ignore_bad_values']
+     end
+
+     def parse(event)
+       action = {}
+       action['table'] = event.sprintf(@table)
+       filter_transform = get_filter_transform(event)
+       if filter_transform
+         action['data'] = {}
+         filter_transform.each { |filter|
+           add_event_value_from_filter_to_action(event, filter, action)
+         }
+       else
+         add_event_data_using_configured_hints(event, action)
+       end
+
+       @logger.debug('event parsed to action', :action => action)
+       action
+     end
+
+     private
+     def get_filter_transform(event)
+       filter_transform = nil
+       if @filter_transform_event_key
+         filter_transform = event[@filter_transform_event_key]
+         assert_filter_transform_structure(filter_transform)
+       elsif @filter_transform.length > 0
+         filter_transform = @filter_transform
+       end
+       filter_transform
+     end
+
+     def assert_filter_transform_structure(filter_transform)
+       filter_transform.each { |item|
+         if !item.has_key?('event_key') || !item.has_key?('column_name')
+           raise ArgumentError, "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}"
+         end
+       }
+     end
+
+     def add_event_value_from_filter_to_action(event, filter, action)
+       event_data = event.sprintf(filter['event_key'])
+       unless filter.fetch('expansion_only', false)
+         event_data = event[event_data]
+       end
+       if filter.has_key?('cassandra_type')
+         cassandra_type = event.sprintf(filter['cassandra_type'])
+         event_data = convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type)
+       end
+       column_name = event.sprintf(filter['column_name'])
+       action['data'][column_name] = event_data
+     end
+
+     def add_event_data_using_configured_hints(event, action)
+       action_data = event.to_hash
+       # Filter out @timestamp, @version, etc. so the elasticsearch input plugin can be used directly
+       action_data.reject!{|key| %r{^@} =~ key}
+       @hints.each do |event_key, cassandra_type|
+         if action_data.has_key?(event_key)
+           action_data[event_key] = convert_value_to_cassandra_type_or_default_if_configured(action_data[event_key], cassandra_type)
+         end
+       end
+       action['data'] = action_data
+     end
+
+     def convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type)
+       typed_event_data = nil
+       begin
+         typed_event_data = convert_value_to_cassandra_type(event_data, cassandra_type)
+       rescue Exception => e
+         error_message = "Cannot convert value (`#{event_data}`) to `#{cassandra_type}` type"
+         if @ignore_bad_values
+           case cassandra_type
+           when 'float', 'int', 'varint', 'bigint', 'double', 'counter', 'timestamp'
+             typed_event_data = convert_value_to_cassandra_type(0, cassandra_type)
+           when 'timeuuid'
+             typed_event_data = convert_value_to_cassandra_type('00000000-0000-0000-0000-000000000000', cassandra_type)
+           when 'inet'
+             typed_event_data = convert_value_to_cassandra_type('0.0.0.0', cassandra_type)
+           when /^set<.*>$/
+             typed_event_data = convert_value_to_cassandra_type([], cassandra_type)
+           else
+             raise ArgumentError, "unable to provide a default value for type #{cassandra_type}"
+           end
+           @logger.warn(error_message, :exception => e, :backtrace => e.backtrace)
+         else
+           @logger.error(error_message, :exception => e, :backtrace => e.backtrace)
+           raise error_message
+         end
+       end
+       typed_event_data
+     end
+
+     def convert_value_to_cassandra_type(event_data, cassandra_type)
+       case cassandra_type
+       when 'timestamp'
+         converted_value = event_data
+         if converted_value.is_a?(Numeric)
+           converted_value = Time.at(converted_value)
+         elsif converted_value.respond_to?(:to_s)
+           converted_value = Time::parse(event_data.to_s)
+         end
+         return ::Cassandra::Types::Timestamp.new(converted_value)
+       when 'inet'
+         return ::Cassandra::Types::Inet.new(event_data)
+       when 'float'
+         return ::Cassandra::Types::Float.new(event_data)
+       when 'varchar'
+         return ::Cassandra::Types::Varchar.new(event_data)
+       when 'text'
+         return ::Cassandra::Types::Text.new(event_data)
+       when 'blob'
+         return ::Cassandra::Types::Blob.new(event_data)
+       when 'ascii'
+         return ::Cassandra::Types::Ascii.new(event_data)
+       when 'bigint'
+         return ::Cassandra::Types::Bigint.new(event_data)
+       when 'counter'
+         return ::Cassandra::Types::Counter.new(event_data)
+       when 'int'
+         return ::Cassandra::Types::Int.new(event_data)
+       when 'varint'
+         return ::Cassandra::Types::Varint.new(event_data)
+       when 'boolean'
+         return ::Cassandra::Types::Boolean.new(event_data)
+       when 'decimal'
+         return ::Cassandra::Types::Decimal.new(event_data)
+       when 'double'
+         return ::Cassandra::Types::Double.new(event_data)
+       when 'timeuuid'
+         return ::Cassandra::Types::Timeuuid.new(event_data)
+       when /^set<(.*)>$/
+         # convert each value
+         # then add all to an array and convert to set
+         converted_items = ::Set.new
+         set_type = $1
+         event_data.each { |item|
+           converted_item = convert_value_to_cassandra_type(item, set_type)
+           converted_items.add(converted_item)
+         }
+         return converted_items
+       else
+         raise "Unknown cassandra_type #{cassandra_type}"
+       end
+     end
+   end
+ end end end
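
To illustrate the hints-based parsing mode, a sketch under assumed options (the field names and hints are invented, `logger` is a Logstash-style logger and `event` a Logstash event; an empty filter_transform falls back to the hints):

  parser = LogStash::Outputs::Cassandra::EventParser.new(
    'logger' => logger,
    'table' => 'logs_%{type}',       # sprintf'd per event
    'filter_transform_event_key' => nil,
    'filter_transform' => [],        # empty => use the hints below
    'hints' => { 'response_time' => 'float', 'client_ip' => 'inet' },
    'ignore_bad_values' => true
  )
  action = parser.parse(event)
  # action['table'] is the expanded table name; action['data'] maps column names to values,
  # with response_time and client_ip coerced to the hinted Cassandra types.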
@@ -0,0 +1,118 @@
+ # encoding: utf-8
+ require 'thread'
+ require 'cassandra'
+ require 'logstash/outputs/cassandra/backoff_retry_policy'
+
+ module LogStash; module Outputs; module Cassandra
+   # Responsible for submitting parsed actions to cassandra (with or without a retry mechanism)
+   class SafeSubmitter
+     def initialize(options)
+       @statement_cache = {}
+       @logger = options['logger']
+       setup_cassandra_session(options)
+     end
+
+     def submit(actions)
+       queries = prepare_queries(actions)
+       execute_queries_with_retries(queries)
+     end
+
+     private
+     def setup_cassandra_session(options)
+       @retry_policy = get_retry_policy(options['retry_policy'])
+       @consistency = options['consistency'].to_sym
+       cluster = options['cassandra'].cluster(
+         username: options['username'],
+         password: options['password'],
+         protocol_version: options['protocol_version'],
+         hosts: options['hosts'],
+         port: options['port'],
+         consistency: @consistency,
+         timeout: options['request_timeout'],
+         retry_policy: @retry_policy,
+         logger: options['logger']
+       )
+       @session = cluster.connect(options['keyspace'])
+     end
+
+     def get_retry_policy(retry_policy)
+       case retry_policy['type']
+       when 'default'
+         return ::Cassandra::Retry::Policies::Default.new
+       when 'downgrading_consistency'
+         return ::Cassandra::Retry::Policies::DowngradingConsistency.new
+       when 'failthrough'
+         return ::Cassandra::Retry::Policies::Fallthrough.new
+       when 'backoff'
+         return ::Cassandra::Retry::Policies::Backoff.new({
+           'backoff_type' => retry_policy['backoff_type'], 'backoff_size' => retry_policy['backoff_size'],
+           'retry_limit' => retry_policy['retry_limit'], 'logger' => @logger
+         })
+       else
+         raise ArgumentError, "unknown retry policy type: #{retry_policy['type']}"
+       end
+     end
+
+     def prepare_queries(actions)
+       remaining_queries = Queue.new
+       actions.each do |action|
+         begin
+           query = get_query(action)
+           remaining_queries << { :query => query, :arguments => action['data'].values }
+         rescue Exception => e
+           @logger.error('Failed to prepare query', :action => action, :exception => e, :backtrace => e.backtrace)
+         end
+       end
+       remaining_queries
+     end
+
+     def get_query(action)
+       @logger.debug('generating query for action', :action => action)
+       action_data = action['data']
+       query =
+         "INSERT INTO #{action['table']} (#{action_data.keys.join(', ')})
+         VALUES (#{('?' * action_data.keys.count).split(//) * ', '})"
+       unless @statement_cache.has_key?(query)
+         @logger.debug('preparing new query', :query => query)
+         @statement_cache[query] = @session.prepare(query)
+       end
+       @statement_cache[query]
+     end
+
+     def execute_queries_with_retries(queries)
+       retries = 0
+       while queries.length > 0
+         execute_queries(queries, retries)
+         retries += 1
+       end
+     end
+
+     def execute_queries(queries, retries)
+       futures = []
+       while queries.length > 0
+         query = queries.pop
+         begin
+           future = execute_async(query, retries, queries)
+           futures << future
+         rescue Exception => e
+           @logger.error('Failed to send query', :query => query, :exception => e, :backtrace => e.backtrace)
+         end
+       end
+       futures.each(&:join)
+     end
+
+     def execute_async(query, retries, queries)
+       future = @session.execute_async(query[:query], arguments: query[:arguments])
+       future.on_failure { |error|
+         @logger.error('Failed to execute query', :query => query, :error => error)
+         if @retry_policy.is_a?(::Cassandra::Retry::Policies::Backoff)
+           decision = @retry_policy.retry_with_backoff({ :retries => retries, :consistency => @consistency })
+           if decision.is_a?(::Cassandra::Retry::Decisions::Retry)
+             queries << query
+           end
+         end
+       }
+       future
+     end
+   end
+ end end end
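
Finally, a sketch of how the submitter might be wired up (option names follow setup_cassandra_session above; the host, credentials, keyspace and retry settings are placeholders, and 'cassandra' is handed the driver module so the cluster() call resolves):

  submitter = LogStash::Outputs::Cassandra::SafeSubmitter.new(
    'logger' => logger,                 # assumed Logstash-style logger
    'cassandra' => ::Cassandra,         # the module whose .cluster is called above
    'hosts' => ['127.0.0.1'], 'port' => 9042,
    'username' => 'cassandra', 'password' => 'cassandra',
    'protocol_version' => 3,
    'consistency' => 'local_one',
    'request_timeout' => 5,
    'retry_policy' => { 'type' => 'backoff', 'backoff_type' => '**', 'backoff_size' => 2, 'retry_limit' => 10 },
    'keyspace' => 'logs'
  )
  # Each action becomes "INSERT INTO <table> (<columns>) VALUES (?, ?, ...)", prepared once,
  # cached, and executed asynchronously; failed queries are re-queued when the backoff policy
  # decides to retry.
  submitter.submit([{ 'table' => 'events', 'data' => { 'id' => 1, 'message' => 'hello' } }])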