logstash-output-charrington 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +38 -1
- data/lib/logstash/outputs/charrington.rb +32 -5
- data/lib/logstash/outputs/charrington/{alter_table.rb → alter_postgres_table.rb} +6 -5
- data/lib/logstash/outputs/charrington/alter_redshift_table.rb +109 -0
- data/lib/logstash/outputs/charrington/{create_table.rb → create_postgres_table.rb} +5 -4
- data/lib/logstash/outputs/charrington/create_redshift_table.rb +88 -0
- data/lib/logstash/outputs/charrington/insert.rb +27 -9
- data/lib/logstash/outputs/charrington/process.rb +8 -2
- data/lib/logstash/outputs/charrington/{transform.rb → transform_postgres.rb} +1 -1
- data/lib/logstash/outputs/charrington/transform_redshift.rb +102 -0
- data/logstash-output-charrington.gemspec +11 -9
- data/spec/charrington_spec_helper.rb +75 -188
- data/spec/logstash-output-charrington_test_jars.rb +5 -0
- data/spec/outputs/charrington_spec.rb +118 -39
- metadata +52 -34
- data/lib/commons-io/commons-io/2.4/commons-io-2.4.jar +0 -0
- data/lib/de/flapdoodle/embed/de.flapdoodle.embed.process/2.0.2/de.flapdoodle.embed.process-2.0.2.jar +0 -0
- data/lib/net/java/dev/jna/jna-platform/4.0.0/jna-platform-4.0.0.jar +0 -0
- data/lib/net/java/dev/jna/jna/4.0.0/jna-4.0.0.jar +0 -0
- data/lib/org/apache/commons/commons-compress/1.10/commons-compress-1.10.jar +0 -0
- data/lib/org/apache/commons/commons-lang3/3.1/commons-lang3-3.1.jar +0 -0
- data/lib/org/postgresql/postgresql/42.2.5/postgresql-42.2.5.jar +0 -0
- data/lib/ru/yandex/qatools/embed/postgresql-embedded/2.10/postgresql-embedded-2.10.jar +0 -0
- data/lib/ru/yandex/qatools/embed/postgresql-embedded/2.8/postgresql-embedded-2.8.jar +0 -0
- data/vendor/postgresql-42.2.5.jar +0 -0
- data/vendor/redshift.jar +0 -0
@@ -6,7 +6,7 @@ module Charrington
|
|
6
6
|
# It handles retries where applicable.
|
7
7
|
|
8
8
|
include Service
|
9
|
-
attr_reader :event, :connection, :opts, :max_retries, :schema, :retry_max_interval
|
9
|
+
attr_reader :event, :connection, :opts, :max_retries, :schema, :retry_max_interval, :driver
|
10
10
|
attr_accessor :retry_interval, :should_retry
|
11
11
|
|
12
12
|
Error = Class.new(StandardError)
|
@@ -22,6 +22,7 @@ module Charrington
|
|
22
22
|
@max_retries = opts[:max_retries] || 10
|
23
23
|
@retry_max_interval = opts[:retry_max_interval] || 2
|
24
24
|
@retry_interval = opts[:retry_initial_interval] || 2
|
25
|
+
@driver = opts[:driver]
|
25
26
|
|
26
27
|
@attempts = 1
|
27
28
|
@should_retry = true
|
@@ -29,7 +30,12 @@ module Charrington
|
|
29
30
|
|
30
31
|
def call
|
31
32
|
while should_retry do
|
32
|
-
transformed =
|
33
|
+
transformed = case driver
|
34
|
+
when "redshift"
|
35
|
+
Charrington::TransformRedshift.call(event)
|
36
|
+
else
|
37
|
+
Charrington::TransformPostgres.call(event)
|
38
|
+
end
|
33
39
|
should_retry = Charrington::Insert.call(connection, transformed, opts)
|
34
40
|
break if !should_retry
|
35
41
|
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "service")
|
2
|
+
require 'securerandom'
|
3
|
+
|
4
|
+
module Charrington
  # Transform applied to an event before insertion when the output driver is
  # "redshift". It filters unwanted keys, normalises the event name, assigns a
  # fresh id, and lifts the "meta" and "session" sections to the top level.
  #
  # The class-level `.call(event)` entry point is presumably provided by the
  # included Service module (defined in service.rb, not visible here).
  class TransformRedshift
    include Service
    attr_accessor :event
    attr_reader :top_level_keys

    Error = Class.new(StandardError)
    EventNil = Class.new(Error)
    TableNameNil = Class.new(Error)
    ColumnBlacklist = Class.new(Error)

    # Keys that are silently removed from every event.
    KEY_FILTER_BLACKLIST = ['host','path','jwt','sequence'].freeze
    # Keys whose presence makes the constructor raise ColumnBlacklist.
    KEY_RAISE_BLACKLIST = ['inserted_at'].freeze
    # Source keys removed at the end of #call, after their values were copied.
    CONSUMED_KEYS = ['session', 'meta', 'published_at', 'anonymous_user'].freeze

    # @param event [#to_hash] the incoming event
    # @raise [EventNil] when +event+ is nil
    # @raise [ColumnBlacklist] when the event has a key from KEY_RAISE_BLACKLIST
    def initialize(event)
      raise EventNil, "Event is nil" if event.nil?

      # drop_keys returns a new hash, so the caller's object is never modified.
      @event = drop_keys(event.to_hash)
      # Snapshot of the keys after filtering, before #call adds derived ones.
      @top_level_keys = @event.keys
      check_blacklist
    end

    # Applies the transformation steps in order and returns the resulting hash.
    #
    # @return [Hash] the transformed event (the same object as #event)
    def call
      handle_event_key(event)
      add_id_to_event(event)

      handle_key_transform(event, "anonymous_id", "anonymous_user")
      handle_key_transform(event, "sent_at", "published_at")

      # Meta keys are merged after the id/event keys are set, so a meta entry
      # with the same name overwrites them.
      handle_meta_section(event)

      transform_session_stuff(event)

      event.delete_if { |k, _v| CONSUMED_KEYS.include?(k) }

      event
    end

    private

    # Ensures +key_that_should_be_there+ exists, filling it from
    # +key_to_take_value_from+ (or "" when that is missing/nil). An existing
    # value is left untouched.
    def handle_key_transform(hash, key_that_should_be_there, key_to_take_value_from)
      return hash if hash.key?(key_that_should_be_there)

      hash[key_that_should_be_there] = hash[key_to_take_value_from] || ""
    end

    # Sets "id" to 20 random hex characters, replacing any existing value.
    def add_id_to_event(hash)
      hash["id"] = SecureRandom.hex(10)
    end

    # Keeps the original event name in "event_text" and stores the
    # underscored form in "event".
    def handle_event_key(hash)
      event_name = hash["event"] || ""

      hash["event_text"] = event_name
      hash["event"] = underscore_event_name(event_name)
    end

    # Lower-cases and strips the name, then collapses every run of characters
    # outside a-z / 0-9 into a single underscore.
    def underscore_event_name(event_name)
      event_name.to_s.downcase.strip.gsub(/[^a-z0-9]+/, "_")
    end

    # Copies each "session" entry to the top level under a "context_" prefix.
    def transform_session_stuff(hash)
      session_stuff = hash["session"] || {}

      session_stuff.each { |k, v| hash["context_#{k}"] = v }
    end

    # Copies each "meta" entry to the top level under its own key.
    def handle_meta_section(hash)
      meta_section = hash["meta"] || {}

      meta_section.each { |k, v| hash[k] = v }
    end

    # Raises ColumnBlacklist listing every KEY_RAISE_BLACKLIST key in the event.
    def check_blacklist
      offending = KEY_RAISE_BLACKLIST.select { |k| event.key?(k) }
      raise ColumnBlacklist, "Event contains these blacklisted keys: #{offending.join(",")}" unless offending.empty?
    end

    # Returns a copy of +event+ without "@"-prefixed keys and without the keys
    # in KEY_FILTER_BLACKLIST. Uses reject rather than delete_if so the input
    # hash is left intact; to_s keeps non-String keys from raising.
    def drop_keys(event)
      event.reject { |k, _v| k.to_s.start_with?("@") || KEY_FILTER_BLACKLIST.include?(k) }
    end

    # Recursively flattens nested hashes, joining key paths with "_".
    # Not called anywhere in this class.
    def flatten_hash(hash)
      hash.each_with_object({}) do |(k, v), acc|
        if v.is_a? Hash
          flatten_hash(v).each do |h_k, h_v|
            acc["#{k}_#{h_k}"] = h_v
          end
        else
          acc[k] = v
        end
      end
    end
  end
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-output-charrington'
|
3
|
-
s.version = '0.
|
3
|
+
s.version = '0.3.0'
|
4
4
|
|
5
|
-
s.licenses = ['Apache
|
5
|
+
s.licenses = ['Apache-2.0']
|
6
|
+
s.homepage = 'https://gitlab.podium.com/engineering/analytics/logstash-output-charrington'
|
6
7
|
s.summary = 'This plugin allows you to output to SQL, via JDBC'
|
7
8
|
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install 'logstash-output-charrington'. This gem is not a stand-alone program"
|
8
9
|
s.authors = ['dconger', 'brianbroderick', 'spencerdcarlson']
|
@@ -17,18 +18,19 @@ Gem::Specification.new do |s|
|
|
17
18
|
s.metadata = { 'logstash_plugin' => 'true', 'logstash_group' => 'output' }
|
18
19
|
|
19
20
|
# Gem dependencies
|
20
|
-
#
|
21
21
|
s.add_runtime_dependency 'logstash-core-plugin-api', ">= 1.60", "<= 2.99"
|
22
|
-
s.add_runtime_dependency 'logstash-codec-plain'
|
23
|
-
s.add_development_dependency 'logstash-devutils'
|
22
|
+
s.add_runtime_dependency 'logstash-codec-plain', '~> 3.0', '>= 3.0.6'
|
24
23
|
|
24
|
+
# The 'install_jars' rake task will download these jars from Maven and put them into the vendor directory
|
25
|
+
# See jar-dependencies gem's wiki - https://github.com/mkristian/jar-dependencies/wiki/declare-jars-inside-gemspec
|
25
26
|
s.requirements << "jar 'com.zaxxer:HikariCP', '2.7.2'"
|
26
27
|
s.requirements << "jar 'org.apache.logging.log4j:log4j-slf4j-impl', '2.6.2'"
|
27
28
|
|
28
|
-
s.add_development_dependency '
|
29
|
+
s.add_development_dependency 'logstash-devutils', '~> 1.3', '>= 1.3.1'
|
30
|
+
s.add_development_dependency 'jar-dependencies', '~> 0.4.0'
|
29
31
|
s.add_development_dependency 'ruby-maven', '~> 3.3'
|
30
32
|
s.add_development_dependency 'rubocop', '0.41.2'
|
31
|
-
s.add_development_dependency 'logstash-input-generator'
|
32
|
-
s.add_development_dependency 'logstash-codec-json'
|
33
|
-
s.add_development_dependency 'insist'
|
33
|
+
s.add_development_dependency 'logstash-input-generator', '~> 3.0', '>= 3.0.6'
|
34
|
+
s.add_development_dependency 'logstash-codec-json', '~> 3.0', '>= 3.0.5'
|
35
|
+
s.add_development_dependency 'insist','~> 1.0'
|
34
36
|
end
|
@@ -2,215 +2,102 @@ require 'logstash/devutils/rspec/spec_helper'
|
|
2
2
|
require 'logstash/outputs/charrington'
|
3
3
|
require 'stud/temporary'
|
4
4
|
require 'java'
|
5
|
+
require 'logstash-output-charrington_test_jars'
|
5
6
|
require 'securerandom'
|
7
|
+
java_import java.util.ArrayList
|
8
|
+
java_import java.nio.file.Paths
|
6
9
|
|
7
10
|
RSpec::Support::ObjectFormatter.default_instance.max_formatted_output_length = 80000
|
8
|
-
|
9
11
|
RSpec.configure do |c|
|
10
|
-
|
11
|
-
def start_service(name)
|
12
|
-
cmd = "sudo /etc/init.d/#{name}* start"
|
13
|
-
|
14
|
-
`which systemctl`
|
15
|
-
if $?.success?
|
16
|
-
cmd = "sudo systemctl start #{name}"
|
17
|
-
end
|
18
|
-
|
19
|
-
`#{cmd}`
|
20
|
-
end
|
21
|
-
|
22
|
-
def stop_service(name)
|
23
|
-
cmd = "sudo /etc/init.d/#{name}* stop"
|
24
|
-
|
25
|
-
`which systemctl`
|
26
|
-
if $?.success?
|
27
|
-
cmd = "sudo systemctl stop #{name}"
|
28
|
-
end
|
29
|
-
|
30
|
-
`#{cmd}`
|
31
|
-
end
|
32
|
-
|
33
|
-
end
|
34
|
-
|
35
|
-
RSpec.shared_context 'rspec setup' do
|
36
|
-
it 'ensure jar is available' do
|
37
|
-
expect(ENV[jdbc_jar_env]).not_to be_nil, "#{jdbc_jar_env} not defined, required to run tests"
|
38
|
-
expect(File.exist?(ENV[jdbc_jar_env])).to eq(true), "#{jdbc_jar_env} defined, but not valid"
|
39
|
-
end
|
40
12
|
end
|
41
13
|
|
42
|
-
RSpec.shared_context '
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
expect { plugin.register }.to raise_error(LogStash::ConfigurationError)
|
14
|
+
RSpec.shared_context 'pipeline' do
  # Builds a pipeline from the including group's `config` and runs it.
  # Memoized by `let`, so the pipeline runs at most once per example.
  let(:run_pipeline) { new_pipeline_from_string(config).run }
end
|
49
20
|
|
50
|
-
RSpec.shared_context '
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
let(:jdbc_test_table) do
|
56
|
-
'logstash_output_jdbc_test'
|
57
|
-
end
|
58
|
-
|
59
|
-
let(:jdbc_drop_table) do
|
60
|
-
"DROP TABLE #{jdbc_test_table}"
|
61
|
-
end
|
62
|
-
|
63
|
-
let(:jdbc_statement_fields) do
|
64
|
-
[
|
65
|
-
{db_field: "created_at", db_type: "datetime", db_value: '?', event_field: '@timestamp'},
|
66
|
-
{db_field: "message", db_type: "varchar(512)", db_value: '?', event_field: 'message'},
|
67
|
-
{db_field: "message_sprintf", db_type: "varchar(512)", db_value: '?', event_field: 'sprintf-%{message}'},
|
68
|
-
{db_field: "static_int", db_type: "int", db_value: '?', event_field: 'int'},
|
69
|
-
{db_field: "static_bigint", db_type: "bigint", db_value: '?', event_field: 'bigint'},
|
70
|
-
{db_field: "static_float", db_type: "float", db_value: '?', event_field: 'float'},
|
71
|
-
{db_field: "static_bool", db_type: "boolean", db_value: '?', event_field: 'bool'},
|
72
|
-
{db_field: "static_bigdec", db_type: "decimal", db_value: '?', event_field: 'bigdec'}
|
73
|
-
]
|
74
|
-
end
|
75
|
-
|
76
|
-
let(:jdbc_create_table) do
|
77
|
-
fields = jdbc_statement_fields.collect { |entry| "#{entry[:db_field]} #{entry[:db_type]} not null" }.join(", ")
|
78
|
-
|
79
|
-
"CREATE table #{jdbc_test_table} (#{fields})"
|
80
|
-
end
|
81
|
-
|
82
|
-
let(:jdbc_drop_table) do
|
83
|
-
"DROP table #{jdbc_test_table}"
|
84
|
-
end
|
85
|
-
|
86
|
-
let(:jdbc_statement) do
|
87
|
-
fields = jdbc_statement_fields.collect { |entry| "#{entry[:db_field]}" }.join(", ")
|
88
|
-
values = jdbc_statement_fields.collect { |entry| "#{entry[:db_value]}" }.join(", ")
|
89
|
-
statement = jdbc_statement_fields.collect { |entry| entry[:event_field] }
|
21
|
+
RSpec.shared_context 'postgres' do
|
22
|
+
# Starts an embedded PostgreSQL server and creates a HikariCP pool for it.
# The server handle, JDBC URL and pool are kept in @db, @url and
# @connection_manager so the helpers and the teardown hook can reach them.
def start_database(host='localhost', port=57354, database='winston', user='testuser', password='password')
  # Cached runtime config avoids extracting the PostgreSQL archive every time.
  config = Java::RuYandexQatoolsEmbedPostgresql::EmbeddedPostgres::cachedRuntimeConfig(Paths::get('/tmp/charrington-test-db-cache'))
  @db = Java::RuYandexQatoolsEmbedPostgresql::EmbeddedPostgres.new
  @url = @db.start(config, host, port, database, user, password, ArrayList.new(["-E", "SQL_ASCII", "--locale=C", "--lc-collate=C", "--lc-ctype=C"]))

  # setup connection manager
  @connection_manager = Java::ComZaxxerHikari::HikariDataSource.new
  @connection_manager.setDriverClassName('org.postgresql.Driver')
  @connection_manager.setUsername(user)
  @connection_manager.setPassword(password)
  @connection_manager.setJdbcUrl(@url)
end

# Closes the connection pool and stops the embedded server, if they exist.
# The server is stopped even when closing the pool raises.
def stop_database
  @connection_manager.close unless @connection_manager.nil?
ensure
  @db.stop unless @db.nil?
end

before(:all) do
  start_database
end

# Without this the server keeps running (and holding its port) after the
# example group finishes.
after(:all) do
  stop_database
end
|
97
38
|
|
98
|
-
let(:
|
99
|
-
#
|
100
|
-
|
101
|
-
message: "test-message #{SecureRandom.uuid}",
|
102
|
-
float: 12.1,
|
103
|
-
bigint: 4000881632477184,
|
104
|
-
bool: true,
|
105
|
-
int: 1,
|
106
|
-
bigdec: BigDecimal.new("123.123")
|
107
|
-
})
|
108
|
-
end
|
109
|
-
|
110
|
-
let(:plugin) do
|
111
|
-
# Setup logger
|
112
|
-
allow(LogStash::Outputs::Jdbc).to receive(:logger).and_return(logger)
|
113
|
-
|
114
|
-
# XXX: Suppress reflection logging. There has to be a better way around this.
|
115
|
-
allow(logger).to receive(:debug).with(/config LogStash::/)
|
116
|
-
|
117
|
-
# Suppress beta warnings.
|
118
|
-
allow(logger).to receive(:info).with(/Please let us know if you find bugs or have suggestions on how to improve this plugin./)
|
119
|
-
|
120
|
-
# Suppress start up messages.
|
121
|
-
expect(logger).to receive(:info).once.with(/JDBC - Starting up/)
|
122
|
-
|
123
|
-
# Setup plugin
|
124
|
-
output = LogStash::Plugin.lookup('output', 'jdbc').new(jdbc_settings)
|
125
|
-
output.register
|
126
|
-
|
127
|
-
output
|
128
|
-
end
|
129
|
-
|
130
|
-
before :each do
|
131
|
-
# Setup table
|
132
|
-
c = plugin.instance_variable_get(:@pool).getConnection
|
133
|
-
|
134
|
-
# Derby doesn't support IF EXISTS.
|
135
|
-
# Seems like the quickest solution. Bleurgh.
|
136
|
-
begin
|
137
|
-
stmt = c.createStatement
|
138
|
-
stmt.executeUpdate(jdbc_drop_table)
|
139
|
-
rescue
|
140
|
-
# noop
|
141
|
-
ensure
|
142
|
-
stmt.close
|
39
|
+
# Path to the JDBC driver jar: DRIVER_JAR_PATH when set, otherwise the
# PostgreSQL test jar under the current working directory. The default is
# built only when the variable is absent, and as a plain string so that no
# Pathname round-trip (and no `require 'pathname'`) is needed.
let(:driver_path) {
  ENV.fetch('DRIVER_JAR_PATH') { "#{Dir.pwd}/vendor/jar-dependencies/test-jars/postgresql-42.2.5.jar" }
}
|
143
42
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
43
|
+
# Runs +sql+ and returns whatever `execute` yields when rows are requested.
def query(sql)
  want_rows = true
  execute(sql, want_rows)
end
|
46
|
+
|
47
|
+
# Runs +sql+ through `execute` without asking for rows back.
def create(sql)
  want_rows = false
  execute(sql, want_rows)
end
|
50
|
+
|
51
|
+
# Runs +sql+ through `execute`, relying on its default of not returning rows.
def insert(sql)
  execute(sql)
end
|
54
|
+
|
55
|
+
# Runs +sql+ as a prepared statement on a connection from @connection_manager.
#
# @param sql [String] the statement to run
# @param results [Boolean] when truthy the statement is run as a query and
#   its rows are returned; otherwise it is only executed
# @return [Array<Hash>, nil, false] for queries, one hash per row keyed by
#   the column name as a Symbol with values read via getString; nil for
#   non-queries; false when an error was rescued (the error is printed)
def execute(sql, results=false)
  conn = @connection_manager.getConnection
  stmt = conn.prepareStatement(sql)

  unless results
    stmt.execute
    return
  end

  rs = stmt.executeQuery
  meta = rs.getMetaData
  column_count = meta.getColumnCount
  rows = []
  while rs.next
    row = {}
    # JDBC column indexes are 1-based.
    1.upto(column_count) do |i|
      row[meta.getColumnName(i).to_sym] = rs.getString(i)
    end
    rows << row
  end
  rows
rescue => e
  puts "Error executing query. sql=#{sql} #{e.message}"
  false
ensure
  # Nested ensure: the connection is handed back to the pool even when
  # closing the statement raises.
  begin
    stmt.close unless stmt.nil? || stmt.isClosed
  ensure
    conn.close unless conn.nil? || conn.isClosed
  end
end
|
150
82
|
|
151
|
-
|
152
|
-
|
153
|
-
c = plugin.instance_variable_get(:@pool).getConnection
|
154
|
-
|
155
|
-
stmt = c.createStatement
|
156
|
-
stmt.executeUpdate(jdbc_drop_table)
|
157
|
-
stmt.close
|
158
|
-
c.close
|
159
|
-
end
|
160
|
-
|
161
|
-
it 'should save a event' do
|
162
|
-
expect { plugin.multi_receive([event]) }.to_not raise_error
|
163
|
-
|
164
|
-
# Verify the number of items in the output table
|
165
|
-
c = plugin.instance_variable_get(:@pool).getConnection
|
166
|
-
|
167
|
-
# TODO replace this simple count with a check of the actual contents
|
168
|
-
|
169
|
-
stmt = c.prepareStatement("select count(*) as total from #{jdbc_test_table} where message = ?")
|
170
|
-
stmt.setString(1, event.get('message'))
|
171
|
-
rs = stmt.executeQuery
|
172
|
-
count = 0
|
173
|
-
count = rs.getInt('total') while rs.next
|
174
|
-
stmt.close
|
175
|
-
c.close
|
176
|
-
|
177
|
-
expect(count).to eq(1)
|
83
|
+
# Drops +table+ if it exists; returns the result of `execute_update`.
def drop_table(table)
  statement = "DROP TABLE IF EXISTS #{table}"
  execute_update(statement)
end
|
179
86
|
|
180
|
-
|
181
|
-
|
182
|
-
original_event = e.get('message')
|
183
|
-
e.set('message', nil)
|
184
|
-
|
185
|
-
expect(logger).to receive(:error).once.with(/JDBC - Exception. Not retrying/, Hash)
|
186
|
-
expect { plugin.multi_receive([event]) }.to_not raise_error
|
187
|
-
|
188
|
-
e.set('message', original_event)
|
87
|
+
# Runs the given DDL statement via `execute_update` and returns its result.
def create_table(sql)
  ddl = sql
  execute_update(ddl)
end
|
190
90
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
t = Thread.new(systemd_database_service) { |systemd_database_service|
|
203
|
-
sleep 20
|
204
|
-
|
205
|
-
start_service(systemd_database_service)
|
206
|
-
}
|
207
|
-
|
208
|
-
t.run
|
209
|
-
|
210
|
-
expect(logger).to receive(:warn).at_least(:once).with(/JDBC - Exception. Retrying/, Hash)
|
211
|
-
expect { p.multi_receive([event]) }.to_not raise_error
|
212
|
-
|
213
|
-
# Wait for the thread to finish
|
214
|
-
t.join
|
91
|
+
# Runs +sql+ with Statement#executeUpdate on a connection taken from
# @connection_manager.
#
# @param sql [String] the statement to run
# @return [Boolean] true when the statement ran; false when an error was
#   rescued (the error is printed)
def execute_update(sql)
  conn = @connection_manager.getConnection
  stmt = conn.createStatement
  stmt.executeUpdate(sql)
  true
rescue => e
  puts "Error executing update. sql=#{sql} #{e.message}"
  false
ensure
  # Nested ensure: the connection is handed back to the pool even when
  # closing the statement raises.
  begin
    stmt.close unless stmt.nil? || stmt.isClosed
  ensure
    conn.close unless conn.nil? || conn.isClosed
  end
end
|
216
103
|
end
|