logstash-output-charrington 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (27) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +38 -1
  3. data/lib/logstash/outputs/charrington.rb +32 -5
  4. data/lib/logstash/outputs/charrington/{alter_table.rb → alter_postgres_table.rb} +6 -5
  5. data/lib/logstash/outputs/charrington/alter_redshift_table.rb +109 -0
  6. data/lib/logstash/outputs/charrington/{create_table.rb → create_postgres_table.rb} +5 -4
  7. data/lib/logstash/outputs/charrington/create_redshift_table.rb +88 -0
  8. data/lib/logstash/outputs/charrington/insert.rb +27 -9
  9. data/lib/logstash/outputs/charrington/process.rb +8 -2
  10. data/lib/logstash/outputs/charrington/{transform.rb → transform_postgres.rb} +1 -1
  11. data/lib/logstash/outputs/charrington/transform_redshift.rb +102 -0
  12. data/logstash-output-charrington.gemspec +11 -9
  13. data/spec/charrington_spec_helper.rb +75 -188
  14. data/spec/logstash-output-charrington_test_jars.rb +5 -0
  15. data/spec/outputs/charrington_spec.rb +118 -39
  16. metadata +52 -34
  17. data/lib/commons-io/commons-io/2.4/commons-io-2.4.jar +0 -0
  18. data/lib/de/flapdoodle/embed/de.flapdoodle.embed.process/2.0.2/de.flapdoodle.embed.process-2.0.2.jar +0 -0
  19. data/lib/net/java/dev/jna/jna-platform/4.0.0/jna-platform-4.0.0.jar +0 -0
  20. data/lib/net/java/dev/jna/jna/4.0.0/jna-4.0.0.jar +0 -0
  21. data/lib/org/apache/commons/commons-compress/1.10/commons-compress-1.10.jar +0 -0
  22. data/lib/org/apache/commons/commons-lang3/3.1/commons-lang3-3.1.jar +0 -0
  23. data/lib/org/postgresql/postgresql/42.2.5/postgresql-42.2.5.jar +0 -0
  24. data/lib/ru/yandex/qatools/embed/postgresql-embedded/2.10/postgresql-embedded-2.10.jar +0 -0
  25. data/lib/ru/yandex/qatools/embed/postgresql-embedded/2.8/postgresql-embedded-2.8.jar +0 -0
  26. data/vendor/postgresql-42.2.5.jar +0 -0
  27. data/vendor/redshift.jar +0 -0
@@ -6,7 +6,7 @@ module Charrington
6
6
  # It handles retries where applicable.
7
7
 
8
8
  include Service
9
- attr_reader :event, :connection, :opts, :max_retries, :schema, :retry_max_interval
9
+ attr_reader :event, :connection, :opts, :max_retries, :schema, :retry_max_interval, :driver
10
10
  attr_accessor :retry_interval, :should_retry
11
11
 
12
12
  Error = Class.new(StandardError)
@@ -22,6 +22,7 @@ module Charrington
22
22
  @max_retries = opts[:max_retries] || 10
23
23
  @retry_max_interval = opts[:retry_max_interval] || 2
24
24
  @retry_interval = opts[:retry_initial_interval] || 2
25
+ @driver = opts[:driver]
25
26
 
26
27
  @attempts = 1
27
28
  @should_retry = true
@@ -29,7 +30,12 @@ module Charrington
29
30
 
30
31
  def call
31
32
  while should_retry do
32
- transformed = Charrington::Transform.call(event)
33
+ transformed = case driver
34
+ when "redshift"
35
+ Charrington::TransformRedshift.call(event)
36
+ else
37
+ Charrington::TransformPostgres.call(event)
38
+ end
33
39
  should_retry = Charrington::Insert.call(connection, transformed, opts)
34
40
  break if !should_retry
35
41
 
@@ -1,7 +1,7 @@
1
1
  require File.join(File.dirname(__FILE__), "service")
2
2
 
3
3
  module Charrington
4
- class Transform
4
+ class TransformPostgres
5
5
  include Service
6
6
  attr_accessor :event
7
7
  attr_reader :top_level_keys
@@ -0,0 +1,102 @@
1
+ require File.join(File.dirname(__FILE__), "service")
2
+ require 'securerandom'
3
+
4
module Charrington
  # Reshapes an event hash into the flat layout used for the Redshift driver.
  #
  # Construction filters the event (see #initialize); #call then rewrites it:
  # the event name is normalised, an id is generated, fallback columns are
  # filled, and the "meta" and "session" sections are hoisted to the top level.
  #
  # NOTE(review): the class-level `TransformRedshift.call(event)` entry point is
  # presumably provided by the Service mixin (service.rb) — confirm there.
  class TransformRedshift
    include Service
    attr_accessor :event
    attr_reader :top_level_keys

    Error = Class.new(StandardError)
    EventNil = Class.new(Error)
    TableNameNil = Class.new(Error)
    ColumnBlacklist = Class.new(Error)

    # Keys silently removed from every event before it is transformed.
    KEY_FILTER_BLACKLIST = ['host', 'path', 'jwt', 'sequence'].freeze
    # Keys whose presence makes the event invalid (ColumnBlacklist is raised).
    KEY_RAISE_BLACKLIST = ['inserted_at'].freeze
    # Source keys that #call folds into other columns and then discards.
    CONSUMED_KEYS = ['session', 'meta', 'published_at', 'anonymous_user'].freeze

    # @param event [#to_hash] the incoming event; must not be nil
    # @raise [EventNil] when +event+ is nil
    # @raise [ColumnBlacklist] when the event contains a KEY_RAISE_BLACKLIST key
    def initialize(event)
      raise EventNil, "Event is nil" if event.nil?

      @event = drop_keys(event.to_hash)
      # Snapshot of the keys that survived filtering, taken before #call
      # adds or removes anything.
      @top_level_keys = @event.keys
      check_blacklist
    end

    # Applies every transformation to the stored event, in place.
    #
    # Order matters: "meta" entries are copied to the top level after the id
    # and event name have been set, so a "meta" key with the same name
    # overwrites them.
    #
    # @return [Hash] the transformed event
    def call
      handle_event_key(event)
      add_id_to_event(event)

      handle_key_transform(event, "anonymous_id", "anonymous_user")
      handle_key_transform(event, "sent_at", "published_at")

      handle_meta_section(event)
      transform_session_stuff(event)

      event.delete_if { |k, _v| CONSUMED_KEYS.include?(k) }

      event
    end

    private

    # Fills +key_that_should_be_there+ from +key_to_take_value_from+ (or "")
    # when it is absent; an existing value is left untouched.
    def handle_key_transform(hash, key_that_should_be_there, key_to_take_value_from)
      return if hash.key?(key_that_should_be_there)

      hash[key_that_should_be_there] = hash[key_to_take_value_from] || ""
    end

    # Assigns a random 20-character hex id (10 random bytes).
    def add_id_to_event(hash)
      hash["id"] = SecureRandom.hex(10)
    end

    # Keeps the raw event name in "event_text" and stores the underscored
    # form in "event". A missing name becomes "" in both.
    def handle_event_key(hash)
      event_name = hash["event"] || ""

      hash["event_text"] = event_name
      hash["event"] = underscore_event_name(event_name)
    end

    # Lower-cases and strips the name, then collapses every run of characters
    # outside a-z0-9 into one underscore. Leading or trailing punctuation
    # therefore still yields a leading or trailing underscore.
    def underscore_event_name(event_name)
      event_name.to_s.downcase.strip.gsub(/[^a-z0-9]+/, "_")
    end

    # Copies each "session" entry to the top level as "context_<key>".
    def transform_session_stuff(hash)
      session_stuff = hash["session"] || {}

      session_stuff.each { |k, v| hash["context_#{k}"] = v }
    end

    # Copies each "meta" entry to the top level under its own key,
    # overwriting any existing top-level key with the same name.
    def handle_meta_section(hash)
      meta_section = hash["meta"] || {}

      meta_section.each { |k, v| hash[k] = v }
    end

    # @raise [ColumnBlacklist] listing every KEY_RAISE_BLACKLIST key present
    def check_blacklist
      blacklisted = KEY_RAISE_BLACKLIST.select { |k| event.key?(k) }
      return if blacklisted.empty?

      raise ColumnBlacklist, "Event contains these blacklisted keys: #{blacklisted.join(",")}"
    end

    # Returns a filtered copy of +event+ without "@"-prefixed keys and without
    # KEY_FILTER_BLACKLIST keys. A copy is built (rather than deleting in
    # place) so that a plain Hash passed to .new is not modified for the caller.
    def drop_keys(event)
      event.reject { |k, _v| k.start_with?("@") || KEY_FILTER_BLACKLIST.include?(k) }
    end

    # Recursively flattens nested hashes, joining key paths with "_".
    # Not called anywhere in this class.
    def flatten_hash(hash)
      hash.each_with_object({}) do |(k, v), acc|
        if v.is_a?(Hash)
          flatten_hash(v).each { |h_k, h_v| acc["#{k}_#{h_k}"] = h_v }
        else
          acc[k] = v
        end
      end
    end
  end
end
@@ -1,8 +1,9 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-output-charrington'
3
- s.version = '0.2.2'
3
+ s.version = '0.3.0'
4
4
 
5
- s.licenses = ['Apache License (2.0)']
5
+ s.licenses = ['Apache-2.0']
6
+ s.homepage = 'https://gitlab.podium.com/engineering/analytics/logstash-output-charrington'
6
7
  s.summary = 'This plugin allows you to output to SQL, via JDBC'
7
8
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install 'logstash-output-charrington'. This gem is not a stand-alone program"
8
9
  s.authors = ['dconger', 'brianbroderick', 'spencerdcarlson']
@@ -17,18 +18,19 @@ Gem::Specification.new do |s|
17
18
  s.metadata = { 'logstash_plugin' => 'true', 'logstash_group' => 'output' }
18
19
 
19
20
  # Gem dependencies
20
- #
21
21
  s.add_runtime_dependency 'logstash-core-plugin-api', ">= 1.60", "<= 2.99"
22
- s.add_runtime_dependency 'logstash-codec-plain'
23
- s.add_development_dependency 'logstash-devutils'
22
+ s.add_runtime_dependency 'logstash-codec-plain', '~> 3.0', '>= 3.0.6'
24
23
 
24
+ # The 'install_jars' rake task will download these jars from Maven and put them into the vendor directory
25
+ # See jar-dependencies gem's wiki - https://github.com/mkristian/jar-dependencies/wiki/declare-jars-inside-gemspec
25
26
  s.requirements << "jar 'com.zaxxer:HikariCP', '2.7.2'"
26
27
  s.requirements << "jar 'org.apache.logging.log4j:log4j-slf4j-impl', '2.6.2'"
27
28
 
28
- s.add_development_dependency 'jar-dependencies'
29
+ s.add_development_dependency 'logstash-devutils', '~> 1.3', '>= 1.3.1'
30
+ s.add_development_dependency 'jar-dependencies', '~> 0.4.0'
29
31
  s.add_development_dependency 'ruby-maven', '~> 3.3'
30
32
  s.add_development_dependency 'rubocop', '0.41.2'
31
- s.add_development_dependency 'logstash-input-generator'
32
- s.add_development_dependency 'logstash-codec-json'
33
- s.add_development_dependency 'insist'
33
+ s.add_development_dependency 'logstash-input-generator', '~> 3.0', '>= 3.0.6'
34
+ s.add_development_dependency 'logstash-codec-json', '~> 3.0', '>= 3.0.5'
35
+ s.add_development_dependency 'insist','~> 1.0'
34
36
  end
@@ -2,215 +2,102 @@ require 'logstash/devutils/rspec/spec_helper'
2
2
  require 'logstash/outputs/charrington'
3
3
  require 'stud/temporary'
4
4
  require 'java'
5
+ require 'logstash-output-charrington_test_jars'
5
6
  require 'securerandom'
7
+ java_import java.util.ArrayList
8
+ java_import java.nio.file.Paths
6
9
 
7
10
  RSpec::Support::ObjectFormatter.default_instance.max_formatted_output_length = 80000
8
-
9
11
  RSpec.configure do |c|
10
-
11
- def start_service(name)
12
- cmd = "sudo /etc/init.d/#{name}* start"
13
-
14
- `which systemctl`
15
- if $?.success?
16
- cmd = "sudo systemctl start #{name}"
17
- end
18
-
19
- `#{cmd}`
20
- end
21
-
22
- def stop_service(name)
23
- cmd = "sudo /etc/init.d/#{name}* stop"
24
-
25
- `which systemctl`
26
- if $?.success?
27
- cmd = "sudo systemctl stop #{name}"
28
- end
29
-
30
- `#{cmd}`
31
- end
32
-
33
- end
34
-
35
- RSpec.shared_context 'rspec setup' do
36
- it 'ensure jar is available' do
37
- expect(ENV[jdbc_jar_env]).not_to be_nil, "#{jdbc_jar_env} not defined, required to run tests"
38
- expect(File.exist?(ENV[jdbc_jar_env])).to eq(true), "#{jdbc_jar_env} defined, but not valid"
39
- end
40
12
  end
41
13
 
42
- RSpec.shared_context 'when initializing' do
43
- it 'shouldn\'t register with a missing jar file' do
44
- jdbc_settings['driver_jar_path'] = nil
45
- plugin = LogStash::Plugin.lookup('output', 'jdbc').new(jdbc_settings)
46
- expect { plugin.register }.to raise_error(LogStash::ConfigurationError)
14
+ RSpec.shared_context 'pipeline' do
15
+ let(:run_pipeline) do
16
+ pipeline = new_pipeline_from_string(config)
17
+ pipeline.run
47
18
  end
48
19
  end
49
20
 
50
- RSpec.shared_context 'when outputting messages' do
51
- let(:logger) {
52
- double("logger")
53
- }
54
-
55
- let(:jdbc_test_table) do
56
- 'logstash_output_jdbc_test'
57
- end
58
-
59
- let(:jdbc_drop_table) do
60
- "DROP TABLE #{jdbc_test_table}"
61
- end
62
-
63
- let(:jdbc_statement_fields) do
64
- [
65
- {db_field: "created_at", db_type: "datetime", db_value: '?', event_field: '@timestamp'},
66
- {db_field: "message", db_type: "varchar(512)", db_value: '?', event_field: 'message'},
67
- {db_field: "message_sprintf", db_type: "varchar(512)", db_value: '?', event_field: 'sprintf-%{message}'},
68
- {db_field: "static_int", db_type: "int", db_value: '?', event_field: 'int'},
69
- {db_field: "static_bigint", db_type: "bigint", db_value: '?', event_field: 'bigint'},
70
- {db_field: "static_float", db_type: "float", db_value: '?', event_field: 'float'},
71
- {db_field: "static_bool", db_type: "boolean", db_value: '?', event_field: 'bool'},
72
- {db_field: "static_bigdec", db_type: "decimal", db_value: '?', event_field: 'bigdec'}
73
- ]
74
- end
75
-
76
- let(:jdbc_create_table) do
77
- fields = jdbc_statement_fields.collect { |entry| "#{entry[:db_field]} #{entry[:db_type]} not null" }.join(", ")
78
-
79
- "CREATE table #{jdbc_test_table} (#{fields})"
80
- end
81
-
82
- let(:jdbc_drop_table) do
83
- "DROP table #{jdbc_test_table}"
84
- end
85
-
86
- let(:jdbc_statement) do
87
- fields = jdbc_statement_fields.collect { |entry| "#{entry[:db_field]}" }.join(", ")
88
- values = jdbc_statement_fields.collect { |entry| "#{entry[:db_value]}" }.join(", ")
89
- statement = jdbc_statement_fields.collect { |entry| entry[:event_field] }
21
+ RSpec.shared_context 'postgres' do
22
+ def start_database(host='localhost', port=57354, database='winston', user='testuser', password='password')
23
+ config = Java::RuYandexQatoolsEmbedPostgresql::EmbeddedPostgres::cachedRuntimeConfig(Paths::get('/tmp/charrington-test-db-cache')) # avoid archive extraction every time
24
+ db = Java::RuYandexQatoolsEmbedPostgresql::EmbeddedPostgres.new
25
+ @url = db.start(config, host, port, database, user, password, ArrayList.new(["-E", "SQL_ASCII", "--locale=C", "--lc-collate=C", "--lc-ctype=C"]))
90
26
 
91
- statement.insert(0, "insert into #{jdbc_test_table} (#{fields}) values(#{values})")
27
+ # setup connection manager
28
+ @connection_manager = Java::ComZaxxerHikari::HikariDataSource.new
29
+ @connection_manager.setDriverClassName('org.postgresql.Driver')
30
+ @connection_manager.setUsername(user)
31
+ @connection_manager.setPassword(password)
32
+ @connection_manager.setJdbcUrl(@url)
92
33
  end
93
34
 
94
- let(:systemd_database_service) do
95
- nil
35
+ before(:all) do
36
+ start_database
96
37
  end
97
38
 
98
- let(:event) do
99
- # TODO: Auto generate fields from jdbc_statement_fields
100
- LogStash::Event.new({
101
- message: "test-message #{SecureRandom.uuid}",
102
- float: 12.1,
103
- bigint: 4000881632477184,
104
- bool: true,
105
- int: 1,
106
- bigdec: BigDecimal.new("123.123")
107
- })
108
- end
109
-
110
- let(:plugin) do
111
- # Setup logger
112
- allow(LogStash::Outputs::Jdbc).to receive(:logger).and_return(logger)
113
-
114
- # XXX: Suppress reflection logging. There has to be a better way around this.
115
- allow(logger).to receive(:debug).with(/config LogStash::/)
116
-
117
- # Suppress beta warnings.
118
- allow(logger).to receive(:info).with(/Please let us know if you find bugs or have suggestions on how to improve this plugin./)
119
-
120
- # Suppress start up messages.
121
- expect(logger).to receive(:info).once.with(/JDBC - Starting up/)
122
-
123
- # Setup plugin
124
- output = LogStash::Plugin.lookup('output', 'jdbc').new(jdbc_settings)
125
- output.register
126
-
127
- output
128
- end
129
-
130
- before :each do
131
- # Setup table
132
- c = plugin.instance_variable_get(:@pool).getConnection
133
-
134
- # Derby doesn't support IF EXISTS.
135
- # Seems like the quickest solution. Bleurgh.
136
- begin
137
- stmt = c.createStatement
138
- stmt.executeUpdate(jdbc_drop_table)
139
- rescue
140
- # noop
141
- ensure
142
- stmt.close
39
+ let(:driver_path) {
40
+ ENV.fetch('DRIVER_JAR_PATH', Pathname.new("#{Dir.pwd}/vendor/jar-dependencies/test-jars/postgresql-42.2.5.jar").to_s)
41
+ }
143
42
 
144
- stmt = c.createStatement
145
- stmt.executeUpdate(jdbc_create_table)
146
- stmt.close
147
- c.close
43
+ def query(sql)
44
+ execute(sql, true)
45
+ end
46
+
47
+ def create(sql)
48
+ execute(sql, false)
49
+ end
50
+
51
+ def insert(sql)
52
+ execute(sql)
53
+ end
54
+
55
# Runs +sql+ through a prepared statement on a connection taken from
# @connection_manager.
#
# @param sql [String] the statement to run
# @param results [Boolean] when true the statement is run as a query and
#   its rows are collected; when false it is only executed
# @return [Array<Hash>, nil, false] for a query, one Hash per row mapping
#   each column name (as a Symbol) to its value read with getString; nil
#   when +results+ is false; false when an error was rescued (the error is
#   printed, not re-raised)
def execute(sql, results=false)
  conn = @connection_manager.getConnection
  stmt = conn.prepareStatement(sql)

  unless results
    stmt.execute
    return
  end

  rs = stmt.executeQuery
  meta = rs.getMetaData
  column_count = meta.getColumnCount
  rows = []
  while rs.next
    row = {}
    # JDBC column indexes are 1-based.
    (1..column_count).each do |i|
      row[meta.getColumnName(i).to_sym] = rs.getString(i)
    end
    rows << row
  end
  rows
rescue => e
  puts "Error executing query. sql=#{sql} #{e.message}"
  false
ensure
  # Nested ensure: the connection is still released when closing the
  # statement raises.
  begin
    stmt.close if stmt && !stmt.isClosed
  ensure
    conn.close if conn && !conn.isClosed
  end
end
150
82
 
151
- # Delete table after each
152
- after :each do
153
- c = plugin.instance_variable_get(:@pool).getConnection
154
-
155
- stmt = c.createStatement
156
- stmt.executeUpdate(jdbc_drop_table)
157
- stmt.close
158
- c.close
159
- end
160
-
161
- it 'should save a event' do
162
- expect { plugin.multi_receive([event]) }.to_not raise_error
163
-
164
- # Verify the number of items in the output table
165
- c = plugin.instance_variable_get(:@pool).getConnection
166
-
167
- # TODO replace this simple count with a check of the actual contents
168
-
169
- stmt = c.prepareStatement("select count(*) as total from #{jdbc_test_table} where message = ?")
170
- stmt.setString(1, event.get('message'))
171
- rs = stmt.executeQuery
172
- count = 0
173
- count = rs.getInt('total') while rs.next
174
- stmt.close
175
- c.close
176
-
177
- expect(count).to eq(1)
83
+ def drop_table(table)
84
+ execute_update("DROP TABLE IF EXISTS #{table}")
178
85
  end
179
86
 
180
- it 'should not save event, and log an unretryable exception' do
181
- e = event
182
- original_event = e.get('message')
183
- e.set('message', nil)
184
-
185
- expect(logger).to receive(:error).once.with(/JDBC - Exception. Not retrying/, Hash)
186
- expect { plugin.multi_receive([event]) }.to_not raise_error
187
-
188
- e.set('message', original_event)
87
+ def create_table(sql)
88
+ execute_update(sql)
189
89
  end
190
90
 
191
- it 'it should retry after a connection loss, and log a warning' do
192
- skip "does not run as a service, or known issue with test" if systemd_database_service.nil?
193
-
194
- p = plugin
195
-
196
- # Check that everything is fine right now
197
- expect { p.multi_receive([event]) }.not_to raise_error
198
-
199
- stop_service(systemd_database_service)
200
-
201
- # Start a thread to restart the service after the fact.
202
- t = Thread.new(systemd_database_service) { |systemd_database_service|
203
- sleep 20
204
-
205
- start_service(systemd_database_service)
206
- }
207
-
208
- t.run
209
-
210
- expect(logger).to receive(:warn).at_least(:once).with(/JDBC - Exception. Retrying/, Hash)
211
- expect { p.multi_receive([event]) }.to_not raise_error
212
-
213
- # Wait for the thread to finish
214
- t.join
91
+ def execute_update(sql)
92
+ conn = @connection_manager.getConnection
93
+ stmt = conn.createStatement
94
+ stmt.executeUpdate(sql)
95
+ true
96
+ rescue => e
97
+ puts "Error executing update. sql=#{sql} #{e.message}"
98
+ false
99
+ ensure
100
+ stmt.close if !stmt.nil? and !stmt.isClosed
101
+ conn.close if !conn.nil? and !conn.isClosed
215
102
  end
216
103
  end