logstash-output-charrington 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. checksums.yaml +4 -4
  2. data/README.md +38 -1
  3. data/lib/logstash/outputs/charrington.rb +32 -5
  4. data/lib/logstash/outputs/charrington/{alter_table.rb → alter_postgres_table.rb} +6 -5
  5. data/lib/logstash/outputs/charrington/alter_redshift_table.rb +109 -0
  6. data/lib/logstash/outputs/charrington/{create_table.rb → create_postgres_table.rb} +5 -4
  7. data/lib/logstash/outputs/charrington/create_redshift_table.rb +88 -0
  8. data/lib/logstash/outputs/charrington/insert.rb +27 -9
  9. data/lib/logstash/outputs/charrington/process.rb +8 -2
  10. data/lib/logstash/outputs/charrington/{transform.rb → transform_postgres.rb} +1 -1
  11. data/lib/logstash/outputs/charrington/transform_redshift.rb +102 -0
  12. data/logstash-output-charrington.gemspec +11 -9
  13. data/spec/charrington_spec_helper.rb +75 -188
  14. data/spec/logstash-output-charrington_test_jars.rb +5 -0
  15. data/spec/outputs/charrington_spec.rb +118 -39
  16. metadata +52 -34
  17. data/lib/commons-io/commons-io/2.4/commons-io-2.4.jar +0 -0
  18. data/lib/de/flapdoodle/embed/de.flapdoodle.embed.process/2.0.2/de.flapdoodle.embed.process-2.0.2.jar +0 -0
  19. data/lib/net/java/dev/jna/jna-platform/4.0.0/jna-platform-4.0.0.jar +0 -0
  20. data/lib/net/java/dev/jna/jna/4.0.0/jna-4.0.0.jar +0 -0
  21. data/lib/org/apache/commons/commons-compress/1.10/commons-compress-1.10.jar +0 -0
  22. data/lib/org/apache/commons/commons-lang3/3.1/commons-lang3-3.1.jar +0 -0
  23. data/lib/org/postgresql/postgresql/42.2.5/postgresql-42.2.5.jar +0 -0
  24. data/lib/ru/yandex/qatools/embed/postgresql-embedded/2.10/postgresql-embedded-2.10.jar +0 -0
  25. data/lib/ru/yandex/qatools/embed/postgresql-embedded/2.8/postgresql-embedded-2.8.jar +0 -0
  26. data/vendor/postgresql-42.2.5.jar +0 -0
  27. data/vendor/redshift.jar +0 -0
@@ -6,7 +6,7 @@ module Charrington
6
6
  # It handles retries where applicable.
7
7
 
8
8
  include Service
9
- attr_reader :event, :connection, :opts, :max_retries, :schema, :retry_max_interval
9
+ attr_reader :event, :connection, :opts, :max_retries, :schema, :retry_max_interval, :driver
10
10
  attr_accessor :retry_interval, :should_retry
11
11
 
12
12
  Error = Class.new(StandardError)
@@ -22,6 +22,7 @@ module Charrington
22
22
  @max_retries = opts[:max_retries] || 10
23
23
  @retry_max_interval = opts[:retry_max_interval] || 2
24
24
  @retry_interval = opts[:retry_initial_interval] || 2
25
+ @driver = opts[:driver]
25
26
 
26
27
  @attempts = 1
27
28
  @should_retry = true
@@ -29,7 +30,12 @@ module Charrington
29
30
 
30
31
  def call
31
32
  while should_retry do
32
- transformed = Charrington::Transform.call(event)
33
+ transformed = case driver
34
+ when "redshift"
35
+ Charrington::TransformRedshift.call(event)
36
+ else
37
+ Charrington::TransformPostgres.call(event)
38
+ end
33
39
  should_retry = Charrington::Insert.call(connection, transformed, opts)
34
40
  break if !should_retry
35
41
 
@@ -1,7 +1,7 @@
1
1
  require File.join(File.dirname(__FILE__), "service")
2
2
 
3
3
  module Charrington
4
- class Transform
4
+ class TransformPostgres
5
5
  include Service
6
6
  attr_accessor :event
7
7
  attr_reader :top_level_keys
@@ -0,0 +1,102 @@
1
+ require File.join(File.dirname(__FILE__), "service")
2
+ require 'securerandom'
3
+
4
+ module Charrington
5
+ class TransformRedshift
6
+ include Service
7
+ attr_accessor :event
8
+ attr_reader :top_level_keys
9
+
10
+ Error = Class.new(StandardError)
11
+ EventNil = Class.new(Error)
12
+ TableNameNil = Class.new(Error)
13
+ ColumnBlacklist = Class.new(Error)
14
+
15
+ KEY_FILTER_BLACKLIST = ['host','path','jwt','sequence']
16
+ KEY_RAISE_BLACKLIST = ['inserted_at']
17
+
18
+ def initialize(event)
19
+ raise EventNil, "Event is nil" if event.nil?
20
+ event = event.to_hash
21
+ @event = drop_keys(event)
22
+ @top_level_keys = @event.keys
23
+ check_blacklist
24
+ end
25
+
26
+ def call
27
+ handle_event_key(event)
28
+ add_id_to_event(event)
29
+
30
+ handle_key_transform(event, "anonymous_id", "anonymous_user")
31
+ handle_key_transform(event, "sent_at", "published_at")
32
+
33
+ handle_meta_section(event)
34
+
35
+ transform_session_stuff(event)
36
+
37
+ event.delete_if {|k, _v| ['session', 'meta', 'published_at', 'anonymous_user'].include?(k) }
38
+
39
+ event
40
+ end
41
+
42
+ private
43
+
44
+ def handle_key_transform(hash, key_that_should_be_there, key_to_take_value_from)
45
+ unless hash.has_key?(key_that_should_be_there)
46
+ hash[key_that_should_be_there] = hash[key_to_take_value_from] || ""
47
+ else
48
+ hash
49
+ end
50
+ end
51
+
52
+ def add_id_to_event(hash)
53
+ hash["id"] = SecureRandom.hex(10)
54
+ end
55
+
56
+ def handle_event_key(hash)
57
+ event_name = hash["event"] || ""
58
+
59
+ hash["event_text"] = event_name
60
+
61
+ hash["event"] = underscore_event_name(event_name)
62
+ end
63
+
64
+ def underscore_event_name(event_name)
65
+ event_name.to_s.downcase.strip.gsub(/[^a-z0-9]+/, "_")
66
+ end
67
+
68
+ def transform_session_stuff(hash)
69
+ session_stuff = hash["session"] || {}
70
+
71
+ session_stuff.each {|k, v| hash["context_#{k}"] = v }
72
+ end
73
+
74
+ def handle_meta_section(hash)
75
+ meta_section = hash["meta"] || {}
76
+
77
+ meta_section.each {|k, v| hash[k] = v }
78
+ end
79
+
80
+ def check_blacklist
81
+ arr = []
82
+ KEY_RAISE_BLACKLIST.each { |k| arr << k if event.keys.include?(k) }
83
+ raise ColumnBlacklist, "Event contains these blacklisted keys: #{arr.join(",")}" unless arr.empty?
84
+ end
85
+
86
+ def drop_keys(event)
87
+ event.delete_if {|k, _v| k.start_with?("@") || KEY_FILTER_BLACKLIST.include?(k) }
88
+ end
89
+
90
+ def flatten_hash(hash)
91
+ hash.each_with_object({}) do |(k, v), acc|
92
+ if v.is_a? Hash
93
+ flatten_hash(v).map do |h_k, h_v|
94
+ acc["#{k}_#{h_k}"] = h_v
95
+ end
96
+ else
97
+ acc[k] = v
98
+ end
99
+ end
100
+ end
101
+ end
102
+ end
@@ -1,8 +1,9 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-output-charrington'
3
- s.version = '0.2.2'
3
+ s.version = '0.3.0'
4
4
 
5
- s.licenses = ['Apache License (2.0)']
5
+ s.licenses = ['Apache-2.0']
6
+ s.homepage = 'https://gitlab.podium.com/engineering/analytics/logstash-output-charrington'
6
7
  s.summary = 'This plugin allows you to output to SQL, via JDBC'
7
8
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install 'logstash-output-charrington'. This gem is not a stand-alone program"
8
9
  s.authors = ['dconger', 'brianbroderick', 'spencerdcarlson']
@@ -17,18 +18,19 @@ Gem::Specification.new do |s|
17
18
  s.metadata = { 'logstash_plugin' => 'true', 'logstash_group' => 'output' }
18
19
 
19
20
  # Gem dependencies
20
- #
21
21
  s.add_runtime_dependency 'logstash-core-plugin-api', ">= 1.60", "<= 2.99"
22
- s.add_runtime_dependency 'logstash-codec-plain'
23
- s.add_development_dependency 'logstash-devutils'
22
+ s.add_runtime_dependency 'logstash-codec-plain', '~> 3.0', '>= 3.0.6'
24
23
 
24
+ # The 'install_jars' rake task will download these jars from Maven and put them into the vendor directory
25
+ # See jar-dependencies gem's wiki - https://github.com/mkristian/jar-dependencies/wiki/declare-jars-inside-gemspec
25
26
  s.requirements << "jar 'com.zaxxer:HikariCP', '2.7.2'"
26
27
  s.requirements << "jar 'org.apache.logging.log4j:log4j-slf4j-impl', '2.6.2'"
27
28
 
28
- s.add_development_dependency 'jar-dependencies'
29
+ s.add_development_dependency 'logstash-devutils', '~> 1.3', '>= 1.3.1'
30
+ s.add_development_dependency 'jar-dependencies', '~> 0.4.0'
29
31
  s.add_development_dependency 'ruby-maven', '~> 3.3'
30
32
  s.add_development_dependency 'rubocop', '0.41.2'
31
- s.add_development_dependency 'logstash-input-generator'
32
- s.add_development_dependency 'logstash-codec-json'
33
- s.add_development_dependency 'insist'
33
+ s.add_development_dependency 'logstash-input-generator', '~> 3.0', '>= 3.0.6'
34
+ s.add_development_dependency 'logstash-codec-json', '~> 3.0', '>= 3.0.5'
35
+ s.add_development_dependency 'insist','~> 1.0'
34
36
  end
@@ -2,215 +2,102 @@ require 'logstash/devutils/rspec/spec_helper'
2
2
  require 'logstash/outputs/charrington'
3
3
  require 'stud/temporary'
4
4
  require 'java'
5
+ require 'logstash-output-charrington_test_jars'
5
6
  require 'securerandom'
7
+ java_import java.util.ArrayList
8
+ java_import java.nio.file.Paths
6
9
 
7
10
  RSpec::Support::ObjectFormatter.default_instance.max_formatted_output_length = 80000
8
-
9
11
  RSpec.configure do |c|
10
-
11
- def start_service(name)
12
- cmd = "sudo /etc/init.d/#{name}* start"
13
-
14
- `which systemctl`
15
- if $?.success?
16
- cmd = "sudo systemctl start #{name}"
17
- end
18
-
19
- `#{cmd}`
20
- end
21
-
22
- def stop_service(name)
23
- cmd = "sudo /etc/init.d/#{name}* stop"
24
-
25
- `which systemctl`
26
- if $?.success?
27
- cmd = "sudo systemctl stop #{name}"
28
- end
29
-
30
- `#{cmd}`
31
- end
32
-
33
- end
34
-
35
- RSpec.shared_context 'rspec setup' do
36
- it 'ensure jar is available' do
37
- expect(ENV[jdbc_jar_env]).not_to be_nil, "#{jdbc_jar_env} not defined, required to run tests"
38
- expect(File.exist?(ENV[jdbc_jar_env])).to eq(true), "#{jdbc_jar_env} defined, but not valid"
39
- end
40
12
  end
41
13
 
42
- RSpec.shared_context 'when initializing' do
43
- it 'shouldn\'t register with a missing jar file' do
44
- jdbc_settings['driver_jar_path'] = nil
45
- plugin = LogStash::Plugin.lookup('output', 'jdbc').new(jdbc_settings)
46
- expect { plugin.register }.to raise_error(LogStash::ConfigurationError)
14
+ RSpec.shared_context 'pipeline' do
15
+ let(:run_pipeline) do
16
+ pipeline = new_pipeline_from_string(config)
17
+ pipeline.run
47
18
  end
48
19
  end
49
20
 
50
- RSpec.shared_context 'when outputting messages' do
51
- let(:logger) {
52
- double("logger")
53
- }
54
-
55
- let(:jdbc_test_table) do
56
- 'logstash_output_jdbc_test'
57
- end
58
-
59
- let(:jdbc_drop_table) do
60
- "DROP TABLE #{jdbc_test_table}"
61
- end
62
-
63
- let(:jdbc_statement_fields) do
64
- [
65
- {db_field: "created_at", db_type: "datetime", db_value: '?', event_field: '@timestamp'},
66
- {db_field: "message", db_type: "varchar(512)", db_value: '?', event_field: 'message'},
67
- {db_field: "message_sprintf", db_type: "varchar(512)", db_value: '?', event_field: 'sprintf-%{message}'},
68
- {db_field: "static_int", db_type: "int", db_value: '?', event_field: 'int'},
69
- {db_field: "static_bigint", db_type: "bigint", db_value: '?', event_field: 'bigint'},
70
- {db_field: "static_float", db_type: "float", db_value: '?', event_field: 'float'},
71
- {db_field: "static_bool", db_type: "boolean", db_value: '?', event_field: 'bool'},
72
- {db_field: "static_bigdec", db_type: "decimal", db_value: '?', event_field: 'bigdec'}
73
- ]
74
- end
75
-
76
- let(:jdbc_create_table) do
77
- fields = jdbc_statement_fields.collect { |entry| "#{entry[:db_field]} #{entry[:db_type]} not null" }.join(", ")
78
-
79
- "CREATE table #{jdbc_test_table} (#{fields})"
80
- end
81
-
82
- let(:jdbc_drop_table) do
83
- "DROP table #{jdbc_test_table}"
84
- end
85
-
86
- let(:jdbc_statement) do
87
- fields = jdbc_statement_fields.collect { |entry| "#{entry[:db_field]}" }.join(", ")
88
- values = jdbc_statement_fields.collect { |entry| "#{entry[:db_value]}" }.join(", ")
89
- statement = jdbc_statement_fields.collect { |entry| entry[:event_field] }
21
+ RSpec.shared_context 'postgres' do
22
+ def start_database(host='localhost', port=57354, database='winston', user='testuser', password='password')
23
+ config = Java::RuYandexQatoolsEmbedPostgresql::EmbeddedPostgres::cachedRuntimeConfig(Paths::get('/tmp/charrington-test-db-cache')) # avoid archive extraction every time
24
+ db = Java::RuYandexQatoolsEmbedPostgresql::EmbeddedPostgres.new
25
+ @url = db.start(config, host, port, database, user, password, ArrayList.new(["-E", "SQL_ASCII", "--locale=C", "--lc-collate=C", "--lc-ctype=C"]))
90
26
 
91
- statement.insert(0, "insert into #{jdbc_test_table} (#{fields}) values(#{values})")
27
+ # setup connection manager
28
+ @connection_manager = Java::ComZaxxerHikari::HikariDataSource.new
29
+ @connection_manager.setDriverClassName('org.postgresql.Driver')
30
+ @connection_manager.setUsername(user)
31
+ @connection_manager.setPassword(password)
32
+ @connection_manager.setJdbcUrl(@url)
92
33
  end
93
34
 
94
- let(:systemd_database_service) do
95
- nil
35
+ before(:all) do
36
+ start_database
96
37
  end
97
38
 
98
- let(:event) do
99
- # TODO: Auto generate fields from jdbc_statement_fields
100
- LogStash::Event.new({
101
- message: "test-message #{SecureRandom.uuid}",
102
- float: 12.1,
103
- bigint: 4000881632477184,
104
- bool: true,
105
- int: 1,
106
- bigdec: BigDecimal.new("123.123")
107
- })
108
- end
109
-
110
- let(:plugin) do
111
- # Setup logger
112
- allow(LogStash::Outputs::Jdbc).to receive(:logger).and_return(logger)
113
-
114
- # XXX: Suppress reflection logging. There has to be a better way around this.
115
- allow(logger).to receive(:debug).with(/config LogStash::/)
116
-
117
- # Suppress beta warnings.
118
- allow(logger).to receive(:info).with(/Please let us know if you find bugs or have suggestions on how to improve this plugin./)
119
-
120
- # Suppress start up messages.
121
- expect(logger).to receive(:info).once.with(/JDBC - Starting up/)
122
-
123
- # Setup plugin
124
- output = LogStash::Plugin.lookup('output', 'jdbc').new(jdbc_settings)
125
- output.register
126
-
127
- output
128
- end
129
-
130
- before :each do
131
- # Setup table
132
- c = plugin.instance_variable_get(:@pool).getConnection
133
-
134
- # Derby doesn't support IF EXISTS.
135
- # Seems like the quickest solution. Bleurgh.
136
- begin
137
- stmt = c.createStatement
138
- stmt.executeUpdate(jdbc_drop_table)
139
- rescue
140
- # noop
141
- ensure
142
- stmt.close
39
+ let(:driver_path) {
40
+ ENV.fetch('DRIVER_JAR_PATH', Pathname.new("#{Dir.pwd}/vendor/jar-dependencies/test-jars/postgresql-42.2.5.jar").to_s)
41
+ }
143
42
 
144
- stmt = c.createStatement
145
- stmt.executeUpdate(jdbc_create_table)
146
- stmt.close
147
- c.close
43
+ def query(sql)
44
+ execute(sql, true)
45
+ end
46
+
47
+ def create(sql)
48
+ execute(sql, false)
49
+ end
50
+
51
+ def insert(sql)
52
+ execute(sql)
53
+ end
54
+
55
+ def execute(sql, results=false)
56
+ conn = @connection_manager.getConnection
57
+ stmt = conn.prepareStatement(sql);
58
+ if !results
59
+ stmt.execute
60
+ return
61
+ else
62
+ rs = stmt.executeQuery()
63
+ meta = rs.getMetaData()
64
+ n = meta.getColumnCount()
65
+ results = []
66
+ while rs.next() do
67
+ row = {}
68
+ (1..n).each do |i|
69
+ row[meta.getColumnName(i).to_sym] = rs.getString(i)
70
+ end
71
+ results << row
72
+ end
73
+ results
148
74
  end
75
+ rescue => e
76
+ puts "Error executing query. sql=#{sql} #{e.message}"
77
+ false
78
+ ensure
79
+ stmt.close if !stmt.nil? and !stmt.isClosed
80
+ conn.close if !conn.nil? and !conn.isClosed
149
81
  end
150
82
 
151
- # Delete table after each
152
- after :each do
153
- c = plugin.instance_variable_get(:@pool).getConnection
154
-
155
- stmt = c.createStatement
156
- stmt.executeUpdate(jdbc_drop_table)
157
- stmt.close
158
- c.close
159
- end
160
-
161
- it 'should save a event' do
162
- expect { plugin.multi_receive([event]) }.to_not raise_error
163
-
164
- # Verify the number of items in the output table
165
- c = plugin.instance_variable_get(:@pool).getConnection
166
-
167
- # TODO replace this simple count with a check of the actual contents
168
-
169
- stmt = c.prepareStatement("select count(*) as total from #{jdbc_test_table} where message = ?")
170
- stmt.setString(1, event.get('message'))
171
- rs = stmt.executeQuery
172
- count = 0
173
- count = rs.getInt('total') while rs.next
174
- stmt.close
175
- c.close
176
-
177
- expect(count).to eq(1)
83
+ def drop_table(table)
84
+ execute_update("DROP TABLE IF EXISTS #{table}")
178
85
  end
179
86
 
180
- it 'should not save event, and log an unretryable exception' do
181
- e = event
182
- original_event = e.get('message')
183
- e.set('message', nil)
184
-
185
- expect(logger).to receive(:error).once.with(/JDBC - Exception. Not retrying/, Hash)
186
- expect { plugin.multi_receive([event]) }.to_not raise_error
187
-
188
- e.set('message', original_event)
87
+ def create_table(sql)
88
+ execute_update(sql)
189
89
  end
190
90
 
191
- it 'it should retry after a connection loss, and log a warning' do
192
- skip "does not run as a service, or known issue with test" if systemd_database_service.nil?
193
-
194
- p = plugin
195
-
196
- # Check that everything is fine right now
197
- expect { p.multi_receive([event]) }.not_to raise_error
198
-
199
- stop_service(systemd_database_service)
200
-
201
- # Start a thread to restart the service after the fact.
202
- t = Thread.new(systemd_database_service) { |systemd_database_service|
203
- sleep 20
204
-
205
- start_service(systemd_database_service)
206
- }
207
-
208
- t.run
209
-
210
- expect(logger).to receive(:warn).at_least(:once).with(/JDBC - Exception. Retrying/, Hash)
211
- expect { p.multi_receive([event]) }.to_not raise_error
212
-
213
- # Wait for the thread to finish
214
- t.join
91
+ def execute_update(sql)
92
+ conn = @connection_manager.getConnection
93
+ stmt = conn.createStatement
94
+ stmt.executeUpdate(sql)
95
+ true
96
+ rescue => e
97
+ puts "Error executing update. sql=#{sql} #{e.message}"
98
+ false
99
+ ensure
100
+ stmt.close if !stmt.nil? and !stmt.isClosed
101
+ conn.close if !conn.nil? and !conn.isClosed
215
102
  end
216
103
  end