logstash-output-charrington 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. checksums.yaml +4 -4
  2. data/README.md +38 -1
  3. data/lib/logstash/outputs/charrington.rb +32 -5
  4. data/lib/logstash/outputs/charrington/{alter_table.rb → alter_postgres_table.rb} +6 -5
  5. data/lib/logstash/outputs/charrington/alter_redshift_table.rb +109 -0
  6. data/lib/logstash/outputs/charrington/{create_table.rb → create_postgres_table.rb} +5 -4
  7. data/lib/logstash/outputs/charrington/create_redshift_table.rb +88 -0
  8. data/lib/logstash/outputs/charrington/insert.rb +27 -9
  9. data/lib/logstash/outputs/charrington/process.rb +8 -2
  10. data/lib/logstash/outputs/charrington/{transform.rb → transform_postgres.rb} +1 -1
  11. data/lib/logstash/outputs/charrington/transform_redshift.rb +102 -0
  12. data/logstash-output-charrington.gemspec +11 -9
  13. data/spec/charrington_spec_helper.rb +75 -188
  14. data/spec/logstash-output-charrington_test_jars.rb +5 -0
  15. data/spec/outputs/charrington_spec.rb +118 -39
  16. metadata +52 -34
  17. data/lib/commons-io/commons-io/2.4/commons-io-2.4.jar +0 -0
  18. data/lib/de/flapdoodle/embed/de.flapdoodle.embed.process/2.0.2/de.flapdoodle.embed.process-2.0.2.jar +0 -0
  19. data/lib/net/java/dev/jna/jna-platform/4.0.0/jna-platform-4.0.0.jar +0 -0
  20. data/lib/net/java/dev/jna/jna/4.0.0/jna-4.0.0.jar +0 -0
  21. data/lib/org/apache/commons/commons-compress/1.10/commons-compress-1.10.jar +0 -0
  22. data/lib/org/apache/commons/commons-lang3/3.1/commons-lang3-3.1.jar +0 -0
  23. data/lib/org/postgresql/postgresql/42.2.5/postgresql-42.2.5.jar +0 -0
  24. data/lib/ru/yandex/qatools/embed/postgresql-embedded/2.10/postgresql-embedded-2.10.jar +0 -0
  25. data/lib/ru/yandex/qatools/embed/postgresql-embedded/2.8/postgresql-embedded-2.8.jar +0 -0
  26. data/vendor/postgresql-42.2.5.jar +0 -0
  27. data/vendor/redshift.jar +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 95cf47c25df7b4666cfc94d7ca2c4b964164a0d76b57ce62993f8cd5c4d53bcd
4
- data.tar.gz: 5a5eb5c0323f16d0b9143d6953dcf24fdf9f430e56a751412142f2f7c01dea93
3
+ metadata.gz: a50e0dc6a545b094b6632682214046bedf1fe56842b17331c44b890157719ad6
4
+ data.tar.gz: 1e64be923fef5c32b8a678052d76bf774dadf0214519f5a9aee87052638a90fd
5
5
  SHA512:
6
- metadata.gz: f88562ff8b73f92e22750359178f301c5fef58bd1e2361c9d1d88e8b2c08bba7b69d001be8c5a87fbd2b741ff6bff70138d954a7f094c7794d2155c9103bea23
7
- data.tar.gz: dcdd5585b34e0335693a42a04f4b0897632c7f441ad4cac419975fcca863ea6c3b8eb58154ea8f111557d2d2743c4b4b1d8659c6a5129408134961fed8bf4699
6
+ metadata.gz: 93f149aec77737579df06f0e82a49524bc49264d09d4938d67fed25a6b6606e495171f70863ca3b73fe7ea092097a64f43028411a2fb85ad58f5e9adc24c1f2b
7
+ data.tar.gz: b277fd159871eae94471b3cc2329e5327a1904ccb5e1fffbc2c4f53d40f8b867ebe6346f38af374e3cd428307602a3712e05da5deab5a3eef451ed9733c3d3bf
data/README.md CHANGED
@@ -49,8 +49,45 @@ output {
49
49
  ```
50
50
 
51
51
  ## Build & Publish
52
+
53
+ **TLDR**
54
+ ```bash
55
+ bundle exec rake build # build gem
56
+ gem push logstash-output-charrington-x.x.x.gem # publish
57
+ ```
58
+
59
+ Everything in the [vendor](./vendor) directory (including `test-jars` if it exists) will be packaged with the generated gem.
60
+ Avoid including unnecessary dependencies by removing the vendor directory and regenerating it with the rake task.
61
+ See the first command below:
62
+
52
63
  ```bash
64
+ rm -r vendor && bundle exec rake install_jars # clean vendor directory and re-install runtime jar dependencies
53
65
  gem build logstash-output-charrington.gemspec # build
54
66
  gem push logstash-output-charrington-x.x.x.gem # publish
55
67
  gem owner --add <email> logstash-output-charrington # add another authorized publisher
56
- ```
68
+ ```
69
+
70
+ ## Testing
71
+ **NOTE:** Downloading test_jars requires maven and Java. You can set them up with asdf: [asdf-java](https://github.com/skotchpine/asdf-java), [asdf-maven](https://github.com/skotchpine/asdf-maven)
72
+ * There is currently an [open bug](https://github.com/skotchpine/asdf-maven/pull/17) with how `asdf-maven` sets `$JAVA_HOME` on a mac. Edit your `~/.asdf/plugins/maven/bin/exec-env` file to fix the bug
73
+
74
+ ```bash
75
+ bundle exec rake install_test_jars # install test runtime jar dependencies
76
+ bundle exec rspec
77
+ ```
78
+
79
+ #### Dangling Embedded Postgres
80
+ **TLDR**
81
+ ```bash
82
+ lsof -PiTCP -sTCP:LISTEN | grep postgres | awk '{print $2}' | xargs kill
83
+ ```
84
+ Sometimes the embedded postgres JAR can leave dangling open processes. This is obvious when you have `export TEST_DEBUG=true`
85
+ and the test output contains a stacktrace that starts with `[2019-06-21T15:18:06,734][ERROR][ru.yandex.qatools.embed.postgresql.PostgresProcess] Failed to read PID file (File '/var/folders/...`.
86
+ To resolve this issue, check for dangling processes using `lsof -PiTCP -sTCP:LISTEN | grep postgres`, then kill the listed process IDs.
87
+
88
+ #### Resources
89
+ * [logstash-devutils](https://rubygems.org/gems/logstash-devutils) gem with logstash helper methods and tools
90
+ * [spec_helper.rb source](https://github.com/elastic/logstash-devutils/blob/master/lib/logstash/devutils/rspec/spec_helper.rb)
91
+ * [logstash_helpers.rb source](https://github.com/elastic/logstash-devutils/blob/master/lib/logstash/devutils/rspec/logstash_helpers.rb)
92
+ * [logstash-output-file spec test](https://github.com/logstash-plugins/logstash-output-file/blob/master/spec/outputs/file_spec.rb)
93
+
@@ -7,9 +7,9 @@ require 'java'
7
7
  require 'logstash-output-charrington_jars'
8
8
  require 'json'
9
9
  require 'bigdecimal'
10
- require 'pry'
11
10
  require File.join(File.dirname(__FILE__), "charrington/process")
12
- require File.join(File.dirname(__FILE__), "charrington/transform")
11
+ require File.join(File.dirname(__FILE__), "charrington/transform_postgres")
12
+ require File.join(File.dirname(__FILE__), "charrington/transform_redshift")
13
13
  require File.join(File.dirname(__FILE__), "charrington/insert")
14
14
 
15
15
  # Write events to a SQL engine, using JDBC.
@@ -83,7 +83,7 @@ class LogStash::Outputs::Charrington < LogStash::Outputs::Base
83
83
  config :event_as_json_keyword, validate: :string, default: '@event'
84
84
 
85
85
  # The database schema
86
- config :schema, validate: :string, default: ''
86
+ config :schema, validate: :string, required: false
87
87
 
88
88
  def register
89
89
  @logger.info('JDBC - Starting up')
@@ -99,10 +99,13 @@ class LogStash::Outputs::Charrington < LogStash::Outputs::Base
99
99
  connection = get_connection
100
100
  break unless connection
101
101
 
102
+ schema = get_schema(event)
103
+
102
104
  opts = { connection: connection,
103
- schema: @schema,
105
+ schema: schema,
104
106
  max_retries: @max_flush_exceptions,
105
- retry_initial_interval: @retry_initial_interval }
107
+ retry_initial_interval: @retry_initial_interval,
108
+ driver: driver }
106
109
  Charrington::Process.call(connection, event, opts)
107
110
  rescue => e
108
111
  @logger.error("Unable to process event. Event dropped. #{e.message}")
@@ -120,6 +123,15 @@ class LogStash::Outputs::Charrington < LogStash::Outputs::Base
120
123
 
121
124
  private
122
125
 
126
+ def driver
127
+ case @driver_class
128
+ when /redshift/
129
+ "redshift"
130
+ else
131
+ "postgresql"
132
+ end
133
+ end
134
+
123
135
  def setup_and_test_pool!
124
136
  @pool = Java::ComZaxxerHikari::HikariDataSource.new
125
137
  @pool.setDriverClassName(@driver_class) if @driver_class
@@ -173,6 +185,21 @@ class LogStash::Outputs::Charrington < LogStash::Outputs::Base
173
185
  end
174
186
  end
175
187
 
188
+ def get_schema(event)
189
+ if !@schema.nil?
190
+ @schema
191
+ elsif driver == "redshift"
192
+ case event.to_hash["app_name"]
193
+ when "Web App"
194
+ "dea_webapp"
195
+ else
196
+ "dea_mobileapp"
197
+ end
198
+ else
199
+ ""
200
+ end
201
+ end
202
+
176
203
  def get_connection
177
204
  connection = @pool.getConnection
178
205
  rescue => e
@@ -1,21 +1,22 @@
1
1
  require File.join(File.dirname(__FILE__), "service")
2
2
 
3
3
  module Charrington
4
- class AlterTable
4
+ class AlterPostgresTable
5
5
  # This service will add columns to an existing table dynamically based on finding new keys in the JSON structure.
6
6
  # This is potentially called from Insert when an insert fails.
7
7
 
8
8
  include Service
9
- attr_reader :connection, :event, :table_name, :columns
9
+ attr_reader :connection, :event, :table_name, :columns, :schema
10
10
  attr_accessor :column_types
11
11
 
12
12
  Error = Class.new(StandardError)
13
13
  AlterFailed = Class.new(Error)
14
14
 
15
- def initialize(connection, event, table_name, columns)
15
+ def initialize(connection, event, schema, table_name, columns)
16
16
  @connection = connection
17
17
  @event = event.to_hash
18
18
  @table_name = table_name
19
+ @schema = schema
19
20
  @columns = columns
20
21
  @column_types = []
21
22
  end
@@ -33,7 +34,7 @@ module Charrington
33
34
  private
34
35
 
35
36
  def alter_table
36
- execute("ALTER TABLE IF EXISTS #{table_name} #{columns_fragment}")
37
+ execute("ALTER TABLE IF EXISTS #{schema}#{table_name} #{columns_fragment}")
37
38
  end
38
39
 
39
40
  def columns_fragment
@@ -65,7 +66,7 @@ module Charrington
65
66
  end
66
67
 
67
68
  def current_table_columns
68
- sql = "SELECT * FROM #{table_name} LIMIT 1;"
69
+ sql = "SELECT * FROM #{schema}#{table_name} LIMIT 1;"
69
70
  stmt, rs = executeQuery(prep_sql(sql))
70
71
  meta_data = rs.getMetaData()
71
72
  stmt.close unless stmt.nil?
@@ -0,0 +1,109 @@
1
+ require File.join(File.dirname(__FILE__), "service")
2
+
3
+ module Charrington
4
+ class AlterRedshiftTable
5
+ # This service will add columns to an existing table dynamically based on finding new keys in the JSON structure.
6
+ # This is potentially called from Insert when an insert fails.
7
+
8
+ include Service
9
+ attr_reader :connection, :event, :table_name, :columns, :schema
10
+ attr_accessor :column_types
11
+
12
+ Error = Class.new(StandardError)
13
+ AlterFailed = Class.new(Error)
14
+
15
+ def initialize(connection, event, schema, table_name, columns)
16
+ @connection = connection
17
+ @event = event.to_hash
18
+ @schema = schema
19
+ @table_name = table_name
20
+ @columns = columns
21
+ @column_types = []
22
+ end
23
+
24
+ def call
25
+ set_column_types
26
+ alter_table
27
+ true
28
+ rescue => e
29
+ raise AlterFailed, e.message
30
+ ensure
31
+ @column_types.clear if @column_types.is_a? Array
32
+ end
33
+
34
+ private
35
+
36
+ def alter_table
37
+ execute("ALTER TABLE #{schema}#{table_name} #{columns_fragment}")
38
+ end
39
+
40
+ def columns_fragment
41
+ column_types.map do |column|
42
+ "ADD COLUMN #{column}"
43
+ end.join(",")
44
+ end
45
+
46
+ def set_column_types
47
+ (columns - current_table_columns).each_with_index do |key, idx|
48
+
49
+ case event[key]
50
+ when Time, LogStash::Timestamp
51
+ column_types << "#{key} TIMESTAMP"
52
+ when Date
53
+ column_types << "#{key} DATE"
54
+ when Integer
55
+ column_types << "#{key} BIGINT"
56
+ when BigDecimal
57
+ column_types << "#{key} DECIMAL"
58
+ when Float
59
+ column_types << "#{key} DOUBLE PRECISION"
60
+ when true, false
61
+ column_types << "#{key} BOOLEAN"
62
+ else
63
+ column_types << "#{key} VARCHAR(512)"
64
+ end
65
+ end
66
+ end
67
+
68
+ def current_table_columns
69
+ sql = "SELECT * FROM #{schema}#{table_name} LIMIT 1;"
70
+ stmt, rs = executeQuery(prep_sql(sql))
71
+ meta_data = rs.getMetaData()
72
+ stmt.close unless stmt.nil?
73
+ column_count = meta_data.getColumnCount()
74
+ (1..column_count).map {|i| meta_data.getColumnName(i) }
75
+ ensure
76
+ stmt.close unless stmt.nil?
77
+ end
78
+
79
+ def execute(sql)
80
+ stmt = connection.prepareStatement(prep_sql(sql))
81
+ stmt.execute()
82
+ rescue Java::JavaSql::SQLException => e
83
+ puts "Alter Redshift SQLException: #{e.message}"
84
+ rescue => e
85
+ puts "Alter Redshift Unknown exception: #{e.message}"
86
+ ensure
87
+ stmt.close unless stmt.nil?
88
+ end
89
+
90
+ def executeQuery(sql)
91
+ stmt = connection.createStatement()
92
+ # only close the statement if something goes wrong
93
+ # otherwise, the caller is responsible for closing the
94
+ # statement when they are done with the result set
95
+ return stmt, stmt.executeQuery(prep_sql(sql))
96
+ rescue Java::JavaSql::SQLException => e
97
+ puts "execute query SQLException: #{e.message}"
98
+ stmt.close unless stmt.nil?
99
+ # @logger.error("#{e.message}")
100
+ rescue => e
101
+ puts "execute query Unknown exception: #{e.message}"
102
+ stmt.close unless stmt.nil?
103
+ end
104
+
105
+ def prep_sql(sql)
106
+ sql.gsub(/\s+/, " ").strip
107
+ end
108
+ end
109
+ end
@@ -1,21 +1,22 @@
1
1
  require File.join(File.dirname(__FILE__), "service")
2
2
 
3
3
  module Charrington
4
- class CreateTable
4
+ class CreatePostgresTable
5
5
  # This service will create a table dynamically based on the JSON structure.
6
6
  # This is potentially called from Insert when an insert fails.
7
7
 
8
8
  include Service
9
- attr_reader :connection, :event, :table_name, :columns
9
+ attr_reader :connection, :event, :table_name, :columns, :schema
10
10
  attr_accessor :column_types
11
11
 
12
12
  Error = Class.new(StandardError)
13
13
  CreateFailed = Class.new(Error)
14
14
 
15
- def initialize(connection, event, table_name, columns)
15
+ def initialize(connection, event, schema, table_name, columns)
16
16
  @connection = connection
17
17
  @event = event.to_hash
18
18
  @table_name = table_name
19
+ @schema = schema
19
20
  @columns = columns
20
21
  @column_types = initial_columns
21
22
  end
@@ -59,7 +60,7 @@ module Charrington
59
60
  end
60
61
 
61
62
  def create_table
62
- execute("CREATE TABLE IF NOT EXISTS #{table_name} (#{column_types.join(', ')})")
63
+ execute("CREATE TABLE IF NOT EXISTS #{schema}#{table_name} (#{column_types.join(', ')})")
63
64
  end
64
65
 
65
66
  def execute(sql)
@@ -0,0 +1,88 @@
1
+ require File.join(File.dirname(__FILE__), "service")
2
+
3
+ module Charrington
4
+ class CreateRedshiftTable
5
+ # This service will create a table dynamically based on the JSON structure.
6
+ # This is potentially called from Insert when an insert fails.
7
+
8
+ include Service
9
+ attr_reader :connection, :event, :table_name, :columns, :schema
10
+ attr_accessor :column_types
11
+
12
+ Error = Class.new(StandardError)
13
+ CreateFailed = Class.new(Error)
14
+
15
+ def initialize(connection, event, schema, table_name, columns)
16
+ @connection = connection
17
+ @event = event.to_hash
18
+ @schema = schema
19
+ @table_name = table_name
20
+ @columns = columns
21
+ @column_types = initial_columns
22
+ end
23
+
24
+ def call
25
+ set_column_types
26
+ create_table
27
+ true
28
+ rescue => e
29
+ raise CreateFailed, e.message
30
+ ensure
31
+ @column_types.clear if @column_types.is_a? Array
32
+ end
33
+
34
+ private
35
+
36
+ # https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_TABLE_NEW.html
37
+ def set_column_types
38
+ columns.each do |column|
39
+ if column == "id"
40
+ column_types << "#{column} VARCHAR(512) NOT NULL distkey CONSTRAINT #{table_name}_pkey primary key"
41
+ puts "ADD ID column #{column_types}"
42
+ next
43
+ end
44
+ case event[column]
45
+ when Time, LogStash::Timestamp
46
+ column_types << "#{column} TIMESTAMP"
47
+ when Date
48
+ column_types << "#{column} DATE"
49
+ when Integer
50
+ column_types << "#{column} BIGINT"
51
+ when BigDecimal
52
+ column_types << "#{column} DECIMAL"
53
+ when Float
54
+ column_types << "#{column} DOUBLE PRECISION"
55
+ when true, false
56
+ column_types << "#{column} BOOLEAN"
57
+ else
58
+ column_types << "#{column} VARCHAR(512)"
59
+ end
60
+ end
61
+ end
62
+
63
+ def initial_columns
64
+ [
65
+ 'original_timestamp TIMESTAMP DEFAULT GETDATE()',
66
+ 'received_at TIMESTAMP DEFAULT GETDATE()',
67
+ 'timestamp TIMESTAMP DEFAULT GETDATE()',
68
+ 'uuid_ts TIMESTAMP DEFAULT GETDATE()',
69
+ "uuid bigint default \"identity\"(22828367, 2, '1,1'::text)"
70
+ ]
71
+ end
72
+
73
+ def create_table
74
+ execute("CREATE TABLE IF NOT EXISTS #{schema}#{table_name} (#{column_types.join(', ')}) diststyle key sortkey(received_at)")
75
+ end
76
+
77
+ def execute(sql)
78
+ puts "CREATE SQL #{sql}"
79
+ statement = connection.prepareStatement( sql.gsub(/\s+/, " ").strip )
80
+ puts "CREATE SQL #{sql}"
81
+ statement.execute()
82
+ rescue Java::JavaSql::SQLException => e
83
+ puts "Redshift SQLException: #{e.message}"
84
+ ensure
85
+ statement.close unless statement.nil?
86
+ end
87
+ end
88
+ end
@@ -1,5 +1,7 @@
1
- require File.join(File.dirname(__FILE__), "create_table")
2
- require File.join(File.dirname(__FILE__), "alter_table")
1
+ require File.join(File.dirname(__FILE__), "create_postgres_table")
2
+ require File.join(File.dirname(__FILE__), "create_redshift_table")
3
+ require File.join(File.dirname(__FILE__), "alter_postgres_table")
4
+ require File.join(File.dirname(__FILE__), "alter_redshift_table")
3
5
  require File.join(File.dirname(__FILE__), "service")
4
6
 
5
7
  module Charrington
@@ -9,7 +11,7 @@ module Charrington
9
11
 
10
12
  include Service
11
13
  attr_accessor :event, :should_retry
12
- attr_reader :connection, :schema, :table_name, :columns
14
+ attr_reader :connection, :schema, :table_name, :columns, :driver
13
15
  attr_reader :event_as_json_keyword, :enable_event_as_json_keyword
14
16
 
15
17
  Error = Class.new(StandardError)
@@ -20,35 +22,49 @@ module Charrington
20
22
  def initialize(connection, event, opts = {})
21
23
  raise EventNil, "Table name is nil" if event.nil?
22
24
  @event = event.to_hash
23
-
24
25
  event_name = event["event"].to_s.downcase.strip
25
26
  raise TableNameNil, "Table name is nil" if event_name.empty?
26
27
 
27
28
  @connection = connection
28
29
  @schema = opts[:schema].empty? ? '' : "#{opts[:schema]}."
29
- @table_name = "#{@schema}#{event_name.gsub(/[^a-z0-9]+/, "_")}"
30
+
31
+ @table_name = "#{event_name.gsub(/[^a-z0-9]+/, "_")}"
30
32
 
31
33
  @columns = event.keys
32
34
  @should_retry = false
33
35
  @enable_event_as_json_keyword = opts[:enable_event_as_json_keyword]
34
36
  @event_as_json_keyword = opts[:event_as_json_keyword]
37
+ @driver = opts[:driver]
35
38
  end
36
39
 
37
40
  def call
38
- stmt = connection.prepareStatement(insert_statement)
41
+ insert_stmt = insert_statement
42
+ stmt = connection.prepareStatement(insert_stmt)
39
43
  stmt = add_statement_event_params(stmt)
40
44
  stmt.execute
41
45
  should_retry
42
46
  rescue Java::OrgPostgresqlUtil::PSQLException => e
43
47
  case e.getSQLState()
44
48
  when "42P01"
45
- should_retry = Charrington::CreateTable.call(connection, event, table_name, columns)
49
+ should_retry = Charrington::CreatePostgresTable.call(connection, event, schema, table_name, columns)
50
+ when "42703"
51
+ should_retry = Charrington::AlterPostgresTable.call(connection, event, schema, table_name, columns)
52
+ else
53
+ raise InsertFailed, "Charrington: Rescue from SQLException #{e.message}"
54
+ end
55
+ should_retry
56
+ rescue Java::JavaSql::SQLException => e
57
+ puts "catching SQLException #{e.message}"
58
+ case e.getSQLState()
59
+ when "42P01"
60
+ should_retry = Charrington::CreateRedshiftTable.call(connection, event, schema, table_name, columns)
46
61
  when "42703"
47
- should_retry = Charrington::AlterTable.call(connection, event, table_name, columns)
62
+ should_retry = Charrington::AlterRedshiftTable.call(connection, event, schema, table_name, columns)
48
63
  else
49
64
  raise InsertFailed, "Charrington: Rescue from SQLException #{e.message}"
50
65
  end
51
66
  should_retry
67
+
52
68
  rescue => e
53
69
  raise InsertFailed, "Charrington: Rescue from SQLException #{e.message}"
54
70
  ensure
@@ -77,7 +93,7 @@ module Charrington
77
93
  end
78
94
 
79
95
  def insert_statement
80
- "INSERT INTO #{table_name} #{columns_text} VALUES #{insert_values}"
96
+ "INSERT INTO #{schema}#{table_name} #{columns_text} VALUES #{insert_values}"
81
97
  end
82
98
 
83
99
  def prepared_statement
@@ -134,6 +150,8 @@ module Charrington
134
150
  statement.execute()
135
151
  rescue Java::OrgPostgresqlUtil::PSQLException => e
136
152
  puts "PSQLException: #{e.message}"
153
+ rescue Java::JavaSql::SQLException => e
154
+ puts "Redshift SQLException: #{e.message}"
137
155
  ensure
138
156
  statement.close unless statement.nil?
139
157
  end