logstash-filter-jdbc_static 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +7 -2
  3. data/Gemfile +9 -0
  4. data/docs/index.asciidoc +577 -0
  5. data/lib/logstash-filter-jdbc_static_jars.rb +4 -4
  6. data/lib/logstash/filters/jdbc/basic_database.rb +15 -1
  7. data/lib/logstash/filters/jdbc/column.rb +1 -0
  8. data/lib/logstash/filters/jdbc/db_object.rb +4 -6
  9. data/lib/logstash/filters/jdbc/loader.rb +8 -3
  10. data/lib/logstash/filters/jdbc/loader_schedule.rb +30 -4
  11. data/lib/logstash/filters/jdbc/lookup.rb +1 -0
  12. data/lib/logstash/filters/jdbc/lookup_processor.rb +5 -6
  13. data/lib/logstash/filters/jdbc/lookup_result.rb +1 -0
  14. data/lib/logstash/filters/jdbc/read_write_database.rb +28 -16
  15. data/lib/logstash/filters/jdbc/repeating_load_runner.rb +2 -0
  16. data/lib/logstash/filters/jdbc/single_load_runner.rb +8 -5
  17. data/lib/logstash/filters/jdbc/validatable.rb +2 -5
  18. data/lib/logstash/filters/jdbc_static.rb +26 -7
  19. data/logstash-filter-jdbc_static.gemspec +7 -12
  20. data/spec/filters/jdbc/column_spec.rb +2 -2
  21. data/spec/filters/jdbc/loader_spec.rb +1 -0
  22. data/spec/filters/jdbc/read_only_database_spec.rb +26 -2
  23. data/spec/filters/jdbc/read_write_database_spec.rb +18 -17
  24. data/spec/filters/jdbc/repeating_load_runner_spec.rb +1 -1
  25. data/spec/filters/jdbc_static_spec.rb +95 -17
  26. data/spec/filters/shared_helpers.rb +3 -4
  27. data/vendor/jar-dependencies/{runtime-jars → org/apache/derby/derby/10.14.1.0}/derby-10.14.1.0.jar +0 -0
  28. data/vendor/jar-dependencies/{runtime-jars → org/apache/derby/derbyclient/10.14.1.0}/derbyclient-10.14.1.0.jar +0 -0
  29. metadata +22 -9
data/lib/logstash-filter-jdbc_static_jars.rb
@@ -1,5 +1,5 @@
- # encoding: utf-8
- require 'logstash/environment'
+ # AUTOGENERATED BY THE GRADLE SCRIPT. DO NOT EDIT.

- root_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
- LogStash::Environment.load_runtime_jars! File.join(root_dir, "vendor")
+ require 'jar_dependencies'
+ require_jar('org.apache.derby', 'derby', '10.14.1.0')
+ require_jar('org.apache.derby', 'derbyclient', '10.14.1.0')
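The vendored Derby jars now sit in a Maven-style layout (files 27 and 28 in the list above) so that `require_jar` can locate them by coordinates. A minimal sketch of that mapping, assuming jar_dependencies' standard group/artifact/version resolution:

# Hedged sketch: how a require_jar coordinate maps onto the vendored path above.
group, artifact, version = 'org.apache.derby', 'derby', '10.14.1.0'
relative_path = File.join(group.tr('.', '/'), artifact, version, "#{artifact}-#{version}.jar")
# => "org/apache/derby/derby/10.14.1.0/derby-10.14.1.0.jar"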
data/lib/logstash/filters/jdbc/basic_database.rb
@@ -1,3 +1,5 @@
+ # encoding: utf-8
+ require "fileutils"
  require "sequel"
  require "sequel/adapters/jdbc"
  require "java"
@@ -49,6 +51,8 @@ module LogStash module Filters module Jdbc
  rescue *CONNECTION_ERRORS => err
  # we do not raise an error when there is a connection error, we hope that the connection works next time
  logger.error(err_message, :exception => err.message, :backtrace => err.backtrace.take(8))
+ else
+ raise "::Sequel.connect returned a nil db instance, connection_string: #{@connection_string}, options: #{@options_hash.inspect}" if @db.nil?
  end
  end

@@ -80,7 +84,12 @@ module LogStash module Filters module Jdbc

  def verify_connection(connection_string, driver_class, driver_library, user, password)
  begin
- require driver_library if driver_library
+ if driver_library
+ class_loader = java.lang.ClassLoader.getSystemClassLoader().to_java(java.net.URLClassLoader)
+ driver_library.split(",").each do |driver_path|
+ make_driver_path_loadable(class_loader, driver_path.strip)
+ end
+ end
  rescue LoadError => e
  msg = "The driver library cannot be loaded. The system error was: '#{e.message}'."
  raise wrap_error(ConnectionJdbcException, e, msg)
@@ -106,6 +115,11 @@ module LogStash module Filters module Jdbc
  end
  end

+ def make_driver_path_loadable(class_loader, driver_path)
+ # so we can set an expectation in rspec
+ class_loader.add_url(java.io.File.new(driver_path).toURI().toURL())
+ end
+
  def post_initialize()
  @unique_db_name = SecureRandom.hex(12)
  end
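Because `jdbc_driver_library` now validates as a string (see the jdbc_static.rb hunks below), several driver jars can be supplied as one comma-separated value. A minimal JRuby sketch of the loading path introduced above, with hypothetical jar locations:

# Hedged sketch only; it mirrors the new verify_connection branch, and the jar paths are placeholders.
require "java"

driver_library = "/opt/jdbc/driver-core.jar, /opt/jdbc/driver-i18n.jar"
class_loader = java.lang.ClassLoader.getSystemClassLoader().to_java(java.net.URLClassLoader)
driver_library.split(",").each do |driver_path|
  class_loader.add_url(java.io.File.new(driver_path.strip).toURI().toURL())
end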
data/lib/logstash/filters/jdbc/column.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative "validatable"

  module LogStash module Filters module Jdbc
data/lib/logstash/filters/jdbc/db_object.rb
@@ -1,10 +1,9 @@
+ # encoding: utf-8
  require_relative "validatable"
  require_relative "column"

  module LogStash module Filters module Jdbc

- TEMP_TABLE_PREFIX = "temp_".freeze
-
  class DbObject < Validatable
  # {name => "servers", index_columns => ["ip"], columns => [["ip", "text"], ["name", "text"], ["location", "text"]]},

@@ -12,6 +11,9 @@ module LogStash module Filters module Jdbc
  def build(db)
  return unless valid?
+ if db.nil?
+ raise "DbObject given a database instance that is nil"
+ end
  schema_gen = db.create_table_generator()
  @columns.each {|col| schema_gen.column(col.name, col.datatype)}
  schema_gen.index(@index_columns)
@@ -27,10 +29,6 @@ module LogStash module Filters module Jdbc
  @name <=> other.name
  end

- def as_temp_table_opts
- {"name" => "#{TEMP_TABLE_PREFIX}#{@name}", "preserve_existing" => @preserve_existing, "index_columns" => @index_columns.map(&:to_s), "columns" => @columns.map(&:to_array)}
- end
-
  def to_s
  inspect
  end
data/lib/logstash/filters/jdbc/loader.rb
@@ -10,9 +10,9 @@ module LogStash module Filters module Jdbc

  CONNECTION_ERROR_MSG = "Remote DB connection error when executing loader Jdbc query"

- attr_reader :id, :table, :temp_table, :query, :max_rows
+ attr_reader :id, :table, :query, :max_rows
  attr_reader :connection_string, :driver_library, :driver_class
- attr_reader :user, :password
+ attr_reader :user, :password, :staging_directory

  def build_remote_db
  @remote = ReadOnlyDatabase.create(connection_string, driver_class, driver_library, user, password)
@@ -46,7 +46,6 @@ module LogStash module Filters module Jdbc

  def post_initialize
  if valid?
- @temp_table = "#{TEMP_TABLE_PREFIX}#{@table}".to_sym
  @table = @table.to_sym
  end
  end
@@ -108,6 +107,12 @@ module LogStash module Filters module Jdbc
  else
  @option_errors << "The 'jdbc_password' option for '#{@table}' must be a string"
  end
+
+ @staging_directory = @options["staging_directory"]
+ if @staging_directory
+ FileUtils.mkdir_p(@staging_directory)
+ end
+
  @valid = @option_errors.empty?
  end
  end
data/lib/logstash/filters/jdbc/loader_schedule.rb
@@ -1,24 +1,50 @@
+ # encoding: utf-8
  require_relative "validatable"
+ require "rufus/scheduler"

  module LogStash module Filters module Jdbc
  class LoaderSchedule < Validatable
  attr_reader :schedule_frequency, :loader_schedule

+ def to_log_string
+ message = ""
+ message.concat "these months in the year [#{@cronline.months.to_a.join(", ")}];" unless @cronline.months.nil?
+ message.concat "these days in the month [#{@cronline.days.to_a.join(", ")}];" unless @cronline.days.nil?
+ message.concat "these hours in the day [#{@cronline.hours.to_a.join(", ")}];" unless @cronline.hours.nil?
+ message.concat "these minutes in the hour [#{@cronline.minutes.to_a.join(", ")}];" unless @cronline.minutes.nil?
+ message.concat "these seconds in the minute [#{@cronline.seconds.to_a.join(", ")}]" unless @cronline.seconds.nil?
+ if !message.empty?
+ message.prepend "Scheduled for: "
+ end
+ message
+ end
+
  private

  def post_initialize
  if valid?
  # From the Rufus::Scheduler docs:
- # By default, rufus-scheduler sleeps 0.300 second between every step.
- # At each step it checks for jobs to trigger and so on.
- if @cronline.seconds.is_a?(Set)
+ # By default, rufus-scheduler sleeps 0.300 second between every step.
+ # At each step it checks for jobs to trigger and so on.
+ # set the frequency to 2.5 seconds if we are not reloading in the seconds timeframe
+ # rufus scheduler thread should respond to stop quickly enough.
+ if only_seconds_set?
  @schedule_frequency = 0.3
  else
- @schedule_frequency = 30
+ @schedule_frequency = 2.5
  end
  end
  end

+
+ def only_seconds_set?
+ @cronline.seconds &&
+ @cronline.minutes.nil? &&
+ @cronline.hours.nil? &&
+ @cronline.days.nil? &&
+ @cronline.months.nil?
+ end
+
  def parse_options
  @loader_schedule = @options

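The scan-frequency choice above (0.3 s when the cron line only constrains seconds, 2.5 s otherwise) can be illustrated with a small self-contained sketch; `CronFields` is a hypothetical stand-in for the parsed cron line the plugin actually gets from Rufus::Scheduler:

# Hedged sketch: CronFields is a stand-in struct, not the plugin's real cron object.
CronFields = Struct.new(:seconds, :minutes, :hours, :days, :months)

def scan_frequency(cron)
  only_seconds = cron.seconds && cron.minutes.nil? && cron.hours.nil? &&
                 cron.days.nil? && cron.months.nil?
  only_seconds ? 0.3 : 2.5
end

scan_frequency(CronFields.new([0, 10, 20, 30, 40, 50], nil, nil, nil, nil)) # => 0.3, e.g. "*/10 * * * * *"
scan_frequency(CronFields.new([0], [0, 30], nil, nil, nil))                 # => 2.5, e.g. "0 0,30 * * * *"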
 
data/lib/logstash/filters/jdbc/lookup.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative "lookup_result"
  require "logstash/util/loggable"

data/lib/logstash/filters/jdbc/lookup_processor.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative "lookup"
  require_relative "read_write_database"

@@ -23,16 +24,14 @@ module LogStash module Filters module Jdbc
  errors << instance.formatted_errors
  end
  return nil if errors.empty?
- # errors.unshift("For plugin #{}")
  errors.join("; ")
  end

  def initialize(lookups_array, globals)
- @lookups_errors = []
  @lookups = lookups_array.map.with_index do |options, i|
  Lookup.new(options, globals, "lookup-#{i.next}")
  end
- validate_lookups
+ @lookups_errors = validate_lookups
  if @lookups_errors.empty? && !globals.empty?
  @local = ReadWriteDatabase.create(*globals.values_at(
  "lookup_jdbc_connection_string",
@@ -61,7 +60,7 @@ module LogStash module Filters module Jdbc

  private

- def validate_lookups
+ def validate_lookups(lookups_errors = [])
  ids = Hash.new(0)
  errors = []
  @lookups.each {|lookup| ids[lookup.id] += 1}
@@ -71,7 +70,7 @@ module LogStash module Filters module Jdbc
  if !errors.empty?
  errors.unshift("Id setting must be different across all lookups")
  end
- @lookups_errors.concat(errors)
+ lookups_errors.concat(errors)
  targets = Hash.new {|h,k| h[k] = []}
  errors = []
  @lookups.each do |lookup|
@@ -85,7 +84,7 @@ module LogStash module Filters module Jdbc
  if !errors.empty?
  errors.unshift("Target setting must be different across all lookups")
  end
- @lookups_errors.concat(errors)
+ lookups_errors.concat(errors)
  end
  end
  end end end
data/lib/logstash/filters/jdbc/lookup_result.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  module LogStash module Filters module Jdbc
  class LookupResult
  attr_reader :payload, :invalid_parameters
data/lib/logstash/filters/jdbc/read_write_database.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative "basic_database"

  module LogStash module Filters module Jdbc
@@ -5,13 +6,13 @@ module LogStash module Filters module Jdbc
  def repopulate_all(loaders)
  case loaders.size
  when 1
- fill_and_switch(loaders.first)
+ fill_local_table(loaders.first)
  when 2
- fill_and_switch(loaders.first)
- fill_and_switch(loaders.last)
+ fill_local_table(loaders.first)
+ fill_local_table(loaders.last)
  else
  loaders.each do |loader|
- fill_and_switch(loader)
+ fill_local_table(loader)
  end
  end
  end
@@ -50,26 +51,37 @@ module LogStash module Filters module Jdbc

  private

- def fill_and_switch(loader)
+ def fill_local_table(loader)
  begin
- records = loader.fetch
- return if records.size.zero?
  @rwlock.writeLock().lock()
- tmp = self.class.random_name
- @db.transaction do |conn|
- @db[loader.temp_table].multi_insert(records)
- @db.rename_table(loader.temp_table, tmp)
- @db.rename_table(loader.table, loader.temp_table)
- @db.rename_table(tmp, loader.table)
- @db[loader.temp_table].truncate
+ start = Time.now.to_f
+ records = loader.fetch
+ records_size = records.size
+ return if records_size.zero?
+ logger.info("loader #{loader.id}, fetched #{records_size} records in: #{(Time.now.to_f - start).round(3)} seconds")
+ start = Time.now.to_f
+ import_file = ::File.join(loader.staging_directory, loader.table.to_s)
+ ::File.open(import_file, "w") do |fd|
+ dataset = @db[loader.table]
+ records.each do |hash|
+ array = hash.values.map {|val| dataset.literal(val) }
+ fd.puts(array.join(", "))
+ end
+ fd.fsync
  end
+ logger.info("loader #{loader.id}, saved fetched records to import file in: #{(Time.now.to_f - start).round(3)} seconds")
+ start = Time.now.to_f
+ import_cmd = "CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE (null,'#{loader.table.upcase}','#{import_file}',null,'''',null,1)"
+ @db.execute_ddl(import_cmd)
+ FileUtils.rm_f(import_file)
+ logger.info("loader #{loader.id}, imported all fetched records in: #{(Time.now.to_f - start).round(3)} seconds")
  rescue *CONNECTION_ERRORS => err
  # we do not raise an error when there is a connection error, we hope that the connection works next time
- logger.error("Connection error when filling lookup db from loader query results", :exception => err.message, :backtrace => err.backtrace.take(8))
+ logger.error("Connection error when filling lookup db from loader #{loader.id}, query results", :exception => err.message, :backtrace => err.backtrace.take(8))
  rescue => err
  # In theory all exceptions in Sequel should be wrapped in Sequel::Error
  # There are cases where exceptions occur in unprotected ensure sections
- msg = "Exception when filling lookup db from loader query results, original exception: #{err.class}, original message: #{err.message}"
+ msg = "Exception when filling lookup db from loader #{loader.id}, query results, original exception: #{err.class}, original message: #{err.message}"
  logger.error(msg, :backtrace => err.backtrace.take(16))
  raise wrap_error(LoaderJdbcException, err, msg)
  ensure
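The table-swap approach is replaced here by staging the fetched rows to a file and handing that file to Derby's bulk-import procedure. With a hypothetical table named servers and the default staging directory (assuming Dir.tmpdir is /tmp), the interpolated string passed to @db.execute_ddl would look like:

  CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE (null,'SERVERS','/tmp/logstash/jdbc_static/import_data/servers',null,'''',null,1)

Per the Derby documentation, the arguments are the schema (null, meaning the current schema), the table name, the input file, the column delimiter (null defaults to a comma), the character delimiter (a single quote here, matching Sequel's literal quoting in the staging file), the codeset (null for the platform default), and a non-zero replace flag, so each import replaces the table's existing rows rather than appending to them.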
data/lib/logstash/filters/jdbc/repeating_load_runner.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative "single_load_runner"

  module LogStash module Filters module Jdbc
@@ -6,6 +7,7 @@ module LogStash module Filters module Jdbc

  def repeated_load
  local.repopulate_all(loaders)
+ @reload_counter.increment
  end
  end
  end end end
data/lib/logstash/filters/jdbc/single_load_runner.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative 'db_object'

  module LogStash module Filters module Jdbc
@@ -9,12 +10,9 @@ module LogStash module Filters module Jdbc
  @local = local
  @loaders = loaders
  @preloaders = []
+ @reload_counter = Concurrent::AtomicFixnum.new(0)
  preloaders.map do |pre|
- dbo = DbObject.new(pre)
- @preloaders << dbo
- hash = dbo.as_temp_table_opts
- _dbo = DbObject.new(hash)
- @preloaders << _dbo if _dbo.valid?
+ @preloaders << DbObject.new(pre)
  end
  @preloaders.sort!
  end
@@ -22,6 +20,7 @@ module LogStash module Filters module Jdbc
  def initial_load
  do_preload
  local.populate_all(loaders)
+ @reload_counter.increment
  end

  def repeated_load
@@ -31,6 +30,10 @@ module LogStash module Filters module Jdbc
  repeated_load
  end

+ def reload_count
+ @reload_counter.value
+ end
+
  private

  def do_preload
data/lib/logstash/filters/jdbc/validatable.rb
@@ -1,11 +1,8 @@
  # encoding: utf-8
-
  module LogStash module Filters module Jdbc
  class Validatable
- def self.find_validation_errors(array_of_options)
- if !array_of_options.is_a?(Array)
- return "The options must be an Array"
- end
+ def self.find_validation_errors(options)
+ array_of_options = Array(options)
  errors = []
  array_of_options.each do |options|
  instance = new(options)
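The Array() coercion above lets the same validator accept either a single option value or a list of them, which the new :loader_schedule validation in jdbc_static.rb appears to rely on. For reference, these are plain Ruby Kernel#Array semantics:

# Kernel#Array normalizes the common cases this validator now receives.
Array(nil)                              # => []
Array("*/30 * * * * *")                 # => ["*/30 * * * * *"]
Array([{"id" => "l1"}, {"id" => "l2"}]) # => [{"id" => "l1"}, {"id" => "l2"}]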
data/lib/logstash/filters/jdbc_static.rb
@@ -77,7 +77,7 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  #
  # There is no schedule by default. If no schedule is given, then the loaders are run
  # exactly once.
- config :loader_schedule, :validate => [LogStash::Filters::Jdbc::LoaderSchedule]
+ config :loader_schedule, :required => false, :validate => LogStash::Filters::Jdbc::LoaderSchedule

  # Append values to the `tags` field if sql error occured
  # Alternatively, individual `tag_on_failure` arrays can be added to each lookup hash
@@ -87,7 +87,8 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  config :tag_on_default_use, :validate => :array, :default => ["_jdbcstaticdefaultsused"]

  # Remote Load DB Jdbc driver library path to third party driver library.
- config :jdbc_driver_library, :validate => :path
+ # Use comma separated paths in one string if you need more than one library.
+ config :jdbc_driver_library, :validate => :string

  # Remote Load DB Jdbc driver class to load, for example "oracle.jdbc.OracleDriver" or "org.apache.derby.jdbc.ClientDriver"
  config :jdbc_driver_class, :validate => :string, :required => true
@@ -101,6 +102,9 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  # Remote Load DB Jdbc password
  config :jdbc_password, :validate => :password

+ # directory for temp files created during bulk loader import.
+ config :staging_directory, :validate => :string, :default => ::File.join(Dir.tmpdir, "logstash", config_name, "import_data")
+
  # NOTE: For the initial release, we are not allowing the user to specify their own local lookup JDBC DB settings.
  # In the near future we have to consider identical config running in multiple pipelines stomping over each other
  # when the database names are common across configs because there is only one Derby server in memory per JVM.
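As a hedged illustration of the staging_directory default above (the prefix depends on the platform's Dir.tmpdir, assumed here to be /tmp, and config_name is "jdbc_static"):

::File.join(Dir.tmpdir, "logstash", "jdbc_static", "import_data")
# => "/tmp/logstash/jdbc_static/import_data" on a typical Linux host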
@@ -123,6 +127,11 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  unless validation_errors.nil?
  return false, validation_errors
  end
+ elsif validator.respond_to?(:find_validation_errors)
+ validation_errors = validator.find_validation_errors(value)
+ unless validation_errors.nil?
+ return false, validation_errors
+ end
  else
  return old_validate_value(value, validator)
  end
@@ -135,7 +144,6 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  def register
  prepare_data_dir
  prepare_runner
- @loader_runner.initial_load
  end

  def filter(event)
@@ -149,12 +157,18 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  @processor.close
  end

+ def loader_runner
+ # use for test verification
+ @loader_runner
+ end
+
  private

  def prepare_data_dir
  # later, when local persistent databases are allowed set this property to LS_HOME/data/jdbc-static/
  # must take multi-pipelines into account and more than one config using the same jdbc-static settings
  java.lang.System.setProperty("derby.system.home", ENV["HOME"])
+ logger.info("derby.system.home is: #{java.lang.System.getProperty("derby.system.home")}")
  end

  def prepare_runner
@@ -168,18 +182,20 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  @processor = Jdbc::LookupProcessor.new(@local_lookups, global_lookup_options)
  runner_args.unshift(@processor.local)
  if @loader_schedule
- require "rufus/scheduler"
  args = []
  @loader_runner = Jdbc::RepeatingLoadRunner.new(*runner_args)
-
+ @loader_runner.initial_load
  cronline = Jdbc::LoaderSchedule.new(@loader_schedule)
+ cronline.to_log_string.tap do |msg|
+ logger.info("Scheduler operations: #{msg}") unless msg.empty?
+ end
+ logger.info("Scheduler scan for work frequency is: #{cronline.schedule_frequency}")
  rufus_args = {:max_work_threads => 1, :frequency => cronline.schedule_frequency}
-
  @scheduler = Rufus::Scheduler.new(rufus_args)
  @scheduler.cron(cronline.loader_schedule, @loader_runner)
- @scheduler.join
  else
  @loader_runner = Jdbc::SingleLoadRunner.new(*runner_args)
+ @loader_runner.initial_load
  end
  end

@@ -212,5 +228,8 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  if @jdbc_password
  options["jdbc_password"] = @jdbc_password
  end
+ if @staging_directory
+ options["staging_directory"] = @staging_directory
+ end
  end
  end end end