logstash-filter-jdbc_static 1.0.1 → 1.0.2

Files changed (29)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +7 -2
  3. data/Gemfile +9 -0
  4. data/docs/index.asciidoc +577 -0
  5. data/lib/logstash-filter-jdbc_static_jars.rb +4 -4
  6. data/lib/logstash/filters/jdbc/basic_database.rb +15 -1
  7. data/lib/logstash/filters/jdbc/column.rb +1 -0
  8. data/lib/logstash/filters/jdbc/db_object.rb +4 -6
  9. data/lib/logstash/filters/jdbc/loader.rb +8 -3
  10. data/lib/logstash/filters/jdbc/loader_schedule.rb +30 -4
  11. data/lib/logstash/filters/jdbc/lookup.rb +1 -0
  12. data/lib/logstash/filters/jdbc/lookup_processor.rb +5 -6
  13. data/lib/logstash/filters/jdbc/lookup_result.rb +1 -0
  14. data/lib/logstash/filters/jdbc/read_write_database.rb +28 -16
  15. data/lib/logstash/filters/jdbc/repeating_load_runner.rb +2 -0
  16. data/lib/logstash/filters/jdbc/single_load_runner.rb +8 -5
  17. data/lib/logstash/filters/jdbc/validatable.rb +2 -5
  18. data/lib/logstash/filters/jdbc_static.rb +26 -7
  19. data/logstash-filter-jdbc_static.gemspec +7 -12
  20. data/spec/filters/jdbc/column_spec.rb +2 -2
  21. data/spec/filters/jdbc/loader_spec.rb +1 -0
  22. data/spec/filters/jdbc/read_only_database_spec.rb +26 -2
  23. data/spec/filters/jdbc/read_write_database_spec.rb +18 -17
  24. data/spec/filters/jdbc/repeating_load_runner_spec.rb +1 -1
  25. data/spec/filters/jdbc_static_spec.rb +95 -17
  26. data/spec/filters/shared_helpers.rb +3 -4
  27. data/vendor/jar-dependencies/{runtime-jars → org/apache/derby/derby/10.14.1.0}/derby-10.14.1.0.jar +0 -0
  28. data/vendor/jar-dependencies/{runtime-jars → org/apache/derby/derbyclient/10.14.1.0}/derbyclient-10.14.1.0.jar +0 -0
  29. metadata +22 -9
data/lib/logstash-filter-jdbc_static_jars.rb
@@ -1,5 +1,5 @@
- # encoding: utf-8
- require 'logstash/environment'
+ # AUTOGENERATED BY THE GRADLE SCRIPT. DO NOT EDIT.

- root_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
- LogStash::Environment.load_runtime_jars! File.join(root_dir, "vendor")
+ require 'jar_dependencies'
+ require_jar('org.apache.derby', 'derby', '10.14.1.0')
+ require_jar('org.apache.derby', 'derbyclient', '10.14.1.0')
data/lib/logstash/filters/jdbc/basic_database.rb
@@ -1,3 +1,5 @@
+ # encoding: utf-8
+ require "fileutils"
  require "sequel"
  require "sequel/adapters/jdbc"
  require "java"
@@ -49,6 +51,8 @@ module LogStash module Filters module Jdbc
  rescue *CONNECTION_ERRORS => err
  # we do not raise an error when there is a connection error, we hope that the connection works next time
  logger.error(err_message, :exception => err.message, :backtrace => err.backtrace.take(8))
+ else
+ raise "::Sequel.connect returned a nil db instance, connection_string: #{@connection_string}, options: #{@options_hash.inspect}" if @db.nil?
  end
  end

@@ -80,7 +84,12 @@ module LogStash module Filters module Jdbc

  def verify_connection(connection_string, driver_class, driver_library, user, password)
  begin
- require driver_library if driver_library
+ if driver_library
+ class_loader = java.lang.ClassLoader.getSystemClassLoader().to_java(java.net.URLClassLoader)
+ driver_library.split(",").each do |driver_path|
+ make_driver_path_loadable(class_loader, driver_path.strip)
+ end
+ end
  rescue LoadError => e
  msg = "The driver library cannot be loaded. The system error was: '#{e.message}'."
  raise wrap_error(ConnectionJdbcException, e, msg)
@@ -106,6 +115,11 @@ module LogStash module Filters module Jdbc
  end
  end

+ def make_driver_path_loadable(class_loader, driver_path)
+ # so we can set an expectation in rspec
+ class_loader.add_url(java.io.File.new(driver_path).toURI().toURL())
+ end
+
  def post_initialize()
  @unique_db_name = SecureRandom.hex(12)
  end
data/lib/logstash/filters/jdbc/column.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative "validatable"

  module LogStash module Filters module Jdbc
data/lib/logstash/filters/jdbc/db_object.rb
@@ -1,10 +1,9 @@
+ # encoding: utf-8
  require_relative "validatable"
  require_relative "column"

  module LogStash module Filters module Jdbc

- TEMP_TABLE_PREFIX = "temp_".freeze
-
  class DbObject < Validatable
  # {name => "servers", index_columns => ["ip"], columns => [["ip", "text"], ["name", "text"], ["location", "text"]]},

@@ -12,6 +11,9 @@ module LogStash module Filters module Jdbc

  def build(db)
  return unless valid?
+ if db.nil?
+ raise "DbObject given a database instance that is nil"
+ end
  schema_gen = db.create_table_generator()
  @columns.each {|col| schema_gen.column(col.name, col.datatype)}
  schema_gen.index(@index_columns)
@@ -27,10 +29,6 @@ module LogStash module Filters module Jdbc
  @name <=> other.name
  end

- def as_temp_table_opts
- {"name" => "#{TEMP_TABLE_PREFIX}#{@name}", "preserve_existing" => @preserve_existing, "index_columns" => @index_columns.map(&:to_s), "columns" => @columns.map(&:to_array)}
- end
-
  def to_s
  inspect
  end
data/lib/logstash/filters/jdbc/loader.rb
@@ -10,9 +10,9 @@ module LogStash module Filters module Jdbc

  CONNECTION_ERROR_MSG = "Remote DB connection error when executing loader Jdbc query"

- attr_reader :id, :table, :temp_table, :query, :max_rows
+ attr_reader :id, :table, :query, :max_rows
  attr_reader :connection_string, :driver_library, :driver_class
- attr_reader :user, :password
+ attr_reader :user, :password, :staging_directory

  def build_remote_db
  @remote = ReadOnlyDatabase.create(connection_string, driver_class, driver_library, user, password)
@@ -46,7 +46,6 @@ module LogStash module Filters module Jdbc

  def post_initialize
  if valid?
- @temp_table = "#{TEMP_TABLE_PREFIX}#{@table}".to_sym
  @table = @table.to_sym
  end
  end
@@ -108,6 +107,12 @@ module LogStash module Filters module Jdbc
  else
  @option_errors << "The 'jdbc_password' option for '#{@table}' must be a string"
  end
+
+ @staging_directory = @options["staging_directory"]
+ if @staging_directory
+ FileUtils.mkdir_p(@staging_directory)
+ end
+
  @valid = @option_errors.empty?
  end
  end
data/lib/logstash/filters/jdbc/loader_schedule.rb
@@ -1,24 +1,50 @@
+ # encoding: utf-8
  require_relative "validatable"
+ require "rufus/scheduler"

  module LogStash module Filters module Jdbc
  class LoaderSchedule < Validatable
  attr_reader :schedule_frequency, :loader_schedule

+ def to_log_string
+ message = ""
+ message.concat "these months in the year [#{@cronline.months.to_a.join(", ")}];" unless @cronline.months.nil?
+ message.concat "these days in the month [#{@cronline.days.to_a.join(", ")}];" unless @cronline.days.nil?
+ message.concat "these hours in the day [#{@cronline.hours.to_a.join(", ")}];" unless @cronline.hours.nil?
+ message.concat "these minutes in the hour [#{@cronline.minutes.to_a.join(", ")}];" unless @cronline.minutes.nil?
+ message.concat "these seconds in the minute [#{@cronline.seconds.to_a.join(", ")}]" unless @cronline.seconds.nil?
+ if !message.empty?
+ message.prepend "Scheduled for: "
+ end
+ message
+ end
+
  private

  def post_initialize
  if valid?
  # From the Rufus::Scheduler docs:
- # By default, rufus-scheduler sleeps 0.300 second between every step.
- # At each step it checks for jobs to trigger and so on.
- if @cronline.seconds.is_a?(Set)
+ # By default, rufus-scheduler sleeps 0.300 second between every step.
+ # At each step it checks for jobs to trigger and so on.
+ # set the frequency to 2.5 seconds if we are not reloading in the seconds timeframe
+ # rufus scheduler thread should respond to stop quickly enough.
+ if only_seconds_set?
  @schedule_frequency = 0.3
  else
- @schedule_frequency = 30
+ @schedule_frequency = 2.5
  end
  end
  end

+
+ def only_seconds_set?
+ @cronline.seconds &&
+ @cronline.minutes.nil? &&
+ @cronline.hours.nil? &&
+ @cronline.days.nil? &&
+ @cronline.months.nil?
+ end
+
  def parse_options
  @loader_schedule = @options

data/lib/logstash/filters/jdbc/lookup.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative "lookup_result"
  require "logstash/util/loggable"

data/lib/logstash/filters/jdbc/lookup_processor.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative "lookup"
  require_relative "read_write_database"

@@ -23,16 +24,14 @@ module LogStash module Filters module Jdbc
  errors << instance.formatted_errors
  end
  return nil if errors.empty?
- # errors.unshift("For plugin #{}")
  errors.join("; ")
  end

  def initialize(lookups_array, globals)
- @lookups_errors = []
  @lookups = lookups_array.map.with_index do |options, i|
  Lookup.new(options, globals, "lookup-#{i.next}")
  end
- validate_lookups
+ @lookups_errors = validate_lookups
  if @lookups_errors.empty? && !globals.empty?
  @local = ReadWriteDatabase.create(*globals.values_at(
  "lookup_jdbc_connection_string",
@@ -61,7 +60,7 @@ module LogStash module Filters module Jdbc

  private

- def validate_lookups
+ def validate_lookups(lookups_errors = [])
  ids = Hash.new(0)
  errors = []
  @lookups.each {|lookup| ids[lookup.id] += 1}
@@ -71,7 +70,7 @@ module LogStash module Filters module Jdbc
  if !errors.empty?
  errors.unshift("Id setting must be different across all lookups")
  end
- @lookups_errors.concat(errors)
+ lookups_errors.concat(errors)
  targets = Hash.new {|h,k| h[k] = []}
  errors = []
  @lookups.each do |lookup|
@@ -85,7 +84,7 @@ module LogStash module Filters module Jdbc
  if !errors.empty?
  errors.unshift("Target setting must be different across all lookups")
  end
- @lookups_errors.concat(errors)
+ lookups_errors.concat(errors)
  end
  end
  end end end
data/lib/logstash/filters/jdbc/lookup_result.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  module LogStash module Filters module Jdbc
  class LookupResult
  attr_reader :payload, :invalid_parameters
data/lib/logstash/filters/jdbc/read_write_database.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative "basic_database"

  module LogStash module Filters module Jdbc
@@ -5,13 +6,13 @@ module LogStash module Filters module Jdbc
  def repopulate_all(loaders)
  case loaders.size
  when 1
- fill_and_switch(loaders.first)
+ fill_local_table(loaders.first)
  when 2
- fill_and_switch(loaders.first)
- fill_and_switch(loaders.last)
+ fill_local_table(loaders.first)
+ fill_local_table(loaders.last)
  else
  loaders.each do |loader|
- fill_and_switch(loader)
+ fill_local_table(loader)
  end
  end
  end
@@ -50,26 +51,37 @@ module LogStash module Filters module Jdbc

  private

- def fill_and_switch(loader)
+ def fill_local_table(loader)
  begin
- records = loader.fetch
- return if records.size.zero?
  @rwlock.writeLock().lock()
- tmp = self.class.random_name
- @db.transaction do |conn|
- @db[loader.temp_table].multi_insert(records)
- @db.rename_table(loader.temp_table, tmp)
- @db.rename_table(loader.table, loader.temp_table)
- @db.rename_table(tmp, loader.table)
- @db[loader.temp_table].truncate
+ start = Time.now.to_f
+ records = loader.fetch
+ records_size = records.size
+ return if records_size.zero?
+ logger.info("loader #{loader.id}, fetched #{records_size} records in: #{(Time.now.to_f - start).round(3)} seconds")
+ start = Time.now.to_f
+ import_file = ::File.join(loader.staging_directory, loader.table.to_s)
+ ::File.open(import_file, "w") do |fd|
+ dataset = @db[loader.table]
+ records.each do |hash|
+ array = hash.values.map {|val| dataset.literal(val) }
+ fd.puts(array.join(", "))
+ end
+ fd.fsync
  end
+ logger.info("loader #{loader.id}, saved fetched records to import file in: #{(Time.now.to_f - start).round(3)} seconds")
+ start = Time.now.to_f
+ import_cmd = "CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE (null,'#{loader.table.upcase}','#{import_file}',null,'''',null,1)"
+ @db.execute_ddl(import_cmd)
+ FileUtils.rm_f(import_file)
+ logger.info("loader #{loader.id}, imported all fetched records in: #{(Time.now.to_f - start).round(3)} seconds")
  rescue *CONNECTION_ERRORS => err
  # we do not raise an error when there is a connection error, we hope that the connection works next time
- logger.error("Connection error when filling lookup db from loader query results", :exception => err.message, :backtrace => err.backtrace.take(8))
+ logger.error("Connection error when filling lookup db from loader #{loader.id}, query results", :exception => err.message, :backtrace => err.backtrace.take(8))
  rescue => err
  # In theory all exceptions in Sequel should be wrapped in Sequel::Error
  # There are cases where exceptions occur in unprotected ensure sections
- msg = "Exception when filling lookup db from loader query results, original exception: #{err.class}, original message: #{err.message}"
+ msg = "Exception when filling lookup db from loader #{loader.id}, query results, original exception: #{err.class}, original message: #{err.message}"
  logger.error(msg, :backtrace => err.backtrace.take(16))
  raise wrap_error(LoaderJdbcException, err, msg)
  ensure
data/lib/logstash/filters/jdbc/repeating_load_runner.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative "single_load_runner"

  module LogStash module Filters module Jdbc
@@ -6,6 +7,7 @@ module LogStash module Filters module Jdbc

  def repeated_load
  local.repopulate_all(loaders)
+ @reload_counter.increment
  end
  end
  end end end
data/lib/logstash/filters/jdbc/single_load_runner.rb
@@ -1,3 +1,4 @@
+ # encoding: utf-8
  require_relative 'db_object'

  module LogStash module Filters module Jdbc
@@ -9,12 +10,9 @@ module LogStash module Filters module Jdbc
  @local = local
  @loaders = loaders
  @preloaders = []
+ @reload_counter = Concurrent::AtomicFixnum.new(0)
  preloaders.map do |pre|
- dbo = DbObject.new(pre)
- @preloaders << dbo
- hash = dbo.as_temp_table_opts
- _dbo = DbObject.new(hash)
- @preloaders << _dbo if _dbo.valid?
+ @preloaders << DbObject.new(pre)
  end
  @preloaders.sort!
  end
@@ -22,6 +20,7 @@ module LogStash module Filters module Jdbc
  def initial_load
  do_preload
  local.populate_all(loaders)
+ @reload_counter.increment
  end

  def repeated_load
@@ -31,6 +30,10 @@ module LogStash module Filters module Jdbc
  repeated_load
  end

+ def reload_count
+ @reload_counter.value
+ end
+
  private

  def do_preload
data/lib/logstash/filters/jdbc/validatable.rb
@@ -1,11 +1,8 @@
  # encoding: utf-8
-
  module LogStash module Filters module Jdbc
  class Validatable
- def self.find_validation_errors(array_of_options)
- if !array_of_options.is_a?(Array)
- return "The options must be an Array"
- end
+ def self.find_validation_errors(options)
+ array_of_options = Array(options)
  errors = []
  array_of_options.each do |options|
  instance = new(options)
data/lib/logstash/filters/jdbc_static.rb
@@ -77,7 +77,7 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  #
  # There is no schedule by default. If no schedule is given, then the loaders are run
  # exactly once.
- config :loader_schedule, :validate => [LogStash::Filters::Jdbc::LoaderSchedule]
+ config :loader_schedule, :required => false, :validate => LogStash::Filters::Jdbc::LoaderSchedule

  # Append values to the `tags` field if sql error occured
  # Alternatively, individual `tag_on_failure` arrays can be added to each lookup hash
@@ -87,7 +87,8 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  config :tag_on_default_use, :validate => :array, :default => ["_jdbcstaticdefaultsused"]

  # Remote Load DB Jdbc driver library path to third party driver library.
- config :jdbc_driver_library, :validate => :path
+ # Use comma separated paths in one string if you need more than one library.
+ config :jdbc_driver_library, :validate => :string

  # Remote Load DB Jdbc driver class to load, for example "oracle.jdbc.OracleDriver" or "org.apache.derby.jdbc.ClientDriver"
  config :jdbc_driver_class, :validate => :string, :required => true
@@ -101,6 +102,9 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  # Remote Load DB Jdbc password
  config :jdbc_password, :validate => :password

+ # directory for temp files created during bulk loader import.
+ config :staging_directory, :validate => :string, :default => ::File.join(Dir.tmpdir, "logstash", config_name, "import_data")
+
  # NOTE: For the initial release, we are not allowing the user to specify their own local lookup JDBC DB settings.
  # In the near future we have to consider identical config running in multiple pipelines stomping over each other
  # when the database names are common across configs because there is only one Derby server in memory per JVM.
@@ -123,6 +127,11 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  unless validation_errors.nil?
  return false, validation_errors
  end
+ elsif validator.respond_to?(:find_validation_errors)
+ validation_errors = validator.find_validation_errors(value)
+ unless validation_errors.nil?
+ return false, validation_errors
+ end
  else
  return old_validate_value(value, validator)
  end
@@ -135,7 +144,6 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  def register
  prepare_data_dir
  prepare_runner
- @loader_runner.initial_load
  end

  def filter(event)
@@ -149,12 +157,18 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  @processor.close
  end

+ def loader_runner
+ # use for test verification
+ @loader_runner
+ end
+
  private

  def prepare_data_dir
  # later, when local persistent databases are allowed set this property to LS_HOME/data/jdbc-static/
  # must take multi-pipelines into account and more than one config using the same jdbc-static settings
  java.lang.System.setProperty("derby.system.home", ENV["HOME"])
+ logger.info("derby.system.home is: #{java.lang.System.getProperty("derby.system.home")}")
  end

  def prepare_runner
@@ -168,18 +182,20 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  @processor = Jdbc::LookupProcessor.new(@local_lookups, global_lookup_options)
  runner_args.unshift(@processor.local)
  if @loader_schedule
- require "rufus/scheduler"
  args = []
  @loader_runner = Jdbc::RepeatingLoadRunner.new(*runner_args)
-
+ @loader_runner.initial_load
  cronline = Jdbc::LoaderSchedule.new(@loader_schedule)
+ cronline.to_log_string.tap do |msg|
+ logger.info("Scheduler operations: #{msg}") unless msg.empty?
+ end
+ logger.info("Scheduler scan for work frequency is: #{cronline.schedule_frequency}")
  rufus_args = {:max_work_threads => 1, :frequency => cronline.schedule_frequency}
-
  @scheduler = Rufus::Scheduler.new(rufus_args)
  @scheduler.cron(cronline.loader_schedule, @loader_runner)
- @scheduler.join
  else
  @loader_runner = Jdbc::SingleLoadRunner.new(*runner_args)
+ @loader_runner.initial_load
  end
  end

@@ -212,5 +228,8 @@ module LogStash module Filters class JdbcStatic < LogStash::Filters::Base
  if @jdbc_password
  options["jdbc_password"] = @jdbc_password
  end
+ if @staging_directory
+ options["staging_directory"] = @staging_directory
+ end
  end
  end end end
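
For orientation, a minimal sketch of a jdbc_static filter block exercising the options touched in this release (comma-separated jdbc_driver_library, loader_schedule, and the new staging_directory used by the Derby bulk import). The connection string, credentials, paths, query schedule, and any option names not visible in this diff (jdbc_connection_string, jdbc_user) are placeholder assumptions, not values taken from the changes above; loaders, local_db_objects, and local_lookups are omitted for brevity.

    filter {
      jdbc_static {
        # comma-separated driver paths in one string (jdbc_driver_library is now validated as :string, not :path)
        jdbc_driver_library => "/opt/jdbc/driver-a.jar,/opt/jdbc/driver-b.jar"
        jdbc_driver_class => "org.apache.derby.jdbc.ClientDriver"
        jdbc_connection_string => "jdbc:derby://localhost:1527/lookupdb"   # placeholder
        jdbc_user => "example"                                             # placeholder
        jdbc_password => "secret"                                          # placeholder
        # cron-style schedule; without it the loaders run exactly once
        loader_schedule => "*/30 * * * *"
        # scratch directory for the SYSCS_UTIL.SYSCS_IMPORT_TABLE bulk-load files
        staging_directory => "/tmp/logstash/jdbc_static/import_data"
        # loaders, local_db_objects and local_lookups omitted for brevity
      }
    }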