logstash-input-jdbc 2.1.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6fc9b06fd127404a35d42af320ad90b37f0ff642
4
- data.tar.gz: 21798f6ed0c0620f054acc6abe0c207211fadc70
3
+ metadata.gz: bf7a5bf46a7c317d17c7e12169046979b19242c1
4
+ data.tar.gz: 26fe05b541b4a84b764e08d6f9527bda90329639
5
5
  SHA512:
6
- metadata.gz: bc1d4d90e171555d00334ab59ef67699bdf55e714557ef565a9b47d870b20d233f23fe7f0a48ed0025396ab822a9e18971d7eed1692027eac4971ced58f1ffb6
7
- data.tar.gz: aeeb2afa34a2767d035c20e71c0efdd709f9df48988954761d064e542def704ba7c4ec8e52248187eb8df8847f6db1bfb5c60f081db2b1bd8709f6bb908210f3
6
+ metadata.gz: 955833f1f3abe24e373a193cac7e34d27de4ba0245cfb7210955e45a4a650c512ecd8e1f2044de746d057b386123cb2bd2be5efaba705b9e1e581b8d2ee268cf
7
+ data.tar.gz: d36bc646854ef3409fe41055eca173eabcb9ea19913738d658fded3ae410c95f3169c9e9b1ff3ff8fc8a2532c9c48e779588e140543bc9a613a5606a9fcca6e2
@@ -1,3 +1,6 @@
1
+ ## 3.0.0
2
+ - [#57](https://github.com/logstash-plugins/logstash-input-jdbc/issues/57) New feature: Allow tracking by a column value rather than by last run time. **This is a breaking change**, as users may be required to change from using `sql_last_start` to use `sql_last_value` in their queries. No other changes are required if you've been using time-based queries. See the documentation if you wish to use an incremental column value to track updates to your tables.
3
+
1
4
  ## 2.1.1
2
5
  - [#44](https://github.com/logstash-plugins/logstash-input-jdbc/issues/44) Add an option to control whether column names are lowercased.
3
6
 
@@ -35,12 +35,12 @@ require "yaml" # persistence
35
35
  #
36
36
  # ==== State
37
37
  #
38
- # The plugin will persist the `sql_last_start` parameter in the form of a
39
- # metadata file stored in the configured `last_run_metadata_path`. Upon shutting down,
40
- # this file will be updated with the current value of `sql_last_start`. Next time
38
+ # The plugin will persist the `sql_last_value` parameter in the form of a
39
+ # metadata file stored in the configured `last_run_metadata_path`. Upon query execution,
40
+ # this file will be updated with the current value of `sql_last_value`. Next time
41
41
  # the pipeline starts up, this value will be updated by reading from the file. If
42
- # `clean_run` is set to true, this value will be ignored and `sql_last_start` will be
43
- # set to Jan 1, 1970, as if no query has ever been executed.
42
+ # `clean_run` is set to true, this value will be ignored and `sql_last_value` will be
43
+ # set to Jan 1, 1970, or 0 if `use_column_value` is true, as if no query has ever been executed.
44
44
  #
45
45
  # ==== Dealing With Large Result-sets
46
46
  #
@@ -90,8 +90,9 @@ require "yaml" # persistence
90
90
  # Here is the list:
91
91
  #
92
92
  # |==========================================================
93
- # |sql_last_start | The last time a statement was executed. This is set to Thursday, 1 January 1970
94
- # before any query is run, and updated accordingly after first query is run.
93
+ # |sql_last_value | The value used to calculate which rows to query. Before any query is run,
94
+ # this is set to Thursday, 1 January 1970, or 0 if `use_column_value` is true and
95
+ # `tracking_column` is set. It is updated accordingly after subsequent queries are run.
95
96
  # |==========================================================
96
97
  #
97
98
  class LogStash::Inputs::Jdbc < LogStash::Inputs::Base
@@ -131,6 +132,12 @@ class LogStash::Inputs::Jdbc < LogStash::Inputs::Base
131
132
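  # Path to the file that stores the last run state (`sql_last_value`: a timestamp, or the tracked column value when `use_column_value` is true)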
132
133
  config :last_run_metadata_path, :validate => :string, :default => "#{ENV['HOME']}/.logstash_jdbc_last_run"
133
134
 
135
+ # Use an incremental column value rather than a timestamp
136
+ config :use_column_value, :validate => :boolean, :default => false
137
+
138
+ # If tracking column value rather than timestamp, the column whose value is to be tracked
139
+ config :tracking_column, :validate => :string
140
+
134
141
  # Whether the previous run state should be preserved
135
142
  config :clean_run, :validate => :boolean, :default => false
136
143
 
@@ -146,11 +153,18 @@ class LogStash::Inputs::Jdbc < LogStash::Inputs::Base
146
153
  require "rufus/scheduler"
147
154
  prepare_jdbc_connection
148
155
 
149
- # load sql_last_start from file if exists
156
+ # Raise an error if @use_column_value is true, but no @tracking_column is set
157
+ if @use_column_value
158
+ if @tracking_column.nil?
159
+ raise(LogStash::ConfigurationError, "Must set :tracking_column if :use_column_value is true.")
160
+ end
161
+ end
162
+
163
+ # load sql_last_value from file if exists
150
164
  if @clean_run && File.exist?(@last_run_metadata_path)
151
165
  File.delete(@last_run_metadata_path)
152
166
  elsif File.exist?(@last_run_metadata_path)
153
- @sql_last_start = YAML.load(File.read(@last_run_metadata_path))
167
+ @sql_last_value = YAML.load(File.read(@last_run_metadata_path))
154
168
  end
155
169
 
156
170
  unless @statement.nil? ^ @statement_filepath.nil?
@@ -165,22 +179,19 @@ class LogStash::Inputs::Jdbc < LogStash::Inputs::Base
165
179
  @scheduler = Rufus::Scheduler.new(:max_work_threads => 1)
166
180
  @scheduler.cron @schedule do
167
181
  execute_query(queue)
182
+ update_state_file
168
183
  end
169
184
 
170
185
  @scheduler.join
171
186
  else
172
187
  execute_query(queue)
188
+ update_state_file
173
189
  end
174
190
  end # def run
175
191
 
176
192
  def stop
177
193
  @scheduler.stop if @scheduler
178
194
 
179
- # update state file for next run
180
- if @record_last_run
181
- File.write(@last_run_metadata_path, YAML.dump(@sql_last_start))
182
- end
183
-
184
195
  close_jdbc_connection
185
196
  end
186
197
 
@@ -188,11 +199,18 @@ class LogStash::Inputs::Jdbc < LogStash::Inputs::Base
188
199
 
189
200
  def execute_query(queue)
190
201
  # update default parameters
191
- @parameters['sql_last_start'] = @sql_last_start
202
+ @parameters['sql_last_value'] = @sql_last_value
192
203
  execute_statement(@statement, @parameters) do |row|
193
204
  event = LogStash::Event.new(row)
194
205
  decorate(event)
195
206
  queue << event
196
207
  end
197
208
  end
209
+
210
+ def update_state_file
211
+ if @record_last_run
212
+ File.write(@last_run_metadata_path, YAML.dump(@sql_last_value))
213
+ end
214
+ end
215
+
198
216
  end # class LogStash::Inputs::Jdbc
@@ -161,7 +161,11 @@ module LogStash::PluginMixins::Jdbc
161
161
  else
162
162
  @database.identifier_output_method = :to_s
163
163
  end
164
- @sql_last_start = Time.at(0).utc
164
+ if @use_column_value
165
+ @sql_last_value = 0
166
+ else
167
+ @sql_last_value = Time.at(0).utc
168
+ end
165
169
  end # def prepare_jdbc_connection
166
170
 
167
171
  public
@@ -175,27 +179,47 @@ module LogStash::PluginMixins::Jdbc
175
179
  begin
176
180
  parameters = symbolized_params(parameters)
177
181
  query = @database[statement, parameters]
182
+ sql_last_value = @use_column_value ? @sql_last_value : Time.now.utc
183
+ @tracking_column_warning_sent = false
178
184
  @logger.debug? and @logger.debug("Executing JDBC query", :statement => statement, :parameters => parameters, :count => query.count)
179
- @sql_last_start = Time.now.utc
180
185
 
181
186
  if @jdbc_paging_enabled
182
187
  query.each_page(@jdbc_page_size) do |paged_dataset|
183
188
  paged_dataset.each do |row|
189
+ sql_last_value = get_column_value(row) if @use_column_value
184
190
  yield extract_values_from(row)
185
191
  end
186
192
  end
187
193
  else
188
194
  query.each do |row|
195
+ sql_last_value = get_column_value(row) if @use_column_value
189
196
  yield extract_values_from(row)
190
197
  end
191
198
  end
192
199
  success = true
193
200
  rescue Sequel::DatabaseConnectionError, Sequel::DatabaseError => e
194
201
  @logger.warn("Exception when executing JDBC query", :exception => e)
202
+ else
203
+ @sql_last_value = sql_last_value
195
204
  end
196
205
  return success
197
206
  end
198
207
 
208
+ public
209
+ def get_column_value(row)
210
+ if !row.has_key?(@tracking_column.to_sym)
211
+ if !@tracking_column_warning_sent
212
+ @logger.warn("tracking_column not found in dataset.", :tracking_column => @tracking_column)
213
+ @tracking_column_warning_sent = true
214
+ end
215
+ # If we can't find the tracking column, return the current value in the ivar
216
+ @sql_last_value
217
+ else
218
+ # Otherwise send the updated tracking column
219
+ row[@tracking_column.to_sym]
220
+ end
221
+ end
222
+
199
223
  # Symbolize parameters keys to use with Sequel
200
224
  private
201
225
  def symbolized_params(parameters)
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-jdbc'
3
- s.version = '2.1.1'
3
+ s.version = '3.0.0'
4
4
  s.licenses = ['Apache License (2.0)']
5
5
  s.summary = "This example input streams a string at a definable interval."
6
6
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -161,6 +161,40 @@ describe LogStash::Inputs::Jdbc do
161
161
 
162
162
  end
163
163
 
164
+ context "when scheduling and previous runs are to be preserved" do
165
+ let(:settings) do
166
+ {
167
+ "statement" => "SELECT 1 as num_param FROM SYSIBM.SYSDUMMY1",
168
+ "schedule" => "* * * * * UTC",
169
+ "last_run_metadata_path" => Stud::Temporary.pathname
170
+ }
171
+ end
172
+
173
+ let(:last_run_time) { Time.at(1).utc }
174
+
175
+ before do
176
+ plugin.register
177
+ end
178
+
179
+ it "should flush previous run metadata per query execution" do
180
+ Timecop.travel(Time.new(2000))
181
+ Timecop.scale(60)
182
+ runner = Thread.new do
183
+ plugin.run(queue)
184
+ end
185
+ sleep 1
186
+ for i in 0..1
187
+ sleep 1
188
+ updated_last_run = YAML.load(File.read(settings["last_run_metadata_path"]))
189
+ expect(updated_last_run).to be > last_run_time
190
+ last_run_time = updated_last_run
191
+ end
192
+
193
+ plugin.stop
194
+ end
195
+
196
+ end
197
+
164
198
  context "when iterating result-set via paging" do
165
199
 
166
200
  let(:settings) do
@@ -295,7 +329,7 @@ describe LogStash::Inputs::Jdbc do
295
329
 
296
330
  context "when iteratively running plugin#run" do
297
331
  let(:settings) do
298
- {"statement" => "SELECT num, created_at FROM test_table WHERE created_at > :sql_last_start"}
332
+ {"statement" => "SELECT num, created_at FROM test_table WHERE created_at > :sql_last_value"}
299
333
  end
300
334
 
301
335
  let(:nums) { [10, 20, 30, 40, 50] }
@@ -329,14 +363,148 @@ describe LogStash::Inputs::Jdbc do
329
363
  end
330
364
  end
331
365
 
332
- context "when previous runs are to be respected" do
366
+ context "when iteratively running plugin#run with tracking_column" do
367
+ let(:mixin_settings) do
368
+ { "jdbc_user" => ENV['USER'], "jdbc_driver_class" => "org.apache.derby.jdbc.EmbeddedDriver",
369
+ "jdbc_connection_string" => "jdbc:derby:memory:testdb;create=true"
370
+ }
371
+ end
333
372
 
334
373
  let(:settings) do
335
- { "statement" => "SELECT * FROM test_table",
374
+ { "statement" => "SELECT num, created_at FROM test_table WHERE num > :sql_last_value",
375
+ "use_column_value" => true,
376
+ "tracking_column" => "num",
336
377
  "last_run_metadata_path" => Stud::Temporary.pathname }
337
378
  end
338
379
 
339
- let(:last_run_time) { Time.at(1).utc }
380
+ let(:nums) { [10, 20, 30, 40, 50] }
381
+
382
+ before do
383
+ plugin.register
384
+ end
385
+
386
+ after do
387
+ plugin.stop
388
+ end
389
+
390
+ it "should successfully update sql_last_value" do
391
+ test_table = db[:test_table]
392
+
393
+ plugin.run(queue)
394
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(0)
395
+ test_table.insert(:num => nums[0], :created_at => Time.now.utc)
396
+ test_table.insert(:num => nums[1], :created_at => Time.now.utc)
397
+ plugin.run(queue)
398
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
399
+ test_table.insert(:num => nums[2], :created_at => Time.now.utc)
400
+ test_table.insert(:num => nums[3], :created_at => Time.now.utc)
401
+ test_table.insert(:num => nums[4], :created_at => Time.now.utc)
402
+ plugin.run(queue)
403
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(50)
404
+ end
405
+ end
406
+
407
+ context "when iteratively running plugin#run with tracking_column and stored metadata" do
408
+ let(:mixin_settings) do
409
+ { "jdbc_user" => ENV['USER'], "jdbc_driver_class" => "org.apache.derby.jdbc.EmbeddedDriver",
410
+ "jdbc_connection_string" => "jdbc:derby:memory:testdb;create=true"
411
+ }
412
+ end
413
+
414
+ let(:settings) do
415
+ { "statement" => "SELECT num, created_at FROM test_table WHERE num > :sql_last_value",
416
+ "use_column_value" => true,
417
+ "tracking_column" => "num",
418
+ "last_run_metadata_path" => Stud::Temporary.pathname }
419
+ end
420
+
421
+ let(:nums) { [10, 20, 30, 40, 50] }
422
+ let(:last_run_value) { 20 }
423
+
424
+ before do
425
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_value))
426
+ plugin.register
427
+ end
428
+
429
+ after do
430
+ plugin.stop
431
+ end
432
+
433
+ it "should successfully update sql_last_value and only add appropriate events" do
434
+ test_table = db[:test_table]
435
+
436
+ plugin.run(queue)
437
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
438
+ expect(queue.length).to eq(0) # Shouldn't grab anything here.
439
+ test_table.insert(:num => nums[0], :created_at => Time.now.utc)
440
+ test_table.insert(:num => nums[1], :created_at => Time.now.utc)
441
+ plugin.run(queue)
442
+ expect(queue.length).to eq(0) # Shouldn't grab anything here either.
443
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
444
+ test_table.insert(:num => nums[2], :created_at => Time.now.utc)
445
+ test_table.insert(:num => nums[3], :created_at => Time.now.utc)
446
+ test_table.insert(:num => nums[4], :created_at => Time.now.utc)
447
+ plugin.run(queue)
448
+ expect(queue.length).to eq(3) # Only values greater than 20 should be grabbed.
449
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(50)
450
+ end
451
+ end
452
+
453
+ context "when iteratively running plugin#run with BAD tracking_column and stored metadata" do
454
+ let(:mixin_settings) do
455
+ { "jdbc_user" => ENV['USER'], "jdbc_driver_class" => "org.apache.derby.jdbc.EmbeddedDriver",
456
+ "jdbc_connection_string" => "jdbc:derby:memory:testdb;create=true"
457
+ }
458
+ end
459
+
460
+ let(:settings) do
461
+ { "statement" => "SELECT num, created_at FROM test_table WHERE num > :sql_last_value",
462
+ "use_column_value" => true,
463
+ "tracking_column" => "not_num",
464
+ "last_run_metadata_path" => Stud::Temporary.pathname }
465
+ end
466
+
467
+ let(:nums) { [10, 20, 30, 40, 50] }
468
+ let(:last_run_value) { 20 }
469
+
470
+ before do
471
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_value))
472
+ plugin.register
473
+ end
474
+
475
+ after do
476
+ plugin.stop
477
+ end
478
+
479
+ it "should send a warning and not update sql_last_value" do
480
+ test_table = db[:test_table]
481
+
482
+ plugin.run(queue)
483
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
484
+ expect(queue.length).to eq(0) # Shouldn't grab anything here.
485
+ test_table.insert(:num => nums[0], :created_at => Time.now.utc)
486
+ test_table.insert(:num => nums[1], :created_at => Time.now.utc)
487
+ plugin.run(queue)
488
+ expect(queue.length).to eq(0) # Shouldn't grab anything here either.
489
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
490
+ test_table.insert(:num => nums[2], :created_at => Time.now.utc)
491
+ test_table.insert(:num => nums[3], :created_at => Time.now.utc)
492
+ test_table.insert(:num => nums[4], :created_at => Time.now.utc)
493
+ plugin.run(queue)
494
+ expect(queue.length).to eq(3) # Only values greater than 20 should be grabbed.
495
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
496
+ expect(plugin.instance_variable_get("@tracking_column_warning_sent")).to eq(true)
497
+ end
498
+ end
499
+
500
+ context "when previous runs are to be respected upon successful query execution (by time)" do
501
+
502
+ let(:settings) do
503
+ { "statement" => "SELECT 1 as num_param FROM SYSIBM.SYSDUMMY1",
504
+ "last_run_metadata_path" => Stud::Temporary.pathname }
505
+ end
506
+
507
+ let(:last_run_time) { Time.now.utc }
340
508
 
341
509
  before do
342
510
  File.write(settings["last_run_metadata_path"], YAML.dump(last_run_time))
@@ -348,11 +516,91 @@ describe LogStash::Inputs::Jdbc do
348
516
  end
349
517
 
350
518
  it "should respect last run metadata" do
351
- expect(plugin.instance_variable_get("@sql_last_start")).to eq(last_run_time)
519
+ plugin.run(queue)
520
+
521
+ expect(plugin.instance_variable_get("@sql_last_value")).to be > last_run_time
522
+ end
523
+ end
524
+
525
+ context "when previous runs are to be respected upon successful query execution (by column)" do
526
+
527
+ let(:settings) do
528
+ { "statement" => "SELECT 1 as num_param FROM SYSIBM.SYSDUMMY1",
529
+ "use_column_value" => true,
530
+ "tracking_column" => "num_param",
531
+ "last_run_metadata_path" => Stud::Temporary.pathname }
532
+ end
533
+
534
+ let(:last_run_value) { 1 }
535
+
536
+ before do
537
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_value))
538
+ plugin.register
539
+ end
540
+
541
+ after do
542
+ plugin.stop
543
+ end
544
+
545
+ it "metadata should equal last_run_value" do
546
+ plugin.run(queue)
547
+
548
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(last_run_value)
549
+ end
550
+ end
551
+
552
+ context "when previous runs are to be respected upon query failure (by time)" do
553
+ let(:settings) do
554
+ { "statement" => "SELECT col from non_existent_table",
555
+ "last_run_metadata_path" => Stud::Temporary.pathname }
556
+ end
557
+
558
+ let(:last_run_time) { Time.now.utc }
559
+
560
+ before do
561
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_time))
562
+ plugin.register
563
+ end
564
+
565
+ after do
566
+ plugin.stop
567
+ end
568
+
569
+ it "should not respect last run metadata" do
570
+ plugin.run(queue)
571
+
572
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(last_run_time)
352
573
  end
353
574
  end
354
575
 
355
- context "when doing a clean run" do
576
+ context "when previous runs are to be respected upon query failure (by column)" do
577
+ let(:settings) do
578
+ { "statement" => "SELECT col from non_existent_table",
579
+ "use_column_value" => true,
580
+ "tracking_column" => "num_param",
581
+ "last_run_metadata_path" => Stud::Temporary.pathname
582
+ }
583
+ end
584
+
585
+ let(:last_run_value) { 1 }
586
+
587
+ before do
588
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_value))
589
+ plugin.register
590
+ end
591
+
592
+ after do
593
+ plugin.stop
594
+ end
595
+
596
+ it "metadata should still reflect last value" do
597
+ plugin.run(queue)
598
+
599
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(last_run_value)
600
+ end
601
+ end
602
+
603
+ context "when doing a clean run (by time)" do
356
604
 
357
605
  let(:settings) do
358
606
  {
@@ -374,10 +622,39 @@ describe LogStash::Inputs::Jdbc do
374
622
  end
375
623
 
376
624
  it "should ignore last run metadata if :clean_run set to true" do
377
- expect(plugin.instance_variable_get("@sql_last_start")).to eq(Time.at(0).utc)
625
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(Time.at(0).utc)
378
626
  end
379
627
  end
380
628
 
629
+ context "when doing a clean run (by value)" do
630
+
631
+ let(:settings) do
632
+ {
633
+ "statement" => "SELECT * FROM test_table",
634
+ "last_run_metadata_path" => Stud::Temporary.pathname,
635
+ "use_column_value" => true,
636
+ "tracking_column" => "num_param",
637
+ "clean_run" => true
638
+ }
639
+ end
640
+
641
+ let(:last_run_value) { 1000 }
642
+
643
+ before do
644
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_value))
645
+ plugin.register
646
+ end
647
+
648
+ after do
649
+ plugin.stop
650
+ end
651
+
652
+ it "should ignore last run metadata if :clean_run set to true" do
653
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(0)
654
+ end
655
+ end
656
+
657
+
381
658
  context "when state is not to be persisted" do
382
659
  let(:settings) do
383
660
  {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-jdbc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-17 00:00:00.000000000 Z
11
+ date: 2016-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: logstash-core