logstash-input-jdbc 2.1.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6fc9b06fd127404a35d42af320ad90b37f0ff642
4
- data.tar.gz: 21798f6ed0c0620f054acc6abe0c207211fadc70
3
+ metadata.gz: bf7a5bf46a7c317d17c7e12169046979b19242c1
4
+ data.tar.gz: 26fe05b541b4a84b764e08d6f9527bda90329639
5
5
  SHA512:
6
- metadata.gz: bc1d4d90e171555d00334ab59ef67699bdf55e714557ef565a9b47d870b20d233f23fe7f0a48ed0025396ab822a9e18971d7eed1692027eac4971ced58f1ffb6
7
- data.tar.gz: aeeb2afa34a2767d035c20e71c0efdd709f9df48988954761d064e542def704ba7c4ec8e52248187eb8df8847f6db1bfb5c60f081db2b1bd8709f6bb908210f3
6
+ metadata.gz: 955833f1f3abe24e373a193cac7e34d27de4ba0245cfb7210955e45a4a650c512ecd8e1f2044de746d057b386123cb2bd2be5efaba705b9e1e581b8d2ee268cf
7
+ data.tar.gz: d36bc646854ef3409fe41055eca173eabcb9ea19913738d658fded3ae410c95f3169c9e9b1ff3ff8fc8a2532c9c48e779588e140543bc9a613a5606a9fcca6e2
@@ -1,3 +1,6 @@
1
+ ## 3.0.0
2
+ - [#57](https://github.com/logstash-plugins/logstash-input-jdbc/issues/57) New feature: Allow tracking by a column value rather than by last run time. **This is a breaking change**: queries that reference `sql_last_start` must be updated to use `sql_last_value` instead. No other changes are required if you've been using time-based queries. See the documentation if you wish to use an incremental column value to track updates to your tables.
3
+
1
4
  ## 2.1.1
2
5
  - [#44](https://github.com/logstash-plugins/logstash-input-jdbc/issues/44) Add an option to control whether column names are lowercased.
3
6
 
@@ -35,12 +35,12 @@ require "yaml" # persistence
35
35
  #
36
36
  # ==== State
37
37
  #
38
- # The plugin will persist the `sql_last_start` parameter in the form of a
39
- # metadata file stored in the configured `last_run_metadata_path`. Upon shutting down,
40
- # this file will be updated with the current value of `sql_last_start`. Next time
38
+ # The plugin will persist the `sql_last_value` parameter in the form of a
39
+ # metadata file stored in the configured `last_run_metadata_path`. Upon query execution,
40
+ # this file will be updated with the current value of `sql_last_value`. Next time
41
41
  # the pipeline starts up, this value will be updated by reading from the file. If
42
- # `clean_run` is set to true, this value will be ignored and `sql_last_start` will be
43
- # set to Jan 1, 1970, as if no query has ever been executed.
42
+ # `clean_run` is set to true, this value will be ignored and `sql_last_value` will be
43
+ # set to Jan 1, 1970, or 0 if `use_column_value` is true, as if no query has ever been executed.
44
44
  #
45
45
  # ==== Dealing With Large Result-sets
46
46
  #
@@ -90,8 +90,9 @@ require "yaml" # persistence
90
90
  # Here is the list:
91
91
  #
92
92
  # |==========================================================
93
- # |sql_last_start | The last time a statement was executed. This is set to Thursday, 1 January 1970
94
- # before any query is run, and updated accordingly after first query is run.
93
+ # |sql_last_value | The value used to calculate which rows to query. Before any query is run,
94
+ # this is set to Thursday, 1 January 1970, or 0 if `use_column_value` is true and
95
+ # `tracking_column` is set. It is updated accordingly after subsequent queries are run.
95
96
  # |==========================================================
96
97
  #
97
98
  class LogStash::Inputs::Jdbc < LogStash::Inputs::Base
@@ -131,6 +132,12 @@ class LogStash::Inputs::Jdbc < LogStash::Inputs::Base
131
132
  # Path to the file that stores the last run state (a timestamp, or the tracked column value when `use_column_value` is true)
132
133
  config :last_run_metadata_path, :validate => :string, :default => "#{ENV['HOME']}/.logstash_jdbc_last_run"
133
134
 
135
+ # Use an incremental column value rather than a timestamp
136
+ config :use_column_value, :validate => :boolean, :default => false
137
+
138
+ # The column whose value is tracked when `use_column_value` is true (required in that case)
139
+ config :tracking_column, :validate => :string
140
+
134
141
  # Whether the previous run state should be preserved
135
142
  config :clean_run, :validate => :boolean, :default => false
136
143
 
@@ -146,11 +153,18 @@ class LogStash::Inputs::Jdbc < LogStash::Inputs::Base
146
153
  require "rufus/scheduler"
147
154
  prepare_jdbc_connection
148
155
 
149
- # load sql_last_start from file if exists
156
+ # Raise an error if @use_column_value is true, but no @tracking_column is set
157
+ if @use_column_value
158
+ if @tracking_column.nil?
159
+ raise(LogStash::ConfigurationError, "Must set :tracking_column if :use_column_value is true.")
160
+ end
161
+ end
162
+
163
+ # load sql_last_value from file if exists
150
164
  if @clean_run && File.exist?(@last_run_metadata_path)
151
165
  File.delete(@last_run_metadata_path)
152
166
  elsif File.exist?(@last_run_metadata_path)
153
- @sql_last_start = YAML.load(File.read(@last_run_metadata_path))
167
+ @sql_last_value = YAML.load(File.read(@last_run_metadata_path))
154
168
  end
155
169
 
156
170
  unless @statement.nil? ^ @statement_filepath.nil?
@@ -165,22 +179,19 @@ class LogStash::Inputs::Jdbc < LogStash::Inputs::Base
165
179
  @scheduler = Rufus::Scheduler.new(:max_work_threads => 1)
166
180
  @scheduler.cron @schedule do
167
181
  execute_query(queue)
182
+ update_state_file
168
183
  end
169
184
 
170
185
  @scheduler.join
171
186
  else
172
187
  execute_query(queue)
188
+ update_state_file
173
189
  end
174
190
  end # def run
175
191
 
176
192
  def stop
177
193
  @scheduler.stop if @scheduler
178
194
 
179
- # update state file for next run
180
- if @record_last_run
181
- File.write(@last_run_metadata_path, YAML.dump(@sql_last_start))
182
- end
183
-
184
195
  close_jdbc_connection
185
196
  end
186
197
 
@@ -188,11 +199,18 @@ class LogStash::Inputs::Jdbc < LogStash::Inputs::Base
188
199
 
189
200
  def execute_query(queue)
190
201
  # update default parameters
191
- @parameters['sql_last_start'] = @sql_last_start
202
+ @parameters['sql_last_value'] = @sql_last_value
192
203
  execute_statement(@statement, @parameters) do |row|
193
204
  event = LogStash::Event.new(row)
194
205
  decorate(event)
195
206
  queue << event
196
207
  end
197
208
  end
209
+
210
+ def update_state_file
211
+ if @record_last_run
212
+ File.write(@last_run_metadata_path, YAML.dump(@sql_last_value))
213
+ end
214
+ end
215
+
198
216
  end # class LogStash::Inputs::Jdbc
@@ -161,7 +161,11 @@ module LogStash::PluginMixins::Jdbc
161
161
  else
162
162
  @database.identifier_output_method = :to_s
163
163
  end
164
- @sql_last_start = Time.at(0).utc
164
+ if @use_column_value
165
+ @sql_last_value = 0
166
+ else
167
+ @sql_last_value = Time.at(0).utc
168
+ end
165
169
  end # def prepare_jdbc_connection
166
170
 
167
171
  public
@@ -175,27 +179,47 @@ module LogStash::PluginMixins::Jdbc
175
179
  begin
176
180
  parameters = symbolized_params(parameters)
177
181
  query = @database[statement, parameters]
182
+ sql_last_value = @use_column_value ? @sql_last_value : Time.now.utc
183
+ @tracking_column_warning_sent = false
178
184
  @logger.debug? and @logger.debug("Executing JDBC query", :statement => statement, :parameters => parameters, :count => query.count)
179
- @sql_last_start = Time.now.utc
180
185
 
181
186
  if @jdbc_paging_enabled
182
187
  query.each_page(@jdbc_page_size) do |paged_dataset|
183
188
  paged_dataset.each do |row|
189
+ sql_last_value = get_column_value(row) if @use_column_value
184
190
  yield extract_values_from(row)
185
191
  end
186
192
  end
187
193
  else
188
194
  query.each do |row|
195
+ sql_last_value = get_column_value(row) if @use_column_value
189
196
  yield extract_values_from(row)
190
197
  end
191
198
  end
192
199
  success = true
193
200
  rescue Sequel::DatabaseConnectionError, Sequel::DatabaseError => e
194
201
  @logger.warn("Exception when executing JDBC query", :exception => e)
202
+ else
203
+ @sql_last_value = sql_last_value
195
204
  end
196
205
  return success
197
206
  end
198
207
 
208
+ public
209
+ def get_column_value(row)
210
+ if !row.has_key?(@tracking_column.to_sym)
211
+ if !@tracking_column_warning_sent
212
+ @logger.warn("tracking_column not found in dataset.", :tracking_column => @tracking_column)
213
+ @tracking_column_warning_sent = true
214
+ end
215
+ # If we can't find the tracking column, return the current value in the ivar
216
+ @sql_last_value
217
+ else
218
+ # Otherwise send the updated tracking column
219
+ row[@tracking_column.to_sym]
220
+ end
221
+ end
222
+
199
223
  # Symbolize parameters keys to use with Sequel
200
224
  private
201
225
  def symbolized_params(parameters)
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-jdbc'
3
- s.version = '2.1.1'
3
+ s.version = '3.0.0'
4
4
  s.licenses = ['Apache License (2.0)']
5
5
  s.summary = "This example input streams a string at a definable interval."
6
6
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -161,6 +161,40 @@ describe LogStash::Inputs::Jdbc do
161
161
 
162
162
  end
163
163
 
164
+ context "when scheduling and previous runs are to be preserved" do
165
+ let(:settings) do
166
+ {
167
+ "statement" => "SELECT 1 as num_param FROM SYSIBM.SYSDUMMY1",
168
+ "schedule" => "* * * * * UTC",
169
+ "last_run_metadata_path" => Stud::Temporary.pathname
170
+ }
171
+ end
172
+
173
+ let(:last_run_time) { Time.at(1).utc }
174
+
175
+ before do
176
+ plugin.register
177
+ end
178
+
179
+ it "should flush previous run metadata per query execution" do
180
+ Timecop.travel(Time.new(2000))
181
+ Timecop.scale(60)
182
+ runner = Thread.new do
183
+ plugin.run(queue)
184
+ end
185
+ sleep 1
186
+ for i in 0..1
187
+ sleep 1
188
+ updated_last_run = YAML.load(File.read(settings["last_run_metadata_path"]))
189
+ expect(updated_last_run).to be > last_run_time
190
+ last_run_time = updated_last_run
191
+ end
192
+
193
+ plugin.stop
194
+ end
195
+
196
+ end
197
+
164
198
  context "when iterating result-set via paging" do
165
199
 
166
200
  let(:settings) do
@@ -295,7 +329,7 @@ describe LogStash::Inputs::Jdbc do
295
329
 
296
330
  context "when iteratively running plugin#run" do
297
331
  let(:settings) do
298
- {"statement" => "SELECT num, created_at FROM test_table WHERE created_at > :sql_last_start"}
332
+ {"statement" => "SELECT num, created_at FROM test_table WHERE created_at > :sql_last_value"}
299
333
  end
300
334
 
301
335
  let(:nums) { [10, 20, 30, 40, 50] }
@@ -329,14 +363,148 @@ describe LogStash::Inputs::Jdbc do
329
363
  end
330
364
  end
331
365
 
332
- context "when previous runs are to be respected" do
366
+ context "when iteratively running plugin#run with tracking_column" do
367
+ let(:mixin_settings) do
368
+ { "jdbc_user" => ENV['USER'], "jdbc_driver_class" => "org.apache.derby.jdbc.EmbeddedDriver",
369
+ "jdbc_connection_string" => "jdbc:derby:memory:testdb;create=true"
370
+ }
371
+ end
333
372
 
334
373
  let(:settings) do
335
- { "statement" => "SELECT * FROM test_table",
374
+ { "statement" => "SELECT num, created_at FROM test_table WHERE num > :sql_last_value",
375
+ "use_column_value" => true,
376
+ "tracking_column" => "num",
336
377
  "last_run_metadata_path" => Stud::Temporary.pathname }
337
378
  end
338
379
 
339
- let(:last_run_time) { Time.at(1).utc }
380
+ let(:nums) { [10, 20, 30, 40, 50] }
381
+
382
+ before do
383
+ plugin.register
384
+ end
385
+
386
+ after do
387
+ plugin.stop
388
+ end
389
+
390
+ it "should successfully update sql_last_value" do
391
+ test_table = db[:test_table]
392
+
393
+ plugin.run(queue)
394
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(0)
395
+ test_table.insert(:num => nums[0], :created_at => Time.now.utc)
396
+ test_table.insert(:num => nums[1], :created_at => Time.now.utc)
397
+ plugin.run(queue)
398
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
399
+ test_table.insert(:num => nums[2], :created_at => Time.now.utc)
400
+ test_table.insert(:num => nums[3], :created_at => Time.now.utc)
401
+ test_table.insert(:num => nums[4], :created_at => Time.now.utc)
402
+ plugin.run(queue)
403
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(50)
404
+ end
405
+ end
406
+
407
+ context "when iteratively running plugin#run with tracking_column and stored metadata" do
408
+ let(:mixin_settings) do
409
+ { "jdbc_user" => ENV['USER'], "jdbc_driver_class" => "org.apache.derby.jdbc.EmbeddedDriver",
410
+ "jdbc_connection_string" => "jdbc:derby:memory:testdb;create=true"
411
+ }
412
+ end
413
+
414
+ let(:settings) do
415
+ { "statement" => "SELECT num, created_at FROM test_table WHERE num > :sql_last_value",
416
+ "use_column_value" => true,
417
+ "tracking_column" => "num",
418
+ "last_run_metadata_path" => Stud::Temporary.pathname }
419
+ end
420
+
421
+ let(:nums) { [10, 20, 30, 40, 50] }
422
+ let(:last_run_value) { 20 }
423
+
424
+ before do
425
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_value))
426
+ plugin.register
427
+ end
428
+
429
+ after do
430
+ plugin.stop
431
+ end
432
+
433
+ it "should successfully update sql_last_value and only add appropriate events" do
434
+ test_table = db[:test_table]
435
+
436
+ plugin.run(queue)
437
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
438
+ expect(queue.length).to eq(0) # Shouldn't grab anything here.
439
+ test_table.insert(:num => nums[0], :created_at => Time.now.utc)
440
+ test_table.insert(:num => nums[1], :created_at => Time.now.utc)
441
+ plugin.run(queue)
442
+ expect(queue.length).to eq(0) # Shouldn't grab anything here either.
443
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
444
+ test_table.insert(:num => nums[2], :created_at => Time.now.utc)
445
+ test_table.insert(:num => nums[3], :created_at => Time.now.utc)
446
+ test_table.insert(:num => nums[4], :created_at => Time.now.utc)
447
+ plugin.run(queue)
448
+ expect(queue.length).to eq(3) # Only values greater than 20 should be grabbed.
449
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(50)
450
+ end
451
+ end
452
+
453
+ context "when iteratively running plugin#run with BAD tracking_column and stored metadata" do
454
+ let(:mixin_settings) do
455
+ { "jdbc_user" => ENV['USER'], "jdbc_driver_class" => "org.apache.derby.jdbc.EmbeddedDriver",
456
+ "jdbc_connection_string" => "jdbc:derby:memory:testdb;create=true"
457
+ }
458
+ end
459
+
460
+ let(:settings) do
461
+ { "statement" => "SELECT num, created_at FROM test_table WHERE num > :sql_last_value",
462
+ "use_column_value" => true,
463
+ "tracking_column" => "not_num",
464
+ "last_run_metadata_path" => Stud::Temporary.pathname }
465
+ end
466
+
467
+ let(:nums) { [10, 20, 30, 40, 50] }
468
+ let(:last_run_value) { 20 }
469
+
470
+ before do
471
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_value))
472
+ plugin.register
473
+ end
474
+
475
+ after do
476
+ plugin.stop
477
+ end
478
+
479
+ it "should send a warning and not update sql_last_value" do
480
+ test_table = db[:test_table]
481
+
482
+ plugin.run(queue)
483
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
484
+ expect(queue.length).to eq(0) # Shouldn't grab anything here.
485
+ test_table.insert(:num => nums[0], :created_at => Time.now.utc)
486
+ test_table.insert(:num => nums[1], :created_at => Time.now.utc)
487
+ plugin.run(queue)
488
+ expect(queue.length).to eq(0) # Shouldn't grab anything here either.
489
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
490
+ test_table.insert(:num => nums[2], :created_at => Time.now.utc)
491
+ test_table.insert(:num => nums[3], :created_at => Time.now.utc)
492
+ test_table.insert(:num => nums[4], :created_at => Time.now.utc)
493
+ plugin.run(queue)
494
+ expect(queue.length).to eq(3) # Only values greater than 20 should be grabbed.
495
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(20)
496
+ expect(plugin.instance_variable_get("@tracking_column_warning_sent")).to eq(true)
497
+ end
498
+ end
499
+
500
+ context "when previous runs are to be respected upon successful query execution (by time)" do
501
+
502
+ let(:settings) do
503
+ { "statement" => "SELECT 1 as num_param FROM SYSIBM.SYSDUMMY1",
504
+ "last_run_metadata_path" => Stud::Temporary.pathname }
505
+ end
506
+
507
+ let(:last_run_time) { Time.now.utc }
340
508
 
341
509
  before do
342
510
  File.write(settings["last_run_metadata_path"], YAML.dump(last_run_time))
@@ -348,11 +516,91 @@ describe LogStash::Inputs::Jdbc do
348
516
  end
349
517
 
350
518
  it "should respect last run metadata" do
351
- expect(plugin.instance_variable_get("@sql_last_start")).to eq(last_run_time)
519
+ plugin.run(queue)
520
+
521
+ expect(plugin.instance_variable_get("@sql_last_value")).to be > last_run_time
522
+ end
523
+ end
524
+
525
+ context "when previous runs are to be respected upon successful query execution (by column)" do
526
+
527
+ let(:settings) do
528
+ { "statement" => "SELECT 1 as num_param FROM SYSIBM.SYSDUMMY1",
529
+ "use_column_value" => true,
530
+ "tracking_column" => "num_param",
531
+ "last_run_metadata_path" => Stud::Temporary.pathname }
532
+ end
533
+
534
+ let(:last_run_value) { 1 }
535
+
536
+ before do
537
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_value))
538
+ plugin.register
539
+ end
540
+
541
+ after do
542
+ plugin.stop
543
+ end
544
+
545
+ it "metadata should equal last_run_value" do
546
+ plugin.run(queue)
547
+
548
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(last_run_value)
549
+ end
550
+ end
551
+
552
+ context "when previous runs are to be respected upon query failure (by time)" do
553
+ let(:settings) do
554
+ { "statement" => "SELECT col from non_existent_table",
555
+ "last_run_metadata_path" => Stud::Temporary.pathname }
556
+ end
557
+
558
+ let(:last_run_time) { Time.now.utc }
559
+
560
+ before do
561
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_time))
562
+ plugin.register
563
+ end
564
+
565
+ after do
566
+ plugin.stop
567
+ end
568
+
569
+ it "should not respect last run metadata" do
570
+ plugin.run(queue)
571
+
572
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(last_run_time)
352
573
  end
353
574
  end
354
575
 
355
- context "when doing a clean run" do
576
+ context "when previous runs are to be respected upon query failure (by column)" do
577
+ let(:settings) do
578
+ { "statement" => "SELECT col from non_existent_table",
579
+ "use_column_value" => true,
580
+ "tracking_column" => "num_param",
581
+ "last_run_metadata_path" => Stud::Temporary.pathname
582
+ }
583
+ end
584
+
585
+ let(:last_run_value) { 1 }
586
+
587
+ before do
588
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_value))
589
+ plugin.register
590
+ end
591
+
592
+ after do
593
+ plugin.stop
594
+ end
595
+
596
+ it "metadata should still reflect last value" do
597
+ plugin.run(queue)
598
+
599
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(last_run_value)
600
+ end
601
+ end
602
+
603
+ context "when doing a clean run (by time)" do
356
604
 
357
605
  let(:settings) do
358
606
  {
@@ -374,10 +622,39 @@ describe LogStash::Inputs::Jdbc do
374
622
  end
375
623
 
376
624
  it "should ignore last run metadata if :clean_run set to true" do
377
- expect(plugin.instance_variable_get("@sql_last_start")).to eq(Time.at(0).utc)
625
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(Time.at(0).utc)
378
626
  end
379
627
  end
380
628
 
629
+ context "when doing a clean run (by value)" do
630
+
631
+ let(:settings) do
632
+ {
633
+ "statement" => "SELECT * FROM test_table",
634
+ "last_run_metadata_path" => Stud::Temporary.pathname,
635
+ "use_column_value" => true,
636
+ "tracking_column" => "num_param",
637
+ "clean_run" => true
638
+ }
639
+ end
640
+
641
+ let(:last_run_value) { 1000 }
642
+
643
+ before do
644
+ File.write(settings["last_run_metadata_path"], YAML.dump(last_run_value))
645
+ plugin.register
646
+ end
647
+
648
+ after do
649
+ plugin.stop
650
+ end
651
+
652
+ it "should ignore last run metadata if :clean_run set to true" do
653
+ expect(plugin.instance_variable_get("@sql_last_value")).to eq(0)
654
+ end
655
+ end
656
+
657
+
381
658
  context "when state is not to be persisted" do
382
659
  let(:settings) do
383
660
  {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-jdbc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-17 00:00:00.000000000 Z
11
+ date: 2016-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: logstash-core