logstash-input-file 4.1.16 → 4.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +20 -0
  3. data/LICENSE +199 -10
  4. data/docs/index.asciidoc +23 -7
  5. data/lib/filewatch/discoverer.rb +9 -8
  6. data/lib/filewatch/observing_base.rb +1 -12
  7. data/lib/filewatch/processor.rb +55 -0
  8. data/lib/filewatch/read_mode/handlers/base.rb +8 -6
  9. data/lib/filewatch/read_mode/handlers/read_file.rb +26 -8
  10. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +63 -34
  11. data/lib/filewatch/read_mode/processor.rb +22 -36
  12. data/lib/filewatch/settings.rb +3 -2
  13. data/lib/filewatch/sincedb_collection.rb +23 -21
  14. data/lib/filewatch/stat/generic.rb +8 -13
  15. data/lib/filewatch/stat/windows_path.rb +7 -9
  16. data/lib/filewatch/tail_mode/handlers/delete.rb +2 -4
  17. data/lib/filewatch/tail_mode/processor.rb +47 -54
  18. data/lib/filewatch/watch.rb +12 -14
  19. data/lib/filewatch/watched_file.rb +25 -14
  20. data/lib/filewatch/watched_files_collection.rb +11 -74
  21. data/lib/jars/filewatch-1.0.1.jar +0 -0
  22. data/lib/logstash/inputs/delete_completed_file_handler.rb +5 -0
  23. data/lib/logstash/inputs/file.rb +32 -11
  24. data/logstash-input-file.gemspec +3 -2
  25. data/spec/filewatch/reading_spec.rb +60 -9
  26. data/spec/filewatch/rotate_spec.rb +2 -1
  27. data/spec/filewatch/settings_spec.rb +3 -0
  28. data/spec/filewatch/spec_helper.rb +13 -15
  29. data/spec/filewatch/tailing_spec.rb +14 -12
  30. data/spec/filewatch/watched_file_spec.rb +30 -0
  31. data/spec/filewatch/watched_files_collection_spec.rb +62 -8
  32. data/spec/helpers/spec_helper.rb +8 -0
  33. data/spec/inputs/file_read_spec.rb +154 -4
  34. data/spec/inputs/file_tail_spec.rb +3 -2
  35. metadata +21 -6
@@ -4,15 +4,18 @@ require_relative 'spec_helper'
4
4
  module FileWatch
5
5
  describe WatchedFilesCollection do
6
6
  let(:time) { Time.now }
7
- let(:filepath1){"/var/log/z.log"}
8
- let(:filepath2){"/var/log/m.log"}
9
- let(:filepath3){"/var/log/a.log"}
10
- let(:stat1) { double("stat1", :size => 98, :modified_at => time - 30, :identifier => nil, :inode => 234567, :inode_struct => InodeStruct.new("234567", 3, 2)) }
11
- let(:stat2) { double("stat2", :size => 99, :modified_at => time - 20, :identifier => nil, :inode => 234568, :inode_struct => InodeStruct.new("234568", 3, 2)) }
12
- let(:stat3) { double("stat3", :size => 100, :modified_at => time, :identifier => nil, :inode => 234569, :inode_struct => InodeStruct.new("234569", 3, 2)) }
7
+ let(:filepath1) { "/var/log/z.log" }
8
+ let(:filepath2) { "/var/log/m.log" }
9
+ let(:filepath3) { "/var/log/a.log" }
10
+ let(:filepath4) { "/var/log/b.log" }
11
+ let(:stat1) { double("stat1", :size => 98, :modified_at => time - 30, :inode => 234567, :inode_struct => InodeStruct.new("234567", 3, 2)) }
12
+ let(:stat2) { double("stat2", :size => 99, :modified_at => time - 20, :inode => 234568, :inode_struct => InodeStruct.new("234568", 3, 2)) }
13
+ let(:stat3) { double("stat3", :size => 100, :modified_at => time, :inode => 234569, :inode_struct => InodeStruct.new("234569", 3, 2)) }
14
+ let(:stat4) { double("stat4", :size => 99, :modified_at => time, :inode => 234570, :inode_struct => InodeStruct.new("234570", 3, 2)) }
13
15
  let(:wf1) { WatchedFile.new(filepath1, stat1, Settings.new) }
14
16
  let(:wf2) { WatchedFile.new(filepath2, stat2, Settings.new) }
15
17
  let(:wf3) { WatchedFile.new(filepath3, stat3, Settings.new) }
18
+ let(:wf4) { WatchedFile.new(filepath4, stat4, Settings.new) }
16
19
 
17
20
  context "sort by last_modified in ascending order" do
18
21
  let(:sort_by) { "last_modified" }
@@ -20,12 +23,29 @@ module FileWatch
20
23
 
21
24
  it "sorts earliest modified first" do
22
25
  collection = described_class.new(Settings.from_options(:file_sort_by => sort_by, :file_sort_direction => sort_direction))
26
+ expect(collection.empty?).to be true
23
27
  collection.add(wf2)
28
+ expect(collection.empty?).to be false
24
29
  expect(collection.values).to eq([wf2])
25
30
  collection.add(wf3)
26
31
  expect(collection.values).to eq([wf2, wf3])
27
32
  collection.add(wf1)
28
33
  expect(collection.values).to eq([wf1, wf2, wf3])
34
+ expect(collection.keys.size).to eq 3
35
+ end
36
+
37
+ it "sorts by path when mtime is same" do
38
+ collection = described_class.new(Settings.from_options(:file_sort_by => sort_by, :file_sort_direction => sort_direction))
39
+ expect(collection.size).to eq 0
40
+ collection.add(wf2)
41
+ collection.add(wf4)
42
+ collection.add(wf1)
43
+ expect(collection.size).to eq 3
44
+ expect(collection.values).to eq([wf1, wf2, wf4])
45
+ collection.add(wf3)
46
+ expect(collection.size).to eq 4
47
+ expect(collection.values).to eq([wf1, wf2, wf3, wf4])
48
+ expect(collection.keys.size).to eq 4
29
49
  end
30
50
  end
31
51
 
@@ -74,7 +94,7 @@ module FileWatch
74
94
  end
75
95
  end
76
96
 
77
- context "when delete called" do
97
+ context "remove_paths" do
78
98
  let(:sort_by) { "path" }
79
99
  let(:sort_direction) { "desc" }
80
100
 
@@ -85,9 +105,43 @@ module FileWatch
85
105
  collection.add(wf3)
86
106
  expect(collection.keys).to eq([filepath1, filepath2, filepath3])
87
107
 
88
- collection.delete([filepath2,filepath3])
108
+ ret = collection.remove_paths([filepath2, filepath3])
109
+ expect(ret).to eq 2
89
110
  expect(collection.keys).to eq([filepath1])
111
+ expect(collection.values.size).to eq 1
112
+
113
+ ret = collection.remove_paths([filepath2])
114
+ expect(ret).to eq 0
115
+ end
116
+ end
117
+
118
+ context "update" do
119
+ let(:sort_by) { "last_modified" }
120
+ let(:sort_direction) { "asc" }
121
+
122
+ let(:re_stat1) { double("restat1", :size => 99, :modified_at => time, :inode => 234567, :inode_struct => InodeStruct.new("234567", 3, 2)) }
123
+ let(:re_stat2) { double("restat2", :size => 99, :modified_at => time, :inode => 234568, :inode_struct => InodeStruct.new("234568", 3, 2)) }
124
+
125
+ it "updates entry with changed mtime" do
126
+ collection = described_class.new(Settings.from_options(:file_sort_by => sort_by, :file_sort_direction => sort_direction))
127
+ collection.add(wf1)
128
+ collection.add(wf2)
129
+ collection.add(wf3)
130
+ expect(collection.files).to eq([wf1, wf2, wf3])
131
+
132
+ wf2.send(:set_stat, re_stat2)
133
+ expect( wf2.modified_at_changed? ).to be_truthy
134
+
135
+ collection.update wf2
136
+ expect(collection.files).to eq([wf1, wf3, wf2])
137
+
138
+ wf1.send(:set_stat, re_stat1)
139
+ expect( wf1.modified_at_changed? ).to be_truthy
140
+ collection.update wf1
141
+ expect(collection.files).to eq([wf3, wf2, wf1])
90
142
 
143
+ collection.add(wf4)
144
+ expect(collection.files).to eq([wf3, wf4, wf2, wf1])
91
145
  end
92
146
  end
93
147
 
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require "logstash/devutils/rspec/spec_helper"
4
+ require "rspec/wait"
4
5
  require "rspec_sequencing"
5
6
 
6
7
  module FileInput
@@ -16,6 +17,13 @@ module FileInput
16
17
  ::File.utime(time, time, path)
17
18
  end
18
19
 
20
+ def self.corrupt_gzip(file_path)
21
+ f = File.open(file_path, "w")
22
+ f.seek(12)
23
+ f.puts 'corrupting_string'
24
+ f.close()
25
+ end
26
+
19
27
  class TracerBase
20
28
  def initialize
21
29
  @tracer = Concurrent::Array.new
@@ -70,7 +70,7 @@ describe LogStash::Inputs::File do
70
70
  end
71
71
 
72
72
  events = input(conf) do |pipeline, queue|
73
- wait(0.5).for{IO.read(log_completed_path)}.to match(/A\.log/)
73
+ wait(0.75).for { IO.read(log_completed_path) }.to match(/A\.log/)
74
74
  2.times.collect { queue.pop }
75
75
  end
76
76
  expect(events.map{|e| e.get("message")}).to contain_exactly("hello", "world")
@@ -137,7 +137,7 @@ describe LogStash::Inputs::File do
137
137
  CONFIG
138
138
 
139
139
  events = input(conf) do |pipeline, queue|
140
- wait(0.5).for{IO.read(log_completed_path)}.to match(/#{file_path.to_s}/)
140
+ wait(0.75).for { IO.read(log_completed_path) }.to match(/#{file_path.to_s}/)
141
141
  2.times.collect { queue.pop }
142
142
  end
143
143
 
@@ -171,7 +171,7 @@ describe LogStash::Inputs::File do
171
171
  CONFIG
172
172
 
173
173
  events = input(conf) do |pipeline, queue|
174
- wait(0.5).for{IO.read(log_completed_path)}.to match(/uncompressed\.log/)
174
+ wait(0.75).for{ IO.read(log_completed_path) }.to match(/uncompressed\.log/)
175
175
  2.times.collect { queue.pop }
176
176
  end
177
177
 
@@ -205,7 +205,7 @@ describe LogStash::Inputs::File do
205
205
  CONFIG
206
206
 
207
207
  events = input(conf) do |pipeline, queue|
208
- wait(0.5).for{IO.read(log_completed_path).scan(/compressed\.log\.gz(ip)?/).size}.to eq(2)
208
+ wait(0.75).for { IO.read(log_completed_path).scan(/compressed\.log\.gz(ip)?/).size }.to eq(2)
209
209
  4.times.collect { queue.pop }
210
210
  end
211
211
 
@@ -214,6 +214,156 @@ describe LogStash::Inputs::File do
214
214
  expect(events[2].get("message")).to start_with("2010-03-12 23:51")
215
215
  expect(events[3].get("message")).to start_with("2010-03-12 23:51")
216
216
  end
217
+
218
+ it "the corrupted file is untouched" do
219
+ directory = Stud::Temporary.directory
220
+ file_path = fixture_dir.join('compressed.log.gz')
221
+ corrupted_file_path = ::File.join(directory, 'corrupted.gz')
222
+ FileUtils.cp(file_path, corrupted_file_path)
223
+
224
+ FileInput.corrupt_gzip(corrupted_file_path)
225
+
226
+ log_completed_path = ::File.join(directory, "C_completed.txt")
227
+ f = File.new(log_completed_path, "w")
228
+ f.close()
229
+
230
+ conf = <<-CONFIG
231
+ input {
232
+ file {
233
+ type => "blah"
234
+ path => "#{corrupted_file_path}"
235
+ mode => "read"
236
+ file_completed_action => "log_and_delete"
237
+ file_completed_log_path => "#{log_completed_path}"
238
+ check_archive_validity => true
239
+ }
240
+ }
241
+ CONFIG
242
+
243
+ events = input(conf) do |pipeline, queue|
244
+ wait(1)
245
+ expect(IO.read(log_completed_path)).to be_empty
246
+ end
247
+ end
248
+ end
249
+ end
250
+
251
+ let(:temp_directory) { Stud::Temporary.directory }
252
+ let(:interval) { 0.1 }
253
+ let(:options) do
254
+ {
255
+ 'mode' => "read",
256
+ 'path' => "#{temp_directory}/*",
257
+ 'stat_interval' => interval,
258
+ 'discover_interval' => interval,
259
+ 'sincedb_path' => "#{temp_directory}/.sincedb",
260
+ 'sincedb_write_interval' => interval
261
+ }
262
+ end
263
+
264
+ let(:queue) { Queue.new }
265
+ let(:plugin) { LogStash::Inputs::File.new(options) }
266
+
267
+ describe 'delete on complete' do
268
+
269
+ let(:options) do
270
+ super.merge({ 'file_completed_action' => "delete", 'exit_after_read' => false })
271
+ end
272
+
273
+ let(:sample_file) { File.join(temp_directory, "sample.log") }
274
+
275
+ before do
276
+ plugin.register
277
+ @run_thread = Thread.new(plugin) do |plugin|
278
+ Thread.current.abort_on_exception = true
279
+ plugin.run queue
280
+ end
281
+
282
+ File.open(sample_file, 'w') { |fd| fd.write("sample-content\n") }
283
+
284
+ wait_for_start_processing(@run_thread)
217
285
  end
286
+
287
+ after { plugin.stop }
288
+
289
+ it 'processes a file' do
290
+ wait_for_file_removal(sample_file) # watched discovery
291
+
292
+ expect( plugin.queue.size ).to eql 1
293
+ event = plugin.queue.pop
294
+ expect( event.get('message') ).to eql 'sample-content'
295
+ end
296
+
297
+ it 'removes watched file from collection' do
298
+ wait_for_file_removal(sample_file) # watched discovery
299
+ sleep(0.25) # give CI some space to execute the removal
300
+ # TODO shouldn't be necessary once WatchedFilesCollection does proper locking
301
+ watched_files = plugin.watcher.watch.watched_files_collection
302
+ expect( watched_files ).to be_empty
303
+ end
304
+ end
305
+
306
+ describe 'sincedb cleanup' do
307
+
308
+ let(:options) do
309
+ super.merge(
310
+ 'sincedb_path' => sincedb_path,
311
+ 'sincedb_clean_after' => '1.0 seconds',
312
+ 'sincedb_write_interval' => 0.25,
313
+ 'stat_interval' => 0.1,
314
+ )
315
+ end
316
+
317
+ let(:sincedb_path) { "#{temp_directory}/.sincedb" }
318
+
319
+ let(:sample_file) { File.join(temp_directory, "sample.txt") }
320
+
321
+ before do
322
+ plugin.register
323
+ @run_thread = Thread.new(plugin) do |plugin|
324
+ Thread.current.abort_on_exception = true
325
+ plugin.run queue
326
+ end
327
+
328
+ File.open(sample_file, 'w') { |fd| fd.write("line1\nline2\n") }
329
+
330
+ wait_for_start_processing(@run_thread)
331
+ end
332
+
333
+ after { plugin.stop }
334
+
335
+ it 'cleans up sincedb entry' do
336
+ wait_for_file_removal(sample_file) # watched discovery
337
+
338
+ sincedb_content = File.read(sincedb_path).strip
339
+ expect( sincedb_content ).to_not be_empty
340
+
341
+ Stud.try(3.times) do
342
+ sleep(1.5) # > sincedb_clean_after
343
+
344
+ sincedb_content = File.read(sincedb_path).strip
345
+ expect( sincedb_content ).to be_empty
346
+ end
347
+ end
348
+
349
+ end
350
+
351
+ private
352
+
353
+ def wait_for_start_processing(run_thread, timeout: 1.0)
354
+ begin
355
+ Timeout.timeout(timeout) do
356
+ sleep(0.01) while run_thread.status != 'sleep'
357
+ sleep(timeout) unless plugin.queue
358
+ end
359
+ rescue Timeout::Error
360
+ raise "plugin did not start processing (timeout: #{timeout})" unless plugin.queue
361
+ else
362
+ raise "plugin did not start processing" unless plugin.queue
363
+ end
364
+ end
365
+
366
+ def wait_for_file_removal(path, timeout: 3 * interval)
367
+ wait(timeout).for { File.exist?(path) }.to be_falsey
218
368
  end
219
369
  end
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require "helpers/spec_helper"
4
+ require "logstash/devutils/rspec/shared_examples"
4
5
  require "logstash/inputs/file"
5
6
 
6
7
  require "tempfile"
@@ -66,7 +67,7 @@ describe LogStash::Inputs::File do
66
67
  path => "#{path_path}"
67
68
  start_position => "beginning"
68
69
  sincedb_path => "#{sincedb_path}"
69
- "file_sort_by" => "path"
70
+ file_sort_by => "path"
70
71
  delimiter => "#{TEST_FILE_DELIMITER}"
71
72
  }
72
73
  }
@@ -175,7 +176,7 @@ describe LogStash::Inputs::File do
175
176
  context "when sincedb_path is a directory" do
176
177
  let(:name) { "E" }
177
178
  subject { LogStash::Inputs::File.new("path" => path_path, "sincedb_path" => directory) }
178
-
179
+
179
180
  after :each do
180
181
  FileUtils.rm_rf(sincedb_path)
181
182
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-file
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.1.16
4
+ version: 4.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-20 00:00:00.000000000 Z
11
+ date: 2020-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -58,6 +58,20 @@ dependencies:
58
58
  - - ">="
59
59
  - !ruby/object:Gem::Version
60
60
  version: '0'
61
+ - !ruby/object:Gem::Dependency
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '1.0'
67
+ name: concurrent-ruby
68
+ prerelease: false
69
+ type: :runtime
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.0'
61
75
  - !ruby/object:Gem::Dependency
62
76
  requirement: !ruby/object:Gem::Requirement
63
77
  requirements:
@@ -89,17 +103,17 @@ dependencies:
89
103
  - !ruby/object:Gem::Dependency
90
104
  requirement: !ruby/object:Gem::Requirement
91
105
  requirements:
92
- - - "~>"
106
+ - - ">="
93
107
  - !ruby/object:Gem::Version
94
- version: '1.3'
108
+ version: '0'
95
109
  name: logstash-devutils
96
110
  prerelease: false
97
111
  type: :development
98
112
  version_requirements: !ruby/object:Gem::Requirement
99
113
  requirements:
100
- - - "~>"
114
+ - - ">="
101
115
  - !ruby/object:Gem::Version
102
- version: '1.3'
116
+ version: '0'
103
117
  - !ruby/object:Gem::Dependency
104
118
  requirement: !ruby/object:Gem::Requirement
105
119
  requirements:
@@ -178,6 +192,7 @@ files:
178
192
  - lib/filewatch/observing_base.rb
179
193
  - lib/filewatch/observing_read.rb
180
194
  - lib/filewatch/observing_tail.rb
195
+ - lib/filewatch/processor.rb
181
196
  - lib/filewatch/read_mode/handlers/base.rb
182
197
  - lib/filewatch/read_mode/handlers/read_file.rb
183
198
  - lib/filewatch/read_mode/handlers/read_zip_file.rb