logstash-input-file 4.1.16 → 4.2.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +20 -0
  3. data/LICENSE +199 -10
  4. data/docs/index.asciidoc +23 -7
  5. data/lib/filewatch/discoverer.rb +9 -8
  6. data/lib/filewatch/observing_base.rb +1 -12
  7. data/lib/filewatch/processor.rb +55 -0
  8. data/lib/filewatch/read_mode/handlers/base.rb +8 -6
  9. data/lib/filewatch/read_mode/handlers/read_file.rb +26 -8
  10. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +63 -34
  11. data/lib/filewatch/read_mode/processor.rb +22 -36
  12. data/lib/filewatch/settings.rb +3 -2
  13. data/lib/filewatch/sincedb_collection.rb +23 -21
  14. data/lib/filewatch/stat/generic.rb +8 -13
  15. data/lib/filewatch/stat/windows_path.rb +7 -9
  16. data/lib/filewatch/tail_mode/handlers/delete.rb +2 -4
  17. data/lib/filewatch/tail_mode/processor.rb +47 -54
  18. data/lib/filewatch/watch.rb +12 -14
  19. data/lib/filewatch/watched_file.rb +25 -14
  20. data/lib/filewatch/watched_files_collection.rb +11 -74
  21. data/lib/jars/filewatch-1.0.1.jar +0 -0
  22. data/lib/logstash/inputs/delete_completed_file_handler.rb +5 -0
  23. data/lib/logstash/inputs/file.rb +32 -11
  24. data/logstash-input-file.gemspec +3 -2
  25. data/spec/filewatch/reading_spec.rb +60 -9
  26. data/spec/filewatch/rotate_spec.rb +2 -1
  27. data/spec/filewatch/settings_spec.rb +3 -0
  28. data/spec/filewatch/spec_helper.rb +13 -15
  29. data/spec/filewatch/tailing_spec.rb +14 -12
  30. data/spec/filewatch/watched_file_spec.rb +30 -0
  31. data/spec/filewatch/watched_files_collection_spec.rb +62 -8
  32. data/spec/helpers/spec_helper.rb +8 -0
  33. data/spec/inputs/file_read_spec.rb +154 -4
  34. data/spec/inputs/file_tail_spec.rb +3 -2
  35. metadata +21 -6
@@ -4,15 +4,18 @@ require_relative 'spec_helper'
4
4
  module FileWatch
5
5
  describe WatchedFilesCollection do
6
6
  let(:time) { Time.now }
7
- let(:filepath1){"/var/log/z.log"}
8
- let(:filepath2){"/var/log/m.log"}
9
- let(:filepath3){"/var/log/a.log"}
10
- let(:stat1) { double("stat1", :size => 98, :modified_at => time - 30, :identifier => nil, :inode => 234567, :inode_struct => InodeStruct.new("234567", 3, 2)) }
11
- let(:stat2) { double("stat2", :size => 99, :modified_at => time - 20, :identifier => nil, :inode => 234568, :inode_struct => InodeStruct.new("234568", 3, 2)) }
12
- let(:stat3) { double("stat3", :size => 100, :modified_at => time, :identifier => nil, :inode => 234569, :inode_struct => InodeStruct.new("234569", 3, 2)) }
7
+ let(:filepath1) { "/var/log/z.log" }
8
+ let(:filepath2) { "/var/log/m.log" }
9
+ let(:filepath3) { "/var/log/a.log" }
10
+ let(:filepath4) { "/var/log/b.log" }
11
+ let(:stat1) { double("stat1", :size => 98, :modified_at => time - 30, :inode => 234567, :inode_struct => InodeStruct.new("234567", 3, 2)) }
12
+ let(:stat2) { double("stat2", :size => 99, :modified_at => time - 20, :inode => 234568, :inode_struct => InodeStruct.new("234568", 3, 2)) }
13
+ let(:stat3) { double("stat3", :size => 100, :modified_at => time, :inode => 234569, :inode_struct => InodeStruct.new("234569", 3, 2)) }
14
+ let(:stat4) { double("stat4", :size => 99, :modified_at => time, :inode => 234570, :inode_struct => InodeStruct.new("234570", 3, 2)) }
13
15
  let(:wf1) { WatchedFile.new(filepath1, stat1, Settings.new) }
14
16
  let(:wf2) { WatchedFile.new(filepath2, stat2, Settings.new) }
15
17
  let(:wf3) { WatchedFile.new(filepath3, stat3, Settings.new) }
18
+ let(:wf4) { WatchedFile.new(filepath4, stat4, Settings.new) }
16
19
 
17
20
  context "sort by last_modified in ascending order" do
18
21
  let(:sort_by) { "last_modified" }
@@ -20,12 +23,29 @@ module FileWatch
20
23
 
21
24
  it "sorts earliest modified first" do
22
25
  collection = described_class.new(Settings.from_options(:file_sort_by => sort_by, :file_sort_direction => sort_direction))
26
+ expect(collection.empty?).to be true
23
27
  collection.add(wf2)
28
+ expect(collection.empty?).to be false
24
29
  expect(collection.values).to eq([wf2])
25
30
  collection.add(wf3)
26
31
  expect(collection.values).to eq([wf2, wf3])
27
32
  collection.add(wf1)
28
33
  expect(collection.values).to eq([wf1, wf2, wf3])
34
+ expect(collection.keys.size).to eq 3
35
+ end
36
+
37
+ it "sorts by path when mtime is same" do
38
+ collection = described_class.new(Settings.from_options(:file_sort_by => sort_by, :file_sort_direction => sort_direction))
39
+ expect(collection.size).to eq 0
40
+ collection.add(wf2)
41
+ collection.add(wf4)
42
+ collection.add(wf1)
43
+ expect(collection.size).to eq 3
44
+ expect(collection.values).to eq([wf1, wf2, wf4])
45
+ collection.add(wf3)
46
+ expect(collection.size).to eq 4
47
+ expect(collection.values).to eq([wf1, wf2, wf3, wf4])
48
+ expect(collection.keys.size).to eq 4
29
49
  end
30
50
  end
31
51
 
@@ -74,7 +94,7 @@ module FileWatch
74
94
  end
75
95
  end
76
96
 
77
- context "when delete called" do
97
+ context "remove_paths" do
78
98
  let(:sort_by) { "path" }
79
99
  let(:sort_direction) { "desc" }
80
100
 
@@ -85,9 +105,43 @@ module FileWatch
85
105
  collection.add(wf3)
86
106
  expect(collection.keys).to eq([filepath1, filepath2, filepath3])
87
107
 
88
- collection.delete([filepath2,filepath3])
108
+ ret = collection.remove_paths([filepath2, filepath3])
109
+ expect(ret).to eq 2
89
110
  expect(collection.keys).to eq([filepath1])
111
+ expect(collection.values.size).to eq 1
112
+
113
+ ret = collection.remove_paths([filepath2])
114
+ expect(ret).to eq 0
115
+ end
116
+ end
117
+
118
+ context "update" do
119
+ let(:sort_by) { "last_modified" }
120
+ let(:sort_direction) { "asc" }
121
+
122
+ let(:re_stat1) { double("restat1", :size => 99, :modified_at => time, :inode => 234567, :inode_struct => InodeStruct.new("234567", 3, 2)) }
123
+ let(:re_stat2) { double("restat2", :size => 99, :modified_at => time, :inode => 234568, :inode_struct => InodeStruct.new("234568", 3, 2)) }
124
+
125
+ it "updates entry with changed mtime" do
126
+ collection = described_class.new(Settings.from_options(:file_sort_by => sort_by, :file_sort_direction => sort_direction))
127
+ collection.add(wf1)
128
+ collection.add(wf2)
129
+ collection.add(wf3)
130
+ expect(collection.files).to eq([wf1, wf2, wf3])
131
+
132
+ wf2.send(:set_stat, re_stat2)
133
+ expect( wf2.modified_at_changed? ).to be_truthy
134
+
135
+ collection.update wf2
136
+ expect(collection.files).to eq([wf1, wf3, wf2])
137
+
138
+ wf1.send(:set_stat, re_stat1)
139
+ expect( wf1.modified_at_changed? ).to be_truthy
140
+ collection.update wf1
141
+ expect(collection.files).to eq([wf3, wf2, wf1])
90
142
 
143
+ collection.add(wf4)
144
+ expect(collection.files).to eq([wf3, wf4, wf2, wf1])
91
145
  end
92
146
  end
93
147
 
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require "logstash/devutils/rspec/spec_helper"
4
+ require "rspec/wait"
4
5
  require "rspec_sequencing"
5
6
 
6
7
  module FileInput
@@ -16,6 +17,13 @@ module FileInput
16
17
  ::File.utime(time, time, path)
17
18
  end
18
19
 
20
+ def self.corrupt_gzip(file_path)
21
+ f = File.open(file_path, "w")
22
+ f.seek(12)
23
+ f.puts 'corrupting_string'
24
+ f.close()
25
+ end
26
+
19
27
  class TracerBase
20
28
  def initialize
21
29
  @tracer = Concurrent::Array.new
@@ -70,7 +70,7 @@ describe LogStash::Inputs::File do
70
70
  end
71
71
 
72
72
  events = input(conf) do |pipeline, queue|
73
- wait(0.5).for{IO.read(log_completed_path)}.to match(/A\.log/)
73
+ wait(0.75).for { IO.read(log_completed_path) }.to match(/A\.log/)
74
74
  2.times.collect { queue.pop }
75
75
  end
76
76
  expect(events.map{|e| e.get("message")}).to contain_exactly("hello", "world")
@@ -137,7 +137,7 @@ describe LogStash::Inputs::File do
137
137
  CONFIG
138
138
 
139
139
  events = input(conf) do |pipeline, queue|
140
- wait(0.5).for{IO.read(log_completed_path)}.to match(/#{file_path.to_s}/)
140
+ wait(0.75).for { IO.read(log_completed_path) }.to match(/#{file_path.to_s}/)
141
141
  2.times.collect { queue.pop }
142
142
  end
143
143
 
@@ -171,7 +171,7 @@ describe LogStash::Inputs::File do
171
171
  CONFIG
172
172
 
173
173
  events = input(conf) do |pipeline, queue|
174
- wait(0.5).for{IO.read(log_completed_path)}.to match(/uncompressed\.log/)
174
+ wait(0.75).for{ IO.read(log_completed_path) }.to match(/uncompressed\.log/)
175
175
  2.times.collect { queue.pop }
176
176
  end
177
177
 
@@ -205,7 +205,7 @@ describe LogStash::Inputs::File do
205
205
  CONFIG
206
206
 
207
207
  events = input(conf) do |pipeline, queue|
208
- wait(0.5).for{IO.read(log_completed_path).scan(/compressed\.log\.gz(ip)?/).size}.to eq(2)
208
+ wait(0.75).for { IO.read(log_completed_path).scan(/compressed\.log\.gz(ip)?/).size }.to eq(2)
209
209
  4.times.collect { queue.pop }
210
210
  end
211
211
 
@@ -214,6 +214,156 @@ describe LogStash::Inputs::File do
214
214
  expect(events[2].get("message")).to start_with("2010-03-12 23:51")
215
215
  expect(events[3].get("message")).to start_with("2010-03-12 23:51")
216
216
  end
217
+
218
+ it "the corrupted file is untouched" do
219
+ directory = Stud::Temporary.directory
220
+ file_path = fixture_dir.join('compressed.log.gz')
221
+ corrupted_file_path = ::File.join(directory, 'corrupted.gz')
222
+ FileUtils.cp(file_path, corrupted_file_path)
223
+
224
+ FileInput.corrupt_gzip(corrupted_file_path)
225
+
226
+ log_completed_path = ::File.join(directory, "C_completed.txt")
227
+ f = File.new(log_completed_path, "w")
228
+ f.close()
229
+
230
+ conf = <<-CONFIG
231
+ input {
232
+ file {
233
+ type => "blah"
234
+ path => "#{corrupted_file_path}"
235
+ mode => "read"
236
+ file_completed_action => "log_and_delete"
237
+ file_completed_log_path => "#{log_completed_path}"
238
+ check_archive_validity => true
239
+ }
240
+ }
241
+ CONFIG
242
+
243
+ events = input(conf) do |pipeline, queue|
244
+ wait(1)
245
+ expect(IO.read(log_completed_path)).to be_empty
246
+ end
247
+ end
248
+ end
249
+ end
250
+
251
+ let(:temp_directory) { Stud::Temporary.directory }
252
+ let(:interval) { 0.1 }
253
+ let(:options) do
254
+ {
255
+ 'mode' => "read",
256
+ 'path' => "#{temp_directory}/*",
257
+ 'stat_interval' => interval,
258
+ 'discover_interval' => interval,
259
+ 'sincedb_path' => "#{temp_directory}/.sincedb",
260
+ 'sincedb_write_interval' => interval
261
+ }
262
+ end
263
+
264
+ let(:queue) { Queue.new }
265
+ let(:plugin) { LogStash::Inputs::File.new(options) }
266
+
267
+ describe 'delete on complete' do
268
+
269
+ let(:options) do
270
+ super.merge({ 'file_completed_action' => "delete", 'exit_after_read' => false })
271
+ end
272
+
273
+ let(:sample_file) { File.join(temp_directory, "sample.log") }
274
+
275
+ before do
276
+ plugin.register
277
+ @run_thread = Thread.new(plugin) do |plugin|
278
+ Thread.current.abort_on_exception = true
279
+ plugin.run queue
280
+ end
281
+
282
+ File.open(sample_file, 'w') { |fd| fd.write("sample-content\n") }
283
+
284
+ wait_for_start_processing(@run_thread)
217
285
  end
286
+
287
+ after { plugin.stop }
288
+
289
+ it 'processes a file' do
290
+ wait_for_file_removal(sample_file) # watched discovery
291
+
292
+ expect( plugin.queue.size ).to eql 1
293
+ event = plugin.queue.pop
294
+ expect( event.get('message') ).to eql 'sample-content'
295
+ end
296
+
297
+ it 'removes watched file from collection' do
298
+ wait_for_file_removal(sample_file) # watched discovery
299
+ sleep(0.25) # give CI some space to execute the removal
300
+ # TODO shouldn't be necessary once WatchedFileCollection does proper locking
301
+ watched_files = plugin.watcher.watch.watched_files_collection
302
+ expect( watched_files ).to be_empty
303
+ end
304
+ end
305
+
306
+ describe 'sincedb cleanup' do
307
+
308
+ let(:options) do
309
+ super.merge(
310
+ 'sincedb_path' => sincedb_path,
311
+ 'sincedb_clean_after' => '1.0 seconds',
312
+ 'sincedb_write_interval' => 0.25,
313
+ 'stat_interval' => 0.1,
314
+ )
315
+ end
316
+
317
+ let(:sincedb_path) { "#{temp_directory}/.sincedb" }
318
+
319
+ let(:sample_file) { File.join(temp_directory, "sample.txt") }
320
+
321
+ before do
322
+ plugin.register
323
+ @run_thread = Thread.new(plugin) do |plugin|
324
+ Thread.current.abort_on_exception = true
325
+ plugin.run queue
326
+ end
327
+
328
+ File.open(sample_file, 'w') { |fd| fd.write("line1\nline2\n") }
329
+
330
+ wait_for_start_processing(@run_thread)
331
+ end
332
+
333
+ after { plugin.stop }
334
+
335
+ it 'cleans up sincedb entry' do
336
+ wait_for_file_removal(sample_file) # watched discovery
337
+
338
+ sincedb_content = File.read(sincedb_path).strip
339
+ expect( sincedb_content ).to_not be_empty
340
+
341
+ Stud.try(3.times) do
342
+ sleep(1.5) # > sincedb_clean_after
343
+
344
+ sincedb_content = File.read(sincedb_path).strip
345
+ expect( sincedb_content ).to be_empty
346
+ end
347
+ end
348
+
349
+ end
350
+
351
+ private
352
+
353
+ def wait_for_start_processing(run_thread, timeout: 1.0)
354
+ begin
355
+ Timeout.timeout(timeout) do
356
+ sleep(0.01) while run_thread.status != 'sleep'
357
+ sleep(timeout) unless plugin.queue
358
+ end
359
+ rescue Timeout::Error
360
+ raise "plugin did not start processing (timeout: #{timeout})" unless plugin.queue
361
+ else
362
+ raise "plugin did not start processing" unless plugin.queue
363
+ end
364
+ end
365
+
366
+ def wait_for_file_removal(path, timeout: 3 * interval)
367
+ wait(timeout).for { File.exist?(path) }.to be_falsey
218
368
  end
219
369
  end
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require "helpers/spec_helper"
4
+ require "logstash/devutils/rspec/shared_examples"
4
5
  require "logstash/inputs/file"
5
6
 
6
7
  require "tempfile"
@@ -66,7 +67,7 @@ describe LogStash::Inputs::File do
66
67
  path => "#{path_path}"
67
68
  start_position => "beginning"
68
69
  sincedb_path => "#{sincedb_path}"
69
- "file_sort_by" => "path"
70
+ file_sort_by => "path"
70
71
  delimiter => "#{TEST_FILE_DELIMITER}"
71
72
  }
72
73
  }
@@ -175,7 +176,7 @@ describe LogStash::Inputs::File do
175
176
  context "when sincedb_path is a directory" do
176
177
  let(:name) { "E" }
177
178
  subject { LogStash::Inputs::File.new("path" => path_path, "sincedb_path" => directory) }
178
-
179
+
179
180
  after :each do
180
181
  FileUtils.rm_rf(sincedb_path)
181
182
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-file
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.1.16
4
+ version: 4.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-20 00:00:00.000000000 Z
11
+ date: 2020-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -58,6 +58,20 @@ dependencies:
58
58
  - - ">="
59
59
  - !ruby/object:Gem::Version
60
60
  version: '0'
61
+ - !ruby/object:Gem::Dependency
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '1.0'
67
+ name: concurrent-ruby
68
+ prerelease: false
69
+ type: :runtime
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.0'
61
75
  - !ruby/object:Gem::Dependency
62
76
  requirement: !ruby/object:Gem::Requirement
63
77
  requirements:
@@ -89,17 +103,17 @@ dependencies:
89
103
  - !ruby/object:Gem::Dependency
90
104
  requirement: !ruby/object:Gem::Requirement
91
105
  requirements:
92
- - - "~>"
106
+ - - ">="
93
107
  - !ruby/object:Gem::Version
94
- version: '1.3'
108
+ version: '0'
95
109
  name: logstash-devutils
96
110
  prerelease: false
97
111
  type: :development
98
112
  version_requirements: !ruby/object:Gem::Requirement
99
113
  requirements:
100
- - - "~>"
114
+ - - ">="
101
115
  - !ruby/object:Gem::Version
102
- version: '1.3'
116
+ version: '0'
103
117
  - !ruby/object:Gem::Dependency
104
118
  requirement: !ruby/object:Gem::Requirement
105
119
  requirements:
@@ -178,6 +192,7 @@ files:
178
192
  - lib/filewatch/observing_base.rb
179
193
  - lib/filewatch/observing_read.rb
180
194
  - lib/filewatch/observing_tail.rb
195
+ - lib/filewatch/processor.rb
181
196
  - lib/filewatch/read_mode/handlers/base.rb
182
197
  - lib/filewatch/read_mode/handlers/read_file.rb
183
198
  - lib/filewatch/read_mode/handlers/read_zip_file.rb