logstash-input-file 4.1.17 → 4.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/lib/filewatch/discoverer.rb +9 -8
  4. data/lib/filewatch/observing_base.rb +1 -12
  5. data/lib/filewatch/processor.rb +55 -0
  6. data/lib/filewatch/read_mode/handlers/base.rb +12 -11
  7. data/lib/filewatch/read_mode/handlers/read_file.rb +26 -8
  8. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +8 -6
  9. data/lib/filewatch/read_mode/processor.rb +22 -36
  10. data/lib/filewatch/settings.rb +1 -2
  11. data/lib/filewatch/sincedb_collection.rb +39 -40
  12. data/lib/filewatch/sincedb_record_serializer.rb +5 -11
  13. data/lib/filewatch/stat/generic.rb +8 -13
  14. data/lib/filewatch/stat/windows_path.rb +7 -9
  15. data/lib/filewatch/tail_mode/handlers/base.rb +32 -23
  16. data/lib/filewatch/tail_mode/handlers/delete.rb +2 -4
  17. data/lib/filewatch/tail_mode/handlers/shrink.rb +2 -3
  18. data/lib/filewatch/tail_mode/handlers/unignore.rb +4 -4
  19. data/lib/filewatch/tail_mode/processor.rb +47 -54
  20. data/lib/filewatch/watch.rb +12 -14
  21. data/lib/filewatch/watched_file.rb +25 -14
  22. data/lib/filewatch/watched_files_collection.rb +11 -74
  23. data/lib/jars/filewatch-1.0.1.jar +0 -0
  24. data/lib/logstash/inputs/delete_completed_file_handler.rb +5 -0
  25. data/lib/logstash/inputs/file.rb +28 -13
  26. data/lib/logstash/inputs/file_listener.rb +3 -14
  27. data/logstash-input-file.gemspec +2 -1
  28. data/spec/filewatch/reading_spec.rb +60 -9
  29. data/spec/filewatch/settings_spec.rb +3 -0
  30. data/spec/filewatch/sincedb_record_serializer_spec.rb +6 -2
  31. data/spec/filewatch/spec_helper.rb +12 -14
  32. data/spec/filewatch/tailing_spec.rb +14 -12
  33. data/spec/filewatch/watched_file_spec.rb +30 -0
  34. data/spec/filewatch/watched_files_collection_spec.rb +62 -8
  35. data/spec/helpers/spec_helper.rb +1 -0
  36. data/spec/inputs/file_read_spec.rb +119 -0
  37. metadata +17 -2
Binary file
@@ -2,8 +2,13 @@
2
2
 
3
3
  module LogStash module Inputs
4
4
  class DeleteCompletedFileHandler
5
+ def initialize(watch)
6
+ @watch = watch
7
+ end
8
+
5
9
  def handle(path)
6
10
  Pathname.new(path).unlink rescue nil
11
+ @watch.watched_files_collection.remove_paths([path])
7
12
  end
8
13
  end
9
14
  end end
@@ -6,6 +6,7 @@ require "logstash/codecs/identity_map_codec"
6
6
  require "pathname"
7
7
  require "socket" # for Socket.gethostname
8
8
  require "fileutils"
9
+ require "concurrent/atomic/atomic_reference"
9
10
 
10
11
  require_relative "file/patch"
11
12
  require_relative "file_listener"
@@ -247,6 +248,9 @@ class File < LogStash::Inputs::Base
247
248
  end
248
249
  end
249
250
 
251
+ # @private used in specs
252
+ attr_reader :watcher
253
+
250
254
  def register
251
255
  require "addressable/uri"
252
256
  require "digest/md5"
@@ -274,8 +278,6 @@ class File < LogStash::Inputs::Base
274
278
  :check_archive_validity => @check_archive_validity,
275
279
  }
276
280
 
277
- @completed_file_handlers = []
278
-
279
281
  @path.each do |path|
280
282
  if Pathname.new(path).relative?
281
283
  raise ArgumentError.new("File paths must be absolute, relative path specified: #{path}")
@@ -319,15 +321,10 @@ class File < LogStash::Inputs::Base
319
321
  @watcher_class = FileWatch::ObservingTail
320
322
  else
321
323
  @watcher_class = FileWatch::ObservingRead
322
- if @file_completed_action.include?('log')
323
- @completed_file_handlers << LogCompletedFileHandler.new(@file_completed_log_path)
324
- end
325
- if @file_completed_action.include?('delete')
326
- @completed_file_handlers << DeleteCompletedFileHandler.new
327
- end
328
324
  end
329
325
  @codec = LogStash::Codecs::IdentityMapCodec.new(@codec)
330
326
  @completely_stopped = Concurrent::AtomicBoolean.new
327
+ @queue = Concurrent::AtomicReference.new
331
328
  end # def register
332
329
 
333
330
  def completely_stopped?
@@ -335,8 +332,9 @@ class File < LogStash::Inputs::Base
335
332
  @completely_stopped.true?
336
333
  end
337
334
 
335
+ # The WatchedFile calls back here as `observer.listener_for(@path)`
336
+ # @param [String] path the identity
338
337
  def listener_for(path)
339
- # path is the identity
340
338
  FileListener.new(path, self)
341
339
  end
342
340
 
@@ -344,13 +342,25 @@ class File < LogStash::Inputs::Base
344
342
  # if the pipeline restarts this input,
345
343
  # make sure previous files are closed
346
344
  stop
345
+
347
346
  @watcher = @watcher_class.new(@filewatch_config)
347
+
348
+ @completed_file_handlers = []
349
+ if read_mode?
350
+ if @file_completed_action.include?('log')
351
+ @completed_file_handlers << LogCompletedFileHandler.new(@file_completed_log_path)
352
+ end
353
+ if @file_completed_action.include?('delete')
354
+ @completed_file_handlers << DeleteCompletedFileHandler.new(@watcher.watch)
355
+ end
356
+ end
357
+
348
358
  @path.each { |path| @watcher.watch_this(path) }
349
359
  end
350
360
 
351
361
  def run(queue)
352
362
  start_processing
353
- @queue = queue
363
+ @queue.set queue
354
364
  @watcher.subscribe(self) # halts here until quit is called
355
365
  # last action of the subscribe call is to write the sincedb
356
366
  exit_flush
@@ -361,18 +371,18 @@ class File < LogStash::Inputs::Base
361
371
  event.set("[@metadata][host]", @host)
362
372
  event.set("host", @host) unless event.include?("host")
363
373
  decorate(event)
364
- @queue << event
374
+ @queue.get << event
365
375
  end
366
376
 
367
377
  def handle_deletable_path(path)
368
378
  return if tail_mode?
369
379
  return if @completed_file_handlers.empty?
380
+ @logger.debug? && @logger.debug(__method__.to_s, :path => path)
370
381
  @completed_file_handlers.each { |handler| handler.handle(path) }
371
382
  end
372
383
 
373
384
  def log_line_received(path, line)
374
- return unless @logger.debug?
375
- @logger.debug("Received line", :path => path, :text => line)
385
+ @logger.debug? && @logger.debug("Received line", :path => path, :text => line)
376
386
  end
377
387
 
378
388
  def stop
@@ -382,6 +392,11 @@ class File < LogStash::Inputs::Base
382
392
  end
383
393
  end
384
394
 
395
+ # @private used in specs
396
+ def queue
397
+ @queue.get
398
+ end
399
+
385
400
  private
386
401
 
387
402
  def build_sincedb_base_from_settings(settings)
@@ -7,9 +7,9 @@ module LogStash module Inputs
7
7
  class FileListener
8
8
  attr_reader :input, :path, :data
9
9
  # construct with link back to the input plugin instance.
10
- def initialize(path, input)
10
+ def initialize(path, input, data = nil)
11
11
  @path, @input = path, input
12
- @data = nil
12
+ @data = data
13
13
  end
14
14
 
15
15
  def opened
@@ -36,7 +36,7 @@ module LogStash module Inputs
36
36
  def accept(data)
37
37
  # and push transient data filled dup listener downstream
38
38
  input.log_line_received(path, data)
39
- input.codec.accept(dup_adding_state(data))
39
+ input.codec.accept(self.class.new(path, input, data))
40
40
  end
41
41
 
42
42
  def process_event(event)
@@ -45,17 +45,6 @@ module LogStash module Inputs
45
45
  input.post_process_this(event)
46
46
  end
47
47
 
48
- def add_state(data)
49
- @data = data
50
- self
51
- end
52
-
53
- private
54
-
55
- # duplicate and add state for downstream
56
- def dup_adding_state(line)
57
- self.class.new(path, input).add_state(line)
58
- end
59
48
  end
60
49
 
61
50
  class FlushableListener < FileListener
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-input-file'
4
- s.version = '4.1.17'
4
+ s.version = '4.2.3'
5
5
  s.licenses = ['Apache-2.0']
6
6
  s.summary = "Streams events from files"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -31,6 +31,7 @@ Gem::Specification.new do |s|
31
31
  s.add_runtime_dependency 'addressable'
32
32
  end
33
33
 
34
+ s.add_runtime_dependency 'concurrent-ruby', '~> 1.0'
34
35
  s.add_runtime_dependency 'logstash-codec-multiline', ['~> 3.0']
35
36
 
36
37
  s.add_development_dependency 'stud', ['~> 0.0.19']
@@ -23,9 +23,12 @@ module FileWatch
23
23
  let(:start_new_files_at) { :end } # should be irrelevant for read mode
24
24
  let(:opts) do
25
25
  {
26
- :stat_interval => stat_interval, :start_new_files_at => start_new_files_at,
27
- :delimiter => "\n", :discover_interval => discover_interval,
28
- :ignore_older => 3600, :sincedb_path => sincedb_path
26
+ :stat_interval => stat_interval,
27
+ :start_new_files_at => start_new_files_at,
28
+ :delimiter => "\n",
29
+ :discover_interval => discover_interval,
30
+ :ignore_older => 3600,
31
+ :sincedb_path => sincedb_path
29
32
  }
30
33
  end
31
34
  let(:observer) { TestObserver.new }
@@ -147,6 +150,50 @@ module FileWatch
147
150
  end
148
151
  end
149
152
 
153
+ context "when watching directory with files and adding a new file" do
154
+ let(:file_path2) { ::File.join(directory, "2.log") }
155
+ let(:file_path3) { ::File.join(directory, "3.log") }
156
+
157
+ let(:opts) { super.merge(:file_sort_by => "last_modified") }
158
+ let(:lines) { [] }
159
+ let(:observer) { TestObserver.new(lines) }
160
+
161
+
162
+ let(:listener2) { observer.listener_for(file_path2) }
163
+ let(:listener3) { observer.listener_for(file_path3) }
164
+
165
+ let(:actions) do
166
+ RSpec::Sequencing.run("create12") do
167
+ File.open(file_path, "w") { |file| file.write("string11\nstring12") }
168
+ File.open(file_path2, "w") { |file| file.write("string21\nstring22") }
169
+ end
170
+ .then("watch") do
171
+ reading.watch_this(watch_dir)
172
+ end
173
+ .then("wait12") do
174
+ wait(2).for { listener1.calls.last == :delete && listener2.calls.last == :delete }.to eq(true)
175
+ end
176
+ .then_after(2, "create3") do
177
+ File.open(file_path3, "w") { |file| file.write("string31\nstring32") }
178
+ end
179
+ .then("wait3") do
180
+ wait(2).for { listener3.calls.last == :delete }.to eq(true)
181
+ end
182
+ .then("quit") do
183
+ reading.quit
184
+ end
185
+ end
186
+
187
+ it "reads all (3) files" do
188
+ actions.activate_quietly
189
+ reading.subscribe(observer)
190
+ actions.assert_no_errors
191
+ expect(lines.last).to eq 'string32'
192
+ expect(lines.sort).to eq %w(string11 string12 string21 string22 string31 string32)
193
+ expect( reading.watch.watched_files_collection.paths ).to eq [ file_path, file_path2, file_path3 ]
194
+ end
195
+ end
196
+
150
197
  context "when watching a directory with files using exit_after_read" do
151
198
  let(:opts) { super.merge(:exit_after_read => true, :max_open_files => 2) }
152
199
  let(:file_path3) { ::File.join(directory, "3.log") }
@@ -159,40 +206,45 @@ module FileWatch
159
206
  let(:listener6) { observer.listener_for(file_path6) }
160
207
 
161
208
  it "the file is read" do
162
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
209
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
163
210
  reading.watch_this(watch_dir)
164
211
  reading.subscribe(observer)
165
212
  expect(listener3.lines).to eq(["line1", "line2"])
166
213
  end
214
+
167
215
  it "multiple files are read" do
168
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
216
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
169
217
  File.open(file_path4, "w") { |file| file.write("line3\nline4\n") }
170
218
  reading.watch_this(watch_dir)
171
219
  reading.subscribe(observer)
172
220
  expect(listener3.lines.sort).to eq(["line1", "line2", "line3", "line4"])
173
221
  end
222
+
174
223
  it "multiple files are read even if max_open_files is smaller then number of files" do
175
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
224
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
176
225
  File.open(file_path4, "w") { |file| file.write("line3\nline4\n") }
177
226
  File.open(file_path5, "w") { |file| file.write("line5\nline6\n") }
178
227
  reading.watch_this(watch_dir)
179
228
  reading.subscribe(observer)
180
229
  expect(listener3.lines.sort).to eq(["line1", "line2", "line3", "line4", "line5", "line6"])
181
230
  end
231
+
182
232
  it "file as marked as reading_completed" do
183
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
233
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
184
234
  reading.watch_this(watch_dir)
185
235
  reading.subscribe(observer)
186
236
  expect(listener3.calls).to eq([:open, :accept, :accept, :eof, :delete, :reading_completed])
187
237
  end
238
+
188
239
  it "sincedb works correctly" do
189
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
240
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
190
241
  reading.watch_this(watch_dir)
191
242
  reading.subscribe(observer)
192
243
  sincedb_record_fields = File.read(sincedb_path).split(" ")
193
244
  position_field_index = 3
194
245
  expect(sincedb_record_fields[position_field_index]).to eq("12")
195
246
  end
247
+
196
248
  it "does not include new files added after start" do
197
249
  File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
198
250
  reading.watch_this(watch_dir)
@@ -201,7 +253,6 @@ module FileWatch
201
253
  expect(listener3.lines).to eq(["line1", "line2"])
202
254
  expect(listener3.calls).to eq([:open, :accept, :accept, :eof, :delete, :reading_completed])
203
255
  expect(listener6.calls).to eq([])
204
-
205
256
  end
206
257
 
207
258
  end
@@ -1,3 +1,6 @@
1
+ require 'logstash/devutils/rspec/spec_helper'
2
+ require 'logstash/inputs/friendly_durations'
3
+
1
4
  describe FileWatch::Settings do
2
5
 
3
6
  context "when create from options" do
@@ -9,7 +9,9 @@ module FileWatch
9
9
  let(:io) { StringIO.new }
10
10
  let(:db) { Hash.new }
11
11
 
12
- subject { SincedbRecordSerializer.new(SincedbRecordSerializer.days_to_seconds(14)) }
12
+ let(:sincedb_value_expiry) { SincedbRecordSerializer.days_to_seconds(14) }
13
+
14
+ subject { SincedbRecordSerializer.new(sincedb_value_expiry) }
13
15
 
14
16
  context "deserialize from IO" do
15
17
  it 'reads V1 records' do
@@ -82,8 +84,10 @@ module FileWatch
82
84
  end
83
85
 
84
86
  context "given a non default `sincedb_clean_after`" do
87
+
88
+ let(:sincedb_value_expiry) { SincedbRecordSerializer.days_to_seconds(2) }
89
+
85
90
  it "does not write expired db entries to an IO object" do
86
- subject.update_sincedb_value_expiry_from_days(2)
87
91
  one_day_ago = Time.now.to_f - (1.0*24*3600)
88
92
  three_days_ago = one_day_ago - (2.0*24*3600)
89
93
  db[InodeStruct.new("42424242", 2, 5)] = SincedbValue.new(42, one_day_ago)
@@ -117,17 +117,12 @@ module FileWatch
117
117
  class Listener
118
118
  attr_reader :path, :lines, :calls
119
119
 
120
- def initialize(path)
120
+ def initialize(path, lines)
121
121
  @path = path
122
- @lines = Concurrent::Array.new
122
+ @lines = lines || Concurrent::Array.new
123
123
  @calls = Concurrent::Array.new
124
124
  end
125
125
 
126
- def add_lines(lines)
127
- @lines = lines
128
- self
129
- end
130
-
131
126
  def accept(line)
132
127
  @lines << line
133
128
  @calls << :accept
@@ -161,12 +156,7 @@ module FileWatch
161
156
  attr_reader :listeners
162
157
 
163
158
  def initialize(combined_lines = nil)
164
- listener_proc = if combined_lines.nil?
165
- lambda{|k| Listener.new(k) }
166
- else
167
- lambda{|k| Listener.new(k).add_lines(combined_lines) }
168
- end
169
- @listeners = Concurrent::Hash.new {|hash, key| hash[key] = listener_proc.call(key) }
159
+ @listeners = Concurrent::Hash.new { |hash, key| hash[key] = new_listener(key, combined_lines) }
170
160
  end
171
161
 
172
162
  def listener_for(path)
@@ -174,6 +164,14 @@ module FileWatch
174
164
  end
175
165
 
176
166
  def clear
177
- @listeners.clear; end
167
+ @listeners.clear
168
+ end
169
+
170
+ private
171
+
172
+ def new_listener(path, lines = nil)
173
+ Listener.new(path, lines)
174
+ end
175
+
178
176
  end
179
177
  end
@@ -10,15 +10,19 @@ module FileWatch
10
10
  let(:file_path) { ::File.join(directory, "1#{suffix}.log") }
11
11
  let(:file_path2) { ::File.join(directory, "2#{suffix}.log") }
12
12
  let(:file_path3) { ::File.join(directory, "3#{suffix}.log") }
13
- let(:max) { 4095 }
13
+ let(:max) { 4095 }
14
14
  let(:stat_interval) { 0.1 }
15
15
  let(:discover_interval) { 4 }
16
16
  let(:start_new_files_at) { :end }
17
17
  let(:sincedb_path) { ::File.join(directory, "tailing.sdb") }
18
18
  let(:opts) do
19
19
  {
20
- :stat_interval => stat_interval, :start_new_files_at => start_new_files_at, :max_open_files => max,
21
- :delimiter => "\n", :discover_interval => discover_interval, :sincedb_path => sincedb_path,
20
+ :stat_interval => stat_interval,
21
+ :start_new_files_at => start_new_files_at,
22
+ :max_open_files => max,
23
+ :delimiter => "\n",
24
+ :discover_interval => discover_interval,
25
+ :sincedb_path => sincedb_path,
22
26
  :file_sort_by => "path"
23
27
  }
24
28
  end
@@ -30,12 +34,11 @@ module FileWatch
30
34
 
31
35
  before do
32
36
  directory
33
- wait(1.0).for{Dir.exist?(directory)}.to eq(true)
37
+ wait(1.0).for { Dir.exist?(directory) }.to eq(true)
34
38
  end
35
39
 
36
40
  after do
37
41
  FileUtils.rm_rf(directory)
38
- wait(1.0).for{Dir.exist?(directory)}.to eq(false)
39
42
  end
40
43
 
41
44
  describe "max open files (set to 1)" do
@@ -95,16 +98,16 @@ module FileWatch
95
98
  let(:actions) do
96
99
  RSpec::Sequencing
97
100
  .run("create file") do
98
- File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") }
101
+ File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") }
99
102
  end
100
103
  .then_after(0.1, "begin watching") do
101
104
  tailing.watch_this(watch_dir)
102
105
  end
103
- .then_after(2, "add content") do
104
- File.open(file_path, "ab") { |file| file.write("line1\nline2\n") }
106
+ .then_after(1.0, "add content") do
107
+ File.open(file_path, "ab") { |file| file.write("line1\nline2\n") }
105
108
  end
106
109
  .then("wait") do
107
- wait(0.75).for{listener1.lines}.to eq(["line1", "line2"])
110
+ wait(0.75).for { listener1.lines }.to_not be_empty
108
111
  end
109
112
  .then("quit") do
110
113
  tailing.quit
@@ -113,7 +116,6 @@ module FileWatch
113
116
 
114
117
  it "only the new content is read" do
115
118
  actions.activate_quietly
116
- tailing.watch_this(watch_dir)
117
119
  tailing.subscribe(observer)
118
120
  actions.assert_no_errors
119
121
  expect(listener1.calls).to eq([:open, :accept, :accept])
@@ -132,7 +134,7 @@ module FileWatch
132
134
  File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
133
135
  end
134
136
  .then("wait") do
135
- wait(0.75).for{listener1.lines.size}.to eq(2)
137
+ wait(0.75).for { listener1.lines }.to_not be_empty
136
138
  end
137
139
  .then("quit") do
138
140
  tailing.quit
@@ -154,7 +156,7 @@ module FileWatch
154
156
  # so when a stat is taken on the file an error is raised
155
157
  let(:suffix) { "E" }
156
158
  let(:quit_after) { 0.2 }
157
- let(:stat) { double("stat", :size => 100, :modified_at => Time.now.to_f, :identifier => nil, :inode => 234567, :inode_struct => InodeStruct.new("234567", 1, 5)) }
159
+ let(:stat) { double("stat", :size => 100, :modified_at => Time.now.to_f, :inode => 234567, :inode_struct => InodeStruct.new("234567", 1, 5)) }
158
160
  let(:watched_file) { WatchedFile.new(file_path, stat, tailing.settings) }
159
161
  before do
160
162
  allow(stat).to receive(:restat).and_raise(Errno::ENOENT)