logstash-input-file 4.1.17 → 4.2.3

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/lib/filewatch/discoverer.rb +9 -8
  4. data/lib/filewatch/observing_base.rb +1 -12
  5. data/lib/filewatch/processor.rb +55 -0
  6. data/lib/filewatch/read_mode/handlers/base.rb +12 -11
  7. data/lib/filewatch/read_mode/handlers/read_file.rb +26 -8
  8. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +8 -6
  9. data/lib/filewatch/read_mode/processor.rb +22 -36
  10. data/lib/filewatch/settings.rb +1 -2
  11. data/lib/filewatch/sincedb_collection.rb +39 -40
  12. data/lib/filewatch/sincedb_record_serializer.rb +5 -11
  13. data/lib/filewatch/stat/generic.rb +8 -13
  14. data/lib/filewatch/stat/windows_path.rb +7 -9
  15. data/lib/filewatch/tail_mode/handlers/base.rb +32 -23
  16. data/lib/filewatch/tail_mode/handlers/delete.rb +2 -4
  17. data/lib/filewatch/tail_mode/handlers/shrink.rb +2 -3
  18. data/lib/filewatch/tail_mode/handlers/unignore.rb +4 -4
  19. data/lib/filewatch/tail_mode/processor.rb +47 -54
  20. data/lib/filewatch/watch.rb +12 -14
  21. data/lib/filewatch/watched_file.rb +25 -14
  22. data/lib/filewatch/watched_files_collection.rb +11 -74
  23. data/lib/jars/filewatch-1.0.1.jar +0 -0
  24. data/lib/logstash/inputs/delete_completed_file_handler.rb +5 -0
  25. data/lib/logstash/inputs/file.rb +28 -13
  26. data/lib/logstash/inputs/file_listener.rb +3 -14
  27. data/logstash-input-file.gemspec +2 -1
  28. data/spec/filewatch/reading_spec.rb +60 -9
  29. data/spec/filewatch/settings_spec.rb +3 -0
  30. data/spec/filewatch/sincedb_record_serializer_spec.rb +6 -2
  31. data/spec/filewatch/spec_helper.rb +12 -14
  32. data/spec/filewatch/tailing_spec.rb +14 -12
  33. data/spec/filewatch/watched_file_spec.rb +30 -0
  34. data/spec/filewatch/watched_files_collection_spec.rb +62 -8
  35. data/spec/helpers/spec_helper.rb +1 -0
  36. data/spec/inputs/file_read_spec.rb +119 -0
  37. metadata +17 -2
Binary file
@@ -2,8 +2,13 @@
2
2
 
3
3
  module LogStash module Inputs
4
4
  class DeleteCompletedFileHandler
5
+ def initialize(watch)
6
+ @watch = watch
7
+ end
8
+
5
9
  def handle(path)
6
10
  Pathname.new(path).unlink rescue nil
11
+ @watch.watched_files_collection.remove_paths([path])
7
12
  end
8
13
  end
9
14
  end end
@@ -6,6 +6,7 @@ require "logstash/codecs/identity_map_codec"
6
6
  require "pathname"
7
7
  require "socket" # for Socket.gethostname
8
8
  require "fileutils"
9
+ require "concurrent/atomic/atomic_reference"
9
10
 
10
11
  require_relative "file/patch"
11
12
  require_relative "file_listener"
@@ -247,6 +248,9 @@ class File < LogStash::Inputs::Base
247
248
  end
248
249
  end
249
250
 
251
+ # @private used in specs
252
+ attr_reader :watcher
253
+
250
254
  def register
251
255
  require "addressable/uri"
252
256
  require "digest/md5"
@@ -274,8 +278,6 @@ class File < LogStash::Inputs::Base
274
278
  :check_archive_validity => @check_archive_validity,
275
279
  }
276
280
 
277
- @completed_file_handlers = []
278
-
279
281
  @path.each do |path|
280
282
  if Pathname.new(path).relative?
281
283
  raise ArgumentError.new("File paths must be absolute, relative path specified: #{path}")
@@ -319,15 +321,10 @@ class File < LogStash::Inputs::Base
319
321
  @watcher_class = FileWatch::ObservingTail
320
322
  else
321
323
  @watcher_class = FileWatch::ObservingRead
322
- if @file_completed_action.include?('log')
323
- @completed_file_handlers << LogCompletedFileHandler.new(@file_completed_log_path)
324
- end
325
- if @file_completed_action.include?('delete')
326
- @completed_file_handlers << DeleteCompletedFileHandler.new
327
- end
328
324
  end
329
325
  @codec = LogStash::Codecs::IdentityMapCodec.new(@codec)
330
326
  @completely_stopped = Concurrent::AtomicBoolean.new
327
+ @queue = Concurrent::AtomicReference.new
331
328
  end # def register
332
329
 
333
330
  def completely_stopped?
@@ -335,8 +332,9 @@ class File < LogStash::Inputs::Base
335
332
  @completely_stopped.true?
336
333
  end
337
334
 
335
+ # The WatchedFile calls back here as `observer.listener_for(@path)`
336
+ # @param [String] path the identity
338
337
  def listener_for(path)
339
- # path is the identity
340
338
  FileListener.new(path, self)
341
339
  end
342
340
 
@@ -344,13 +342,25 @@ class File < LogStash::Inputs::Base
344
342
  # if the pipeline restarts this input,
345
343
  # make sure previous files are closed
346
344
  stop
345
+
347
346
  @watcher = @watcher_class.new(@filewatch_config)
347
+
348
+ @completed_file_handlers = []
349
+ if read_mode?
350
+ if @file_completed_action.include?('log')
351
+ @completed_file_handlers << LogCompletedFileHandler.new(@file_completed_log_path)
352
+ end
353
+ if @file_completed_action.include?('delete')
354
+ @completed_file_handlers << DeleteCompletedFileHandler.new(@watcher.watch)
355
+ end
356
+ end
357
+
348
358
  @path.each { |path| @watcher.watch_this(path) }
349
359
  end
350
360
 
351
361
  def run(queue)
352
362
  start_processing
353
- @queue = queue
363
+ @queue.set queue
354
364
  @watcher.subscribe(self) # halts here until quit is called
355
365
  # last action of the subscribe call is to write the sincedb
356
366
  exit_flush
@@ -361,18 +371,18 @@ class File < LogStash::Inputs::Base
361
371
  event.set("[@metadata][host]", @host)
362
372
  event.set("host", @host) unless event.include?("host")
363
373
  decorate(event)
364
- @queue << event
374
+ @queue.get << event
365
375
  end
366
376
 
367
377
  def handle_deletable_path(path)
368
378
  return if tail_mode?
369
379
  return if @completed_file_handlers.empty?
380
+ @logger.debug? && @logger.debug(__method__.to_s, :path => path)
370
381
  @completed_file_handlers.each { |handler| handler.handle(path) }
371
382
  end
372
383
 
373
384
  def log_line_received(path, line)
374
- return unless @logger.debug?
375
- @logger.debug("Received line", :path => path, :text => line)
385
+ @logger.debug? && @logger.debug("Received line", :path => path, :text => line)
376
386
  end
377
387
 
378
388
  def stop
@@ -382,6 +392,11 @@ class File < LogStash::Inputs::Base
382
392
  end
383
393
  end
384
394
 
395
+ # @private used in specs
396
+ def queue
397
+ @queue.get
398
+ end
399
+
385
400
  private
386
401
 
387
402
  def build_sincedb_base_from_settings(settings)
@@ -7,9 +7,9 @@ module LogStash module Inputs
7
7
  class FileListener
8
8
  attr_reader :input, :path, :data
9
9
  # construct with link back to the input plugin instance.
10
- def initialize(path, input)
10
+ def initialize(path, input, data = nil)
11
11
  @path, @input = path, input
12
- @data = nil
12
+ @data = data
13
13
  end
14
14
 
15
15
  def opened
@@ -36,7 +36,7 @@ module LogStash module Inputs
36
36
  def accept(data)
37
37
  # and push transient data filled dup listener downstream
38
38
  input.log_line_received(path, data)
39
- input.codec.accept(dup_adding_state(data))
39
+ input.codec.accept(self.class.new(path, input, data))
40
40
  end
41
41
 
42
42
  def process_event(event)
@@ -45,17 +45,6 @@ module LogStash module Inputs
45
45
  input.post_process_this(event)
46
46
  end
47
47
 
48
- def add_state(data)
49
- @data = data
50
- self
51
- end
52
-
53
- private
54
-
55
- # duplicate and add state for downstream
56
- def dup_adding_state(line)
57
- self.class.new(path, input).add_state(line)
58
- end
59
48
  end
60
49
 
61
50
  class FlushableListener < FileListener
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-input-file'
4
- s.version = '4.1.17'
4
+ s.version = '4.2.3'
5
5
  s.licenses = ['Apache-2.0']
6
6
  s.summary = "Streams events from files"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -31,6 +31,7 @@ Gem::Specification.new do |s|
31
31
  s.add_runtime_dependency 'addressable'
32
32
  end
33
33
 
34
+ s.add_runtime_dependency 'concurrent-ruby', '~> 1.0'
34
35
  s.add_runtime_dependency 'logstash-codec-multiline', ['~> 3.0']
35
36
 
36
37
  s.add_development_dependency 'stud', ['~> 0.0.19']
@@ -23,9 +23,12 @@ module FileWatch
23
23
  let(:start_new_files_at) { :end } # should be irrelevant for read mode
24
24
  let(:opts) do
25
25
  {
26
- :stat_interval => stat_interval, :start_new_files_at => start_new_files_at,
27
- :delimiter => "\n", :discover_interval => discover_interval,
28
- :ignore_older => 3600, :sincedb_path => sincedb_path
26
+ :stat_interval => stat_interval,
27
+ :start_new_files_at => start_new_files_at,
28
+ :delimiter => "\n",
29
+ :discover_interval => discover_interval,
30
+ :ignore_older => 3600,
31
+ :sincedb_path => sincedb_path
29
32
  }
30
33
  end
31
34
  let(:observer) { TestObserver.new }
@@ -147,6 +150,50 @@ module FileWatch
147
150
  end
148
151
  end
149
152
 
153
+ context "when watching directory with files and adding a new file" do
154
+ let(:file_path2) { ::File.join(directory, "2.log") }
155
+ let(:file_path3) { ::File.join(directory, "3.log") }
156
+
157
+ let(:opts) { super.merge(:file_sort_by => "last_modified") }
158
+ let(:lines) { [] }
159
+ let(:observer) { TestObserver.new(lines) }
160
+
161
+
162
+ let(:listener2) { observer.listener_for(file_path2) }
163
+ let(:listener3) { observer.listener_for(file_path3) }
164
+
165
+ let(:actions) do
166
+ RSpec::Sequencing.run("create12") do
167
+ File.open(file_path, "w") { |file| file.write("string11\nstring12") }
168
+ File.open(file_path2, "w") { |file| file.write("string21\nstring22") }
169
+ end
170
+ .then("watch") do
171
+ reading.watch_this(watch_dir)
172
+ end
173
+ .then("wait12") do
174
+ wait(2).for { listener1.calls.last == :delete && listener2.calls.last == :delete }.to eq(true)
175
+ end
176
+ .then_after(2, "create3") do
177
+ File.open(file_path3, "w") { |file| file.write("string31\nstring32") }
178
+ end
179
+ .then("wait3") do
180
+ wait(2).for { listener3.calls.last == :delete }.to eq(true)
181
+ end
182
+ .then("quit") do
183
+ reading.quit
184
+ end
185
+ end
186
+
187
+ it "reads all (3) files" do
188
+ actions.activate_quietly
189
+ reading.subscribe(observer)
190
+ actions.assert_no_errors
191
+ expect(lines.last).to eq 'string32'
192
+ expect(lines.sort).to eq %w(string11 string12 string21 string22 string31 string32)
193
+ expect( reading.watch.watched_files_collection.paths ).to eq [ file_path, file_path2, file_path3 ]
194
+ end
195
+ end
196
+
150
197
  context "when watching a directory with files using exit_after_read" do
151
198
  let(:opts) { super.merge(:exit_after_read => true, :max_open_files => 2) }
152
199
  let(:file_path3) { ::File.join(directory, "3.log") }
@@ -159,40 +206,45 @@ module FileWatch
159
206
  let(:listener6) { observer.listener_for(file_path6) }
160
207
 
161
208
  it "the file is read" do
162
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
209
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
163
210
  reading.watch_this(watch_dir)
164
211
  reading.subscribe(observer)
165
212
  expect(listener3.lines).to eq(["line1", "line2"])
166
213
  end
214
+
167
215
  it "multiple files are read" do
168
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
216
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
169
217
  File.open(file_path4, "w") { |file| file.write("line3\nline4\n") }
170
218
  reading.watch_this(watch_dir)
171
219
  reading.subscribe(observer)
172
220
  expect(listener3.lines.sort).to eq(["line1", "line2", "line3", "line4"])
173
221
  end
222
+
174
223
  it "multiple files are read even if max_open_files is smaller then number of files" do
175
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
224
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
176
225
  File.open(file_path4, "w") { |file| file.write("line3\nline4\n") }
177
226
  File.open(file_path5, "w") { |file| file.write("line5\nline6\n") }
178
227
  reading.watch_this(watch_dir)
179
228
  reading.subscribe(observer)
180
229
  expect(listener3.lines.sort).to eq(["line1", "line2", "line3", "line4", "line5", "line6"])
181
230
  end
231
+
182
232
  it "file as marked as reading_completed" do
183
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
233
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
184
234
  reading.watch_this(watch_dir)
185
235
  reading.subscribe(observer)
186
236
  expect(listener3.calls).to eq([:open, :accept, :accept, :eof, :delete, :reading_completed])
187
237
  end
238
+
188
239
  it "sincedb works correctly" do
189
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
240
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
190
241
  reading.watch_this(watch_dir)
191
242
  reading.subscribe(observer)
192
243
  sincedb_record_fields = File.read(sincedb_path).split(" ")
193
244
  position_field_index = 3
194
245
  expect(sincedb_record_fields[position_field_index]).to eq("12")
195
246
  end
247
+
196
248
  it "does not include new files added after start" do
197
249
  File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
198
250
  reading.watch_this(watch_dir)
@@ -201,7 +253,6 @@ module FileWatch
201
253
  expect(listener3.lines).to eq(["line1", "line2"])
202
254
  expect(listener3.calls).to eq([:open, :accept, :accept, :eof, :delete, :reading_completed])
203
255
  expect(listener6.calls).to eq([])
204
-
205
256
  end
206
257
 
207
258
  end
@@ -1,3 +1,6 @@
1
+ require 'logstash/devutils/rspec/spec_helper'
2
+ require 'logstash/inputs/friendly_durations'
3
+
1
4
  describe FileWatch::Settings do
2
5
 
3
6
  context "when create from options" do
@@ -9,7 +9,9 @@ module FileWatch
9
9
  let(:io) { StringIO.new }
10
10
  let(:db) { Hash.new }
11
11
 
12
- subject { SincedbRecordSerializer.new(SincedbRecordSerializer.days_to_seconds(14)) }
12
+ let(:sincedb_value_expiry) { SincedbRecordSerializer.days_to_seconds(14) }
13
+
14
+ subject { SincedbRecordSerializer.new(sincedb_value_expiry) }
13
15
 
14
16
  context "deserialize from IO" do
15
17
  it 'reads V1 records' do
@@ -82,8 +84,10 @@ module FileWatch
82
84
  end
83
85
 
84
86
  context "given a non default `sincedb_clean_after`" do
87
+
88
+ let(:sincedb_value_expiry) { SincedbRecordSerializer.days_to_seconds(2) }
89
+
85
90
  it "does not write expired db entries to an IO object" do
86
- subject.update_sincedb_value_expiry_from_days(2)
87
91
  one_day_ago = Time.now.to_f - (1.0*24*3600)
88
92
  three_days_ago = one_day_ago - (2.0*24*3600)
89
93
  db[InodeStruct.new("42424242", 2, 5)] = SincedbValue.new(42, one_day_ago)
@@ -117,17 +117,12 @@ module FileWatch
117
117
  class Listener
118
118
  attr_reader :path, :lines, :calls
119
119
 
120
- def initialize(path)
120
+ def initialize(path, lines)
121
121
  @path = path
122
- @lines = Concurrent::Array.new
122
+ @lines = lines || Concurrent::Array.new
123
123
  @calls = Concurrent::Array.new
124
124
  end
125
125
 
126
- def add_lines(lines)
127
- @lines = lines
128
- self
129
- end
130
-
131
126
  def accept(line)
132
127
  @lines << line
133
128
  @calls << :accept
@@ -161,12 +156,7 @@ module FileWatch
161
156
  attr_reader :listeners
162
157
 
163
158
  def initialize(combined_lines = nil)
164
- listener_proc = if combined_lines.nil?
165
- lambda{|k| Listener.new(k) }
166
- else
167
- lambda{|k| Listener.new(k).add_lines(combined_lines) }
168
- end
169
- @listeners = Concurrent::Hash.new {|hash, key| hash[key] = listener_proc.call(key) }
159
+ @listeners = Concurrent::Hash.new { |hash, key| hash[key] = new_listener(key, combined_lines) }
170
160
  end
171
161
 
172
162
  def listener_for(path)
@@ -174,6 +164,14 @@ module FileWatch
174
164
  end
175
165
 
176
166
  def clear
177
- @listeners.clear; end
167
+ @listeners.clear
168
+ end
169
+
170
+ private
171
+
172
+ def new_listener(path, lines = nil)
173
+ Listener.new(path, lines)
174
+ end
175
+
178
176
  end
179
177
  end
@@ -10,15 +10,19 @@ module FileWatch
10
10
  let(:file_path) { ::File.join(directory, "1#{suffix}.log") }
11
11
  let(:file_path2) { ::File.join(directory, "2#{suffix}.log") }
12
12
  let(:file_path3) { ::File.join(directory, "3#{suffix}.log") }
13
- let(:max) { 4095 }
13
+ let(:max) { 4095 }
14
14
  let(:stat_interval) { 0.1 }
15
15
  let(:discover_interval) { 4 }
16
16
  let(:start_new_files_at) { :end }
17
17
  let(:sincedb_path) { ::File.join(directory, "tailing.sdb") }
18
18
  let(:opts) do
19
19
  {
20
- :stat_interval => stat_interval, :start_new_files_at => start_new_files_at, :max_open_files => max,
21
- :delimiter => "\n", :discover_interval => discover_interval, :sincedb_path => sincedb_path,
20
+ :stat_interval => stat_interval,
21
+ :start_new_files_at => start_new_files_at,
22
+ :max_open_files => max,
23
+ :delimiter => "\n",
24
+ :discover_interval => discover_interval,
25
+ :sincedb_path => sincedb_path,
22
26
  :file_sort_by => "path"
23
27
  }
24
28
  end
@@ -30,12 +34,11 @@ module FileWatch
30
34
 
31
35
  before do
32
36
  directory
33
- wait(1.0).for{Dir.exist?(directory)}.to eq(true)
37
+ wait(1.0).for { Dir.exist?(directory) }.to eq(true)
34
38
  end
35
39
 
36
40
  after do
37
41
  FileUtils.rm_rf(directory)
38
- wait(1.0).for{Dir.exist?(directory)}.to eq(false)
39
42
  end
40
43
 
41
44
  describe "max open files (set to 1)" do
@@ -95,16 +98,16 @@ module FileWatch
95
98
  let(:actions) do
96
99
  RSpec::Sequencing
97
100
  .run("create file") do
98
- File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") }
101
+ File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") }
99
102
  end
100
103
  .then_after(0.1, "begin watching") do
101
104
  tailing.watch_this(watch_dir)
102
105
  end
103
- .then_after(2, "add content") do
104
- File.open(file_path, "ab") { |file| file.write("line1\nline2\n") }
106
+ .then_after(1.0, "add content") do
107
+ File.open(file_path, "ab") { |file| file.write("line1\nline2\n") }
105
108
  end
106
109
  .then("wait") do
107
- wait(0.75).for{listener1.lines}.to eq(["line1", "line2"])
110
+ wait(0.75).for { listener1.lines }.to_not be_empty
108
111
  end
109
112
  .then("quit") do
110
113
  tailing.quit
@@ -113,7 +116,6 @@ module FileWatch
113
116
 
114
117
  it "only the new content is read" do
115
118
  actions.activate_quietly
116
- tailing.watch_this(watch_dir)
117
119
  tailing.subscribe(observer)
118
120
  actions.assert_no_errors
119
121
  expect(listener1.calls).to eq([:open, :accept, :accept])
@@ -132,7 +134,7 @@ module FileWatch
132
134
  File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
133
135
  end
134
136
  .then("wait") do
135
- wait(0.75).for{listener1.lines.size}.to eq(2)
137
+ wait(0.75).for { listener1.lines }.to_not be_empty
136
138
  end
137
139
  .then("quit") do
138
140
  tailing.quit
@@ -154,7 +156,7 @@ module FileWatch
154
156
  # so when a stat is taken on the file an error is raised
155
157
  let(:suffix) { "E" }
156
158
  let(:quit_after) { 0.2 }
157
- let(:stat) { double("stat", :size => 100, :modified_at => Time.now.to_f, :identifier => nil, :inode => 234567, :inode_struct => InodeStruct.new("234567", 1, 5)) }
159
+ let(:stat) { double("stat", :size => 100, :modified_at => Time.now.to_f, :inode => 234567, :inode_struct => InodeStruct.new("234567", 1, 5)) }
158
160
  let(:watched_file) { WatchedFile.new(file_path, stat, tailing.settings) }
159
161
  before do
160
162
  allow(stat).to receive(:restat).and_raise(Errno::ENOENT)