logstash-input-file 4.1.16 → 4.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +20 -0
  3. data/LICENSE +199 -10
  4. data/docs/index.asciidoc +23 -7
  5. data/lib/filewatch/discoverer.rb +9 -8
  6. data/lib/filewatch/observing_base.rb +1 -12
  7. data/lib/filewatch/processor.rb +55 -0
  8. data/lib/filewatch/read_mode/handlers/base.rb +8 -6
  9. data/lib/filewatch/read_mode/handlers/read_file.rb +26 -8
  10. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +63 -34
  11. data/lib/filewatch/read_mode/processor.rb +22 -36
  12. data/lib/filewatch/settings.rb +3 -2
  13. data/lib/filewatch/sincedb_collection.rb +23 -21
  14. data/lib/filewatch/stat/generic.rb +8 -13
  15. data/lib/filewatch/stat/windows_path.rb +7 -9
  16. data/lib/filewatch/tail_mode/handlers/delete.rb +2 -4
  17. data/lib/filewatch/tail_mode/processor.rb +47 -54
  18. data/lib/filewatch/watch.rb +12 -14
  19. data/lib/filewatch/watched_file.rb +25 -14
  20. data/lib/filewatch/watched_files_collection.rb +11 -74
  21. data/lib/jars/filewatch-1.0.1.jar +0 -0
  22. data/lib/logstash/inputs/delete_completed_file_handler.rb +5 -0
  23. data/lib/logstash/inputs/file.rb +32 -11
  24. data/logstash-input-file.gemspec +3 -2
  25. data/spec/filewatch/reading_spec.rb +60 -9
  26. data/spec/filewatch/rotate_spec.rb +2 -1
  27. data/spec/filewatch/settings_spec.rb +3 -0
  28. data/spec/filewatch/spec_helper.rb +13 -15
  29. data/spec/filewatch/tailing_spec.rb +14 -12
  30. data/spec/filewatch/watched_file_spec.rb +30 -0
  31. data/spec/filewatch/watched_files_collection_spec.rb +62 -8
  32. data/spec/helpers/spec_helper.rb +8 -0
  33. data/spec/inputs/file_read_spec.rb +154 -4
  34. data/spec/inputs/file_tail_spec.rb +3 -2
  35. metadata +21 -6
Binary file
@@ -2,8 +2,13 @@
2
2
 
3
3
  module LogStash module Inputs
4
4
  class DeleteCompletedFileHandler
5
+ def initialize(watch)
6
+ @watch = watch
7
+ end
8
+
5
9
  def handle(path)
6
10
  Pathname.new(path).unlink rescue nil
11
+ @watch.watched_files_collection.remove_paths([path])
7
12
  end
8
13
  end
9
14
  end end
@@ -6,6 +6,7 @@ require "logstash/codecs/identity_map_codec"
6
6
  require "pathname"
7
7
  require "socket" # for Socket.gethostname
8
8
  require "fileutils"
9
+ require "concurrent/atomic/atomic_reference"
9
10
 
10
11
  require_relative "file/patch"
11
12
  require_relative "file_listener"
@@ -227,6 +228,11 @@ class File < LogStash::Inputs::Base
227
228
  # Sincedb still works, if you run LS once again after doing some changes - only new values will be read
228
229
  config :exit_after_read, :validate => :boolean, :default => false
229
230
 
231
+ # Before starting to read a compressed file, check that it is valid.
232
+ # This requires a full read of the archive, so it can potentially take extra time.
233
+ # If this is not set to true and the file is corrupted, the broken file could end up being processed cyclically.
234
+ config :check_archive_validity, :validate => :boolean, :default => false
235
+
230
236
  public
231
237
 
232
238
  class << self
@@ -242,6 +248,9 @@ class File < LogStash::Inputs::Base
242
248
  end
243
249
  end
244
250
 
251
+ # @private used in specs
252
+ attr_reader :watcher
253
+
245
254
  def register
246
255
  require "addressable/uri"
247
256
  require "digest/md5"
@@ -266,10 +275,9 @@ class File < LogStash::Inputs::Base
266
275
  :file_sort_by => @file_sort_by,
267
276
  :file_sort_direction => @file_sort_direction,
268
277
  :exit_after_read => @exit_after_read,
278
+ :check_archive_validity => @check_archive_validity,
269
279
  }
270
280
 
271
- @completed_file_handlers = []
272
-
273
281
  @path.each do |path|
274
282
  if Pathname.new(path).relative?
275
283
  raise ArgumentError.new("File paths must be absolute, relative path specified: #{path}")
@@ -313,15 +321,10 @@ class File < LogStash::Inputs::Base
313
321
  @watcher_class = FileWatch::ObservingTail
314
322
  else
315
323
  @watcher_class = FileWatch::ObservingRead
316
- if @file_completed_action.include?('log')
317
- @completed_file_handlers << LogCompletedFileHandler.new(@file_completed_log_path)
318
- end
319
- if @file_completed_action.include?('delete')
320
- @completed_file_handlers << DeleteCompletedFileHandler.new
321
- end
322
324
  end
323
325
  @codec = LogStash::Codecs::IdentityMapCodec.new(@codec)
324
326
  @completely_stopped = Concurrent::AtomicBoolean.new
327
+ @queue = Concurrent::AtomicReference.new
325
328
  end # def register
326
329
 
327
330
  def completely_stopped?
@@ -329,8 +332,9 @@ class File < LogStash::Inputs::Base
329
332
  @completely_stopped.true?
330
333
  end
331
334
 
335
+ # The WatchedFile calls back here as `observer.listener_for(@path)`
336
+ # @param [String] path the identity
332
337
  def listener_for(path)
333
- # path is the identity
334
338
  FileListener.new(path, self)
335
339
  end
336
340
 
@@ -338,13 +342,25 @@ class File < LogStash::Inputs::Base
338
342
  # if the pipeline restarts this input,
339
343
  # make sure previous files are closed
340
344
  stop
345
+
341
346
  @watcher = @watcher_class.new(@filewatch_config)
347
+
348
+ @completed_file_handlers = []
349
+ if read_mode?
350
+ if @file_completed_action.include?('log')
351
+ @completed_file_handlers << LogCompletedFileHandler.new(@file_completed_log_path)
352
+ end
353
+ if @file_completed_action.include?('delete')
354
+ @completed_file_handlers << DeleteCompletedFileHandler.new(@watcher.watch)
355
+ end
356
+ end
357
+
342
358
  @path.each { |path| @watcher.watch_this(path) }
343
359
  end
344
360
 
345
361
  def run(queue)
346
362
  start_processing
347
- @queue = queue
363
+ @queue.set queue
348
364
  @watcher.subscribe(self) # halts here until quit is called
349
365
  # last action of the subscribe call is to write the sincedb
350
366
  exit_flush
@@ -355,7 +371,7 @@ class File < LogStash::Inputs::Base
355
371
  event.set("[@metadata][host]", @host)
356
372
  event.set("host", @host) unless event.include?("host")
357
373
  decorate(event)
358
- @queue << event
374
+ @queue.get << event
359
375
  end
360
376
 
361
377
  def handle_deletable_path(path)
@@ -376,6 +392,11 @@ class File < LogStash::Inputs::Base
376
392
  end
377
393
  end
378
394
 
395
+ # @private used in specs
396
+ def queue
397
+ @queue.get
398
+ end
399
+
379
400
  private
380
401
 
381
402
  def build_sincedb_base_from_settings(settings)
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-input-file'
4
- s.version = '4.1.16'
4
+ s.version = '4.2.2'
5
5
  s.licenses = ['Apache-2.0']
6
6
  s.summary = "Streams events from files"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -31,10 +31,11 @@ Gem::Specification.new do |s|
31
31
  s.add_runtime_dependency 'addressable'
32
32
  end
33
33
 
34
+ s.add_runtime_dependency 'concurrent-ruby', '~> 1.0'
34
35
  s.add_runtime_dependency 'logstash-codec-multiline', ['~> 3.0']
35
36
 
36
37
  s.add_development_dependency 'stud', ['~> 0.0.19']
37
- s.add_development_dependency 'logstash-devutils', '~> 1.3'
38
+ s.add_development_dependency 'logstash-devutils'
38
39
  s.add_development_dependency 'logstash-codec-json'
39
40
  s.add_development_dependency 'rspec-sequencing'
40
41
  s.add_development_dependency "rspec-wait"
@@ -23,9 +23,12 @@ module FileWatch
23
23
  let(:start_new_files_at) { :end } # should be irrelevant for read mode
24
24
  let(:opts) do
25
25
  {
26
- :stat_interval => stat_interval, :start_new_files_at => start_new_files_at,
27
- :delimiter => "\n", :discover_interval => discover_interval,
28
- :ignore_older => 3600, :sincedb_path => sincedb_path
26
+ :stat_interval => stat_interval,
27
+ :start_new_files_at => start_new_files_at,
28
+ :delimiter => "\n",
29
+ :discover_interval => discover_interval,
30
+ :ignore_older => 3600,
31
+ :sincedb_path => sincedb_path
29
32
  }
30
33
  end
31
34
  let(:observer) { TestObserver.new }
@@ -147,6 +150,50 @@ module FileWatch
147
150
  end
148
151
  end
149
152
 
153
+ context "when watching directory with files and adding a new file" do
154
+ let(:file_path2) { ::File.join(directory, "2.log") }
155
+ let(:file_path3) { ::File.join(directory, "3.log") }
156
+
157
+ let(:opts) { super.merge(:file_sort_by => "last_modified") }
158
+ let(:lines) { [] }
159
+ let(:observer) { TestObserver.new(lines) }
160
+
161
+
162
+ let(:listener2) { observer.listener_for(file_path2) }
163
+ let(:listener3) { observer.listener_for(file_path3) }
164
+
165
+ let(:actions) do
166
+ RSpec::Sequencing.run("create12") do
167
+ File.open(file_path, "w") { |file| file.write("string11\nstring12") }
168
+ File.open(file_path2, "w") { |file| file.write("string21\nstring22") }
169
+ end
170
+ .then("watch") do
171
+ reading.watch_this(watch_dir)
172
+ end
173
+ .then("wait12") do
174
+ wait(2).for { listener1.calls.last == :delete && listener2.calls.last == :delete }.to eq(true)
175
+ end
176
+ .then_after(2, "create3") do
177
+ File.open(file_path3, "w") { |file| file.write("string31\nstring32") }
178
+ end
179
+ .then("wait3") do
180
+ wait(2).for { listener3.calls.last == :delete }.to eq(true)
181
+ end
182
+ .then("quit") do
183
+ reading.quit
184
+ end
185
+ end
186
+
187
+ it "reads all (3) files" do
188
+ actions.activate_quietly
189
+ reading.subscribe(observer)
190
+ actions.assert_no_errors
191
+ expect(lines.last).to eq 'string32'
192
+ expect(lines.sort).to eq %w(string11 string12 string21 string22 string31 string32)
193
+ expect( reading.watch.watched_files_collection.paths ).to eq [ file_path, file_path2, file_path3 ]
194
+ end
195
+ end
196
+
150
197
  context "when watching a directory with files using exit_after_read" do
151
198
  let(:opts) { super.merge(:exit_after_read => true, :max_open_files => 2) }
152
199
  let(:file_path3) { ::File.join(directory, "3.log") }
@@ -159,40 +206,45 @@ module FileWatch
159
206
  let(:listener6) { observer.listener_for(file_path6) }
160
207
 
161
208
  it "the file is read" do
162
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
209
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
163
210
  reading.watch_this(watch_dir)
164
211
  reading.subscribe(observer)
165
212
  expect(listener3.lines).to eq(["line1", "line2"])
166
213
  end
214
+
167
215
  it "multiple files are read" do
168
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
216
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
169
217
  File.open(file_path4, "w") { |file| file.write("line3\nline4\n") }
170
218
  reading.watch_this(watch_dir)
171
219
  reading.subscribe(observer)
172
220
  expect(listener3.lines.sort).to eq(["line1", "line2", "line3", "line4"])
173
221
  end
222
+
174
223
  it "multiple files are read even if max_open_files is smaller then number of files" do
175
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
224
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
176
225
  File.open(file_path4, "w") { |file| file.write("line3\nline4\n") }
177
226
  File.open(file_path5, "w") { |file| file.write("line5\nline6\n") }
178
227
  reading.watch_this(watch_dir)
179
228
  reading.subscribe(observer)
180
229
  expect(listener3.lines.sort).to eq(["line1", "line2", "line3", "line4", "line5", "line6"])
181
230
  end
231
+
182
232
  it "file as marked as reading_completed" do
183
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
233
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
184
234
  reading.watch_this(watch_dir)
185
235
  reading.subscribe(observer)
186
236
  expect(listener3.calls).to eq([:open, :accept, :accept, :eof, :delete, :reading_completed])
187
237
  end
238
+
188
239
  it "sincedb works correctly" do
189
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
240
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
190
241
  reading.watch_this(watch_dir)
191
242
  reading.subscribe(observer)
192
243
  sincedb_record_fields = File.read(sincedb_path).split(" ")
193
244
  position_field_index = 3
194
245
  expect(sincedb_record_fields[position_field_index]).to eq("12")
195
246
  end
247
+
196
248
  it "does not include new files added after start" do
197
249
  File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
198
250
  reading.watch_this(watch_dir)
@@ -201,7 +253,6 @@ module FileWatch
201
253
  expect(listener3.lines).to eq(["line1", "line2"])
202
254
  expect(listener3.calls).to eq([:open, :accept, :accept, :eof, :delete, :reading_completed])
203
255
  expect(listener6.calls).to eq([])
204
-
205
256
  end
206
257
 
207
258
  end
@@ -73,7 +73,8 @@ module FileWatch
73
73
  FileUtils.mv(directory.join("1.logtmp").to_path, file1_path)
74
74
  end
75
75
  .then("wait for expectation") do
76
- wait(2).for{listener1.calls}.to eq([:open, :accept, :accept, :accept])
76
+ sleep(0.25) # if ENV['CI']
77
+ wait(2).for { listener1.calls }.to eq([:open, :accept, :accept, :accept])
77
78
  end
78
79
  .then("quit") do
79
80
  tailing.quit
@@ -1,3 +1,6 @@
1
+ require 'logstash/devutils/rspec/spec_helper'
2
+ require 'logstash/inputs/friendly_durations'
3
+
1
4
  describe FileWatch::Settings do
2
5
 
3
6
  context "when create from options" do
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require "rspec_sequencing"
3
- # require 'rspec/wait'
3
+ require 'rspec/wait'
4
4
  require "logstash/devutils/rspec/spec_helper"
5
5
  require "concurrent"
6
6
  require "timecop"
@@ -117,17 +117,12 @@ module FileWatch
117
117
  class Listener
118
118
  attr_reader :path, :lines, :calls
119
119
 
120
- def initialize(path)
120
+ def initialize(path, lines)
121
121
  @path = path
122
- @lines = Concurrent::Array.new
122
+ @lines = lines || Concurrent::Array.new
123
123
  @calls = Concurrent::Array.new
124
124
  end
125
125
 
126
- def add_lines(lines)
127
- @lines = lines
128
- self
129
- end
130
-
131
126
  def accept(line)
132
127
  @lines << line
133
128
  @calls << :accept
@@ -161,12 +156,7 @@ module FileWatch
161
156
  attr_reader :listeners
162
157
 
163
158
  def initialize(combined_lines = nil)
164
- listener_proc = if combined_lines.nil?
165
- lambda{|k| Listener.new(k) }
166
- else
167
- lambda{|k| Listener.new(k).add_lines(combined_lines) }
168
- end
169
- @listeners = Concurrent::Hash.new {|hash, key| hash[key] = listener_proc.call(key) }
159
+ @listeners = Concurrent::Hash.new { |hash, key| hash[key] = new_listener(key, combined_lines) }
170
160
  end
171
161
 
172
162
  def listener_for(path)
@@ -174,6 +164,14 @@ module FileWatch
174
164
  end
175
165
 
176
166
  def clear
177
- @listeners.clear; end
167
+ @listeners.clear
168
+ end
169
+
170
+ private
171
+
172
+ def new_listener(path, lines = nil)
173
+ Listener.new(path, lines)
174
+ end
175
+
178
176
  end
179
177
  end
@@ -10,15 +10,19 @@ module FileWatch
10
10
  let(:file_path) { ::File.join(directory, "1#{suffix}.log") }
11
11
  let(:file_path2) { ::File.join(directory, "2#{suffix}.log") }
12
12
  let(:file_path3) { ::File.join(directory, "3#{suffix}.log") }
13
- let(:max) { 4095 }
13
+ let(:max) { 4095 }
14
14
  let(:stat_interval) { 0.1 }
15
15
  let(:discover_interval) { 4 }
16
16
  let(:start_new_files_at) { :end }
17
17
  let(:sincedb_path) { ::File.join(directory, "tailing.sdb") }
18
18
  let(:opts) do
19
19
  {
20
- :stat_interval => stat_interval, :start_new_files_at => start_new_files_at, :max_open_files => max,
21
- :delimiter => "\n", :discover_interval => discover_interval, :sincedb_path => sincedb_path,
20
+ :stat_interval => stat_interval,
21
+ :start_new_files_at => start_new_files_at,
22
+ :max_open_files => max,
23
+ :delimiter => "\n",
24
+ :discover_interval => discover_interval,
25
+ :sincedb_path => sincedb_path,
22
26
  :file_sort_by => "path"
23
27
  }
24
28
  end
@@ -30,12 +34,11 @@ module FileWatch
30
34
 
31
35
  before do
32
36
  directory
33
- wait(1.0).for{Dir.exist?(directory)}.to eq(true)
37
+ wait(1.0).for { Dir.exist?(directory) }.to eq(true)
34
38
  end
35
39
 
36
40
  after do
37
41
  FileUtils.rm_rf(directory)
38
- wait(1.0).for{Dir.exist?(directory)}.to eq(false)
39
42
  end
40
43
 
41
44
  describe "max open files (set to 1)" do
@@ -95,16 +98,16 @@ module FileWatch
95
98
  let(:actions) do
96
99
  RSpec::Sequencing
97
100
  .run("create file") do
98
- File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") }
101
+ File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") }
99
102
  end
100
103
  .then_after(0.1, "begin watching") do
101
104
  tailing.watch_this(watch_dir)
102
105
  end
103
- .then_after(2, "add content") do
104
- File.open(file_path, "ab") { |file| file.write("line1\nline2\n") }
106
+ .then_after(1.0, "add content") do
107
+ File.open(file_path, "ab") { |file| file.write("line1\nline2\n") }
105
108
  end
106
109
  .then("wait") do
107
- wait(0.75).for{listener1.lines}.to eq(["line1", "line2"])
110
+ wait(0.75).for { listener1.lines }.to_not be_empty
108
111
  end
109
112
  .then("quit") do
110
113
  tailing.quit
@@ -113,7 +116,6 @@ module FileWatch
113
116
 
114
117
  it "only the new content is read" do
115
118
  actions.activate_quietly
116
- tailing.watch_this(watch_dir)
117
119
  tailing.subscribe(observer)
118
120
  actions.assert_no_errors
119
121
  expect(listener1.calls).to eq([:open, :accept, :accept])
@@ -132,7 +134,7 @@ module FileWatch
132
134
  File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
133
135
  end
134
136
  .then("wait") do
135
- wait(0.75).for{listener1.lines.size}.to eq(2)
137
+ wait(0.75).for { listener1.lines }.to_not be_empty
136
138
  end
137
139
  .then("quit") do
138
140
  tailing.quit
@@ -154,7 +156,7 @@ module FileWatch
154
156
  # so when a stat is taken on the file an error is raised
155
157
  let(:suffix) { "E" }
156
158
  let(:quit_after) { 0.2 }
157
- let(:stat) { double("stat", :size => 100, :modified_at => Time.now.to_f, :identifier => nil, :inode => 234567, :inode_struct => InodeStruct.new("234567", 1, 5)) }
159
+ let(:stat) { double("stat", :size => 100, :modified_at => Time.now.to_f, :inode => 234567, :inode_struct => InodeStruct.new("234567", 1, 5)) }
158
160
  let(:watched_file) { WatchedFile.new(file_path, stat, tailing.settings) }
159
161
  before do
160
162
  allow(stat).to receive(:restat).and_raise(Errno::ENOENT)
@@ -35,5 +35,35 @@ module FileWatch
35
35
  expect(watched_file.recent_states).to eq([:watched, :active, :watched, :closed, :watched, :active, :unwatched, :active])
36
36
  end
37
37
  end
38
+
39
+ context 'restat' do
40
+
41
+ let(:directory) { Stud::Temporary.directory }
42
+ let(:file_path) { ::File.join(directory, "restat.file.txt") }
43
+ let(:pathname) { Pathname.new(file_path) }
44
+
45
+ before { FileUtils.touch file_path, :mtime => Time.now - 300 }
46
+
47
+ it 'reports false value when no changes' do
48
+ file = WatchedFile.new(pathname, PathStatClass.new(pathname), Settings.new)
49
+ mtime = file.modified_at
50
+ expect( file.modified_at_changed? ).to be false
51
+ expect( file.restat! ).to be_falsy
52
+ expect( file.modified_at_changed? ).to be false
53
+ expect( file.modified_at ).to eql mtime
54
+ expect( file.modified_at(true) ).to eql mtime
55
+ end
56
+
57
+ it 'reports truthy when changes detected' do
58
+ file = WatchedFile.new(pathname, PathStatClass.new(pathname), Settings.new)
59
+ mtime = file.modified_at
60
+ expect( file.modified_at_changed? ).to be false
61
+ FileUtils.touch file_path
62
+ expect( file.restat! ).to be_truthy
63
+ expect( file.modified_at_changed? ).to be true
64
+ expect( file.modified_at ).to eql mtime # until updated
65
+ expect( file.modified_at(true) ).to be > mtime
66
+ end
67
+ end
38
68
  end
39
69
  end