logstash-input-file 4.1.16 → 4.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +20 -0
  3. data/LICENSE +199 -10
  4. data/docs/index.asciidoc +23 -7
  5. data/lib/filewatch/discoverer.rb +9 -8
  6. data/lib/filewatch/observing_base.rb +1 -12
  7. data/lib/filewatch/processor.rb +55 -0
  8. data/lib/filewatch/read_mode/handlers/base.rb +8 -6
  9. data/lib/filewatch/read_mode/handlers/read_file.rb +26 -8
  10. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +63 -34
  11. data/lib/filewatch/read_mode/processor.rb +22 -36
  12. data/lib/filewatch/settings.rb +3 -2
  13. data/lib/filewatch/sincedb_collection.rb +23 -21
  14. data/lib/filewatch/stat/generic.rb +8 -13
  15. data/lib/filewatch/stat/windows_path.rb +7 -9
  16. data/lib/filewatch/tail_mode/handlers/delete.rb +2 -4
  17. data/lib/filewatch/tail_mode/processor.rb +47 -54
  18. data/lib/filewatch/watch.rb +12 -14
  19. data/lib/filewatch/watched_file.rb +25 -14
  20. data/lib/filewatch/watched_files_collection.rb +11 -74
  21. data/lib/jars/filewatch-1.0.1.jar +0 -0
  22. data/lib/logstash/inputs/delete_completed_file_handler.rb +5 -0
  23. data/lib/logstash/inputs/file.rb +32 -11
  24. data/logstash-input-file.gemspec +3 -2
  25. data/spec/filewatch/reading_spec.rb +60 -9
  26. data/spec/filewatch/rotate_spec.rb +2 -1
  27. data/spec/filewatch/settings_spec.rb +3 -0
  28. data/spec/filewatch/spec_helper.rb +13 -15
  29. data/spec/filewatch/tailing_spec.rb +14 -12
  30. data/spec/filewatch/watched_file_spec.rb +30 -0
  31. data/spec/filewatch/watched_files_collection_spec.rb +62 -8
  32. data/spec/helpers/spec_helper.rb +8 -0
  33. data/spec/inputs/file_read_spec.rb +154 -4
  34. data/spec/inputs/file_tail_spec.rb +3 -2
  35. metadata +21 -6
Binary file
@@ -2,8 +2,13 @@
2
2
 
3
3
  module LogStash module Inputs
4
4
  class DeleteCompletedFileHandler
5
+ def initialize(watch)
6
+ @watch = watch
7
+ end
8
+
5
9
  def handle(path)
6
10
  Pathname.new(path).unlink rescue nil
11
+ @watch.watched_files_collection.remove_paths([path])
7
12
  end
8
13
  end
9
14
  end end
@@ -6,6 +6,7 @@ require "logstash/codecs/identity_map_codec"
6
6
  require "pathname"
7
7
  require "socket" # for Socket.gethostname
8
8
  require "fileutils"
9
+ require "concurrent/atomic/atomic_reference"
9
10
 
10
11
  require_relative "file/patch"
11
12
  require_relative "file_listener"
@@ -227,6 +228,11 @@ class File < LogStash::Inputs::Base
227
228
  # Sincedb still works, if you run LS once again after doing some changes - only new values will be read
228
229
  config :exit_after_read, :validate => :boolean, :default => false
229
230
 
231
+ # Before starting to read a compressed file, check its validity.
232
+ # This requires a full read of the archive, so it can potentially be time-consuming.
233
+ # If not set to true and the file is corrupted, this could end in cyclic processing of the broken file.
234
+ config :check_archive_validity, :validate => :boolean, :default => false
235
+
230
236
  public
231
237
 
232
238
  class << self
@@ -242,6 +248,9 @@ class File < LogStash::Inputs::Base
242
248
  end
243
249
  end
244
250
 
251
+ # @private used in specs
252
+ attr_reader :watcher
253
+
245
254
  def register
246
255
  require "addressable/uri"
247
256
  require "digest/md5"
@@ -266,10 +275,9 @@ class File < LogStash::Inputs::Base
266
275
  :file_sort_by => @file_sort_by,
267
276
  :file_sort_direction => @file_sort_direction,
268
277
  :exit_after_read => @exit_after_read,
278
+ :check_archive_validity => @check_archive_validity,
269
279
  }
270
280
 
271
- @completed_file_handlers = []
272
-
273
281
  @path.each do |path|
274
282
  if Pathname.new(path).relative?
275
283
  raise ArgumentError.new("File paths must be absolute, relative path specified: #{path}")
@@ -313,15 +321,10 @@ class File < LogStash::Inputs::Base
313
321
  @watcher_class = FileWatch::ObservingTail
314
322
  else
315
323
  @watcher_class = FileWatch::ObservingRead
316
- if @file_completed_action.include?('log')
317
- @completed_file_handlers << LogCompletedFileHandler.new(@file_completed_log_path)
318
- end
319
- if @file_completed_action.include?('delete')
320
- @completed_file_handlers << DeleteCompletedFileHandler.new
321
- end
322
324
  end
323
325
  @codec = LogStash::Codecs::IdentityMapCodec.new(@codec)
324
326
  @completely_stopped = Concurrent::AtomicBoolean.new
327
+ @queue = Concurrent::AtomicReference.new
325
328
  end # def register
326
329
 
327
330
  def completely_stopped?
@@ -329,8 +332,9 @@ class File < LogStash::Inputs::Base
329
332
  @completely_stopped.true?
330
333
  end
331
334
 
335
+ # The WatchedFile calls back here as `observer.listener_for(@path)`
336
+ # @param [String] path the identity
332
337
  def listener_for(path)
333
- # path is the identity
334
338
  FileListener.new(path, self)
335
339
  end
336
340
 
@@ -338,13 +342,25 @@ class File < LogStash::Inputs::Base
338
342
  # if the pipeline restarts this input,
339
343
  # make sure previous files are closed
340
344
  stop
345
+
341
346
  @watcher = @watcher_class.new(@filewatch_config)
347
+
348
+ @completed_file_handlers = []
349
+ if read_mode?
350
+ if @file_completed_action.include?('log')
351
+ @completed_file_handlers << LogCompletedFileHandler.new(@file_completed_log_path)
352
+ end
353
+ if @file_completed_action.include?('delete')
354
+ @completed_file_handlers << DeleteCompletedFileHandler.new(@watcher.watch)
355
+ end
356
+ end
357
+
342
358
  @path.each { |path| @watcher.watch_this(path) }
343
359
  end
344
360
 
345
361
  def run(queue)
346
362
  start_processing
347
- @queue = queue
363
+ @queue.set queue
348
364
  @watcher.subscribe(self) # halts here until quit is called
349
365
  # last action of the subscribe call is to write the sincedb
350
366
  exit_flush
@@ -355,7 +371,7 @@ class File < LogStash::Inputs::Base
355
371
  event.set("[@metadata][host]", @host)
356
372
  event.set("host", @host) unless event.include?("host")
357
373
  decorate(event)
358
- @queue << event
374
+ @queue.get << event
359
375
  end
360
376
 
361
377
  def handle_deletable_path(path)
@@ -376,6 +392,11 @@ class File < LogStash::Inputs::Base
376
392
  end
377
393
  end
378
394
 
395
+ # @private used in specs
396
+ def queue
397
+ @queue.get
398
+ end
399
+
379
400
  private
380
401
 
381
402
  def build_sincedb_base_from_settings(settings)
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-input-file'
4
- s.version = '4.1.16'
4
+ s.version = '4.2.2'
5
5
  s.licenses = ['Apache-2.0']
6
6
  s.summary = "Streams events from files"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -31,10 +31,11 @@ Gem::Specification.new do |s|
31
31
  s.add_runtime_dependency 'addressable'
32
32
  end
33
33
 
34
+ s.add_runtime_dependency 'concurrent-ruby', '~> 1.0'
34
35
  s.add_runtime_dependency 'logstash-codec-multiline', ['~> 3.0']
35
36
 
36
37
  s.add_development_dependency 'stud', ['~> 0.0.19']
37
- s.add_development_dependency 'logstash-devutils', '~> 1.3'
38
+ s.add_development_dependency 'logstash-devutils'
38
39
  s.add_development_dependency 'logstash-codec-json'
39
40
  s.add_development_dependency 'rspec-sequencing'
40
41
  s.add_development_dependency "rspec-wait"
@@ -23,9 +23,12 @@ module FileWatch
23
23
  let(:start_new_files_at) { :end } # should be irrelevant for read mode
24
24
  let(:opts) do
25
25
  {
26
- :stat_interval => stat_interval, :start_new_files_at => start_new_files_at,
27
- :delimiter => "\n", :discover_interval => discover_interval,
28
- :ignore_older => 3600, :sincedb_path => sincedb_path
26
+ :stat_interval => stat_interval,
27
+ :start_new_files_at => start_new_files_at,
28
+ :delimiter => "\n",
29
+ :discover_interval => discover_interval,
30
+ :ignore_older => 3600,
31
+ :sincedb_path => sincedb_path
29
32
  }
30
33
  end
31
34
  let(:observer) { TestObserver.new }
@@ -147,6 +150,50 @@ module FileWatch
147
150
  end
148
151
  end
149
152
 
153
+ context "when watching directory with files and adding a new file" do
154
+ let(:file_path2) { ::File.join(directory, "2.log") }
155
+ let(:file_path3) { ::File.join(directory, "3.log") }
156
+
157
+ let(:opts) { super.merge(:file_sort_by => "last_modified") }
158
+ let(:lines) { [] }
159
+ let(:observer) { TestObserver.new(lines) }
160
+
161
+
162
+ let(:listener2) { observer.listener_for(file_path2) }
163
+ let(:listener3) { observer.listener_for(file_path3) }
164
+
165
+ let(:actions) do
166
+ RSpec::Sequencing.run("create12") do
167
+ File.open(file_path, "w") { |file| file.write("string11\nstring12") }
168
+ File.open(file_path2, "w") { |file| file.write("string21\nstring22") }
169
+ end
170
+ .then("watch") do
171
+ reading.watch_this(watch_dir)
172
+ end
173
+ .then("wait12") do
174
+ wait(2).for { listener1.calls.last == :delete && listener2.calls.last == :delete }.to eq(true)
175
+ end
176
+ .then_after(2, "create3") do
177
+ File.open(file_path3, "w") { |file| file.write("string31\nstring32") }
178
+ end
179
+ .then("wait3") do
180
+ wait(2).for { listener3.calls.last == :delete }.to eq(true)
181
+ end
182
+ .then("quit") do
183
+ reading.quit
184
+ end
185
+ end
186
+
187
+ it "reads all (3) files" do
188
+ actions.activate_quietly
189
+ reading.subscribe(observer)
190
+ actions.assert_no_errors
191
+ expect(lines.last).to eq 'string32'
192
+ expect(lines.sort).to eq %w(string11 string12 string21 string22 string31 string32)
193
+ expect( reading.watch.watched_files_collection.paths ).to eq [ file_path, file_path2, file_path3 ]
194
+ end
195
+ end
196
+
150
197
  context "when watching a directory with files using exit_after_read" do
151
198
  let(:opts) { super.merge(:exit_after_read => true, :max_open_files => 2) }
152
199
  let(:file_path3) { ::File.join(directory, "3.log") }
@@ -159,40 +206,45 @@ module FileWatch
159
206
  let(:listener6) { observer.listener_for(file_path6) }
160
207
 
161
208
  it "the file is read" do
162
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
209
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
163
210
  reading.watch_this(watch_dir)
164
211
  reading.subscribe(observer)
165
212
  expect(listener3.lines).to eq(["line1", "line2"])
166
213
  end
214
+
167
215
  it "multiple files are read" do
168
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
216
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
169
217
  File.open(file_path4, "w") { |file| file.write("line3\nline4\n") }
170
218
  reading.watch_this(watch_dir)
171
219
  reading.subscribe(observer)
172
220
  expect(listener3.lines.sort).to eq(["line1", "line2", "line3", "line4"])
173
221
  end
222
+
174
223
  it "multiple files are read even if max_open_files is smaller then number of files" do
175
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
224
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
176
225
  File.open(file_path4, "w") { |file| file.write("line3\nline4\n") }
177
226
  File.open(file_path5, "w") { |file| file.write("line5\nline6\n") }
178
227
  reading.watch_this(watch_dir)
179
228
  reading.subscribe(observer)
180
229
  expect(listener3.lines.sort).to eq(["line1", "line2", "line3", "line4", "line5", "line6"])
181
230
  end
231
+
182
232
  it "file as marked as reading_completed" do
183
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
233
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
184
234
  reading.watch_this(watch_dir)
185
235
  reading.subscribe(observer)
186
236
  expect(listener3.calls).to eq([:open, :accept, :accept, :eof, :delete, :reading_completed])
187
237
  end
238
+
188
239
  it "sincedb works correctly" do
189
- File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
240
+ File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
190
241
  reading.watch_this(watch_dir)
191
242
  reading.subscribe(observer)
192
243
  sincedb_record_fields = File.read(sincedb_path).split(" ")
193
244
  position_field_index = 3
194
245
  expect(sincedb_record_fields[position_field_index]).to eq("12")
195
246
  end
247
+
196
248
  it "does not include new files added after start" do
197
249
  File.open(file_path3, "w") { |file| file.write("line1\nline2\n") }
198
250
  reading.watch_this(watch_dir)
@@ -201,7 +253,6 @@ module FileWatch
201
253
  expect(listener3.lines).to eq(["line1", "line2"])
202
254
  expect(listener3.calls).to eq([:open, :accept, :accept, :eof, :delete, :reading_completed])
203
255
  expect(listener6.calls).to eq([])
204
-
205
256
  end
206
257
 
207
258
  end
@@ -73,7 +73,8 @@ module FileWatch
73
73
  FileUtils.mv(directory.join("1.logtmp").to_path, file1_path)
74
74
  end
75
75
  .then("wait for expectation") do
76
- wait(2).for{listener1.calls}.to eq([:open, :accept, :accept, :accept])
76
+ sleep(0.25) # if ENV['CI']
77
+ wait(2).for { listener1.calls }.to eq([:open, :accept, :accept, :accept])
77
78
  end
78
79
  .then("quit") do
79
80
  tailing.quit
@@ -1,3 +1,6 @@
1
+ require 'logstash/devutils/rspec/spec_helper'
2
+ require 'logstash/inputs/friendly_durations'
3
+
1
4
  describe FileWatch::Settings do
2
5
 
3
6
  context "when create from options" do
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require "rspec_sequencing"
3
- # require 'rspec/wait'
3
+ require 'rspec/wait'
4
4
  require "logstash/devutils/rspec/spec_helper"
5
5
  require "concurrent"
6
6
  require "timecop"
@@ -117,17 +117,12 @@ module FileWatch
117
117
  class Listener
118
118
  attr_reader :path, :lines, :calls
119
119
 
120
- def initialize(path)
120
+ def initialize(path, lines)
121
121
  @path = path
122
- @lines = Concurrent::Array.new
122
+ @lines = lines || Concurrent::Array.new
123
123
  @calls = Concurrent::Array.new
124
124
  end
125
125
 
126
- def add_lines(lines)
127
- @lines = lines
128
- self
129
- end
130
-
131
126
  def accept(line)
132
127
  @lines << line
133
128
  @calls << :accept
@@ -161,12 +156,7 @@ module FileWatch
161
156
  attr_reader :listeners
162
157
 
163
158
  def initialize(combined_lines = nil)
164
- listener_proc = if combined_lines.nil?
165
- lambda{|k| Listener.new(k) }
166
- else
167
- lambda{|k| Listener.new(k).add_lines(combined_lines) }
168
- end
169
- @listeners = Concurrent::Hash.new {|hash, key| hash[key] = listener_proc.call(key) }
159
+ @listeners = Concurrent::Hash.new { |hash, key| hash[key] = new_listener(key, combined_lines) }
170
160
  end
171
161
 
172
162
  def listener_for(path)
@@ -174,6 +164,14 @@ module FileWatch
174
164
  end
175
165
 
176
166
  def clear
177
- @listeners.clear; end
167
+ @listeners.clear
168
+ end
169
+
170
+ private
171
+
172
+ def new_listener(path, lines = nil)
173
+ Listener.new(path, lines)
174
+ end
175
+
178
176
  end
179
177
  end
@@ -10,15 +10,19 @@ module FileWatch
10
10
  let(:file_path) { ::File.join(directory, "1#{suffix}.log") }
11
11
  let(:file_path2) { ::File.join(directory, "2#{suffix}.log") }
12
12
  let(:file_path3) { ::File.join(directory, "3#{suffix}.log") }
13
- let(:max) { 4095 }
13
+ let(:max) { 4095 }
14
14
  let(:stat_interval) { 0.1 }
15
15
  let(:discover_interval) { 4 }
16
16
  let(:start_new_files_at) { :end }
17
17
  let(:sincedb_path) { ::File.join(directory, "tailing.sdb") }
18
18
  let(:opts) do
19
19
  {
20
- :stat_interval => stat_interval, :start_new_files_at => start_new_files_at, :max_open_files => max,
21
- :delimiter => "\n", :discover_interval => discover_interval, :sincedb_path => sincedb_path,
20
+ :stat_interval => stat_interval,
21
+ :start_new_files_at => start_new_files_at,
22
+ :max_open_files => max,
23
+ :delimiter => "\n",
24
+ :discover_interval => discover_interval,
25
+ :sincedb_path => sincedb_path,
22
26
  :file_sort_by => "path"
23
27
  }
24
28
  end
@@ -30,12 +34,11 @@ module FileWatch
30
34
 
31
35
  before do
32
36
  directory
33
- wait(1.0).for{Dir.exist?(directory)}.to eq(true)
37
+ wait(1.0).for { Dir.exist?(directory) }.to eq(true)
34
38
  end
35
39
 
36
40
  after do
37
41
  FileUtils.rm_rf(directory)
38
- wait(1.0).for{Dir.exist?(directory)}.to eq(false)
39
42
  end
40
43
 
41
44
  describe "max open files (set to 1)" do
@@ -95,16 +98,16 @@ module FileWatch
95
98
  let(:actions) do
96
99
  RSpec::Sequencing
97
100
  .run("create file") do
98
- File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") }
101
+ File.open(file_path, "wb") { |file| file.write("lineA\nlineB\n") }
99
102
  end
100
103
  .then_after(0.1, "begin watching") do
101
104
  tailing.watch_this(watch_dir)
102
105
  end
103
- .then_after(2, "add content") do
104
- File.open(file_path, "ab") { |file| file.write("line1\nline2\n") }
106
+ .then_after(1.0, "add content") do
107
+ File.open(file_path, "ab") { |file| file.write("line1\nline2\n") }
105
108
  end
106
109
  .then("wait") do
107
- wait(0.75).for{listener1.lines}.to eq(["line1", "line2"])
110
+ wait(0.75).for { listener1.lines }.to_not be_empty
108
111
  end
109
112
  .then("quit") do
110
113
  tailing.quit
@@ -113,7 +116,6 @@ module FileWatch
113
116
 
114
117
  it "only the new content is read" do
115
118
  actions.activate_quietly
116
- tailing.watch_this(watch_dir)
117
119
  tailing.subscribe(observer)
118
120
  actions.assert_no_errors
119
121
  expect(listener1.calls).to eq([:open, :accept, :accept])
@@ -132,7 +134,7 @@ module FileWatch
132
134
  File.open(file_path, "wb") { |file| file.write("line1\nline2\n") }
133
135
  end
134
136
  .then("wait") do
135
- wait(0.75).for{listener1.lines.size}.to eq(2)
137
+ wait(0.75).for { listener1.lines }.to_not be_empty
136
138
  end
137
139
  .then("quit") do
138
140
  tailing.quit
@@ -154,7 +156,7 @@ module FileWatch
154
156
  # so when a stat is taken on the file an error is raised
155
157
  let(:suffix) { "E" }
156
158
  let(:quit_after) { 0.2 }
157
- let(:stat) { double("stat", :size => 100, :modified_at => Time.now.to_f, :identifier => nil, :inode => 234567, :inode_struct => InodeStruct.new("234567", 1, 5)) }
159
+ let(:stat) { double("stat", :size => 100, :modified_at => Time.now.to_f, :inode => 234567, :inode_struct => InodeStruct.new("234567", 1, 5)) }
158
160
  let(:watched_file) { WatchedFile.new(file_path, stat, tailing.settings) }
159
161
  before do
160
162
  allow(stat).to receive(:restat).and_raise(Errno::ENOENT)
@@ -35,5 +35,35 @@ module FileWatch
35
35
  expect(watched_file.recent_states).to eq([:watched, :active, :watched, :closed, :watched, :active, :unwatched, :active])
36
36
  end
37
37
  end
38
+
39
+ context 'restat' do
40
+
41
+ let(:directory) { Stud::Temporary.directory }
42
+ let(:file_path) { ::File.join(directory, "restat.file.txt") }
43
+ let(:pathname) { Pathname.new(file_path) }
44
+
45
+ before { FileUtils.touch file_path, :mtime => Time.now - 300 }
46
+
47
+ it 'reports false value when no changes' do
48
+ file = WatchedFile.new(pathname, PathStatClass.new(pathname), Settings.new)
49
+ mtime = file.modified_at
50
+ expect( file.modified_at_changed? ).to be false
51
+ expect( file.restat! ).to be_falsy
52
+ expect( file.modified_at_changed? ).to be false
53
+ expect( file.modified_at ).to eql mtime
54
+ expect( file.modified_at(true) ).to eql mtime
55
+ end
56
+
57
+ it 'reports truthy when changes detected' do
58
+ file = WatchedFile.new(pathname, PathStatClass.new(pathname), Settings.new)
59
+ mtime = file.modified_at
60
+ expect( file.modified_at_changed? ).to be false
61
+ FileUtils.touch file_path
62
+ expect( file.restat! ).to be_truthy
63
+ expect( file.modified_at_changed? ).to be true
64
+ expect( file.modified_at ).to eql mtime # until updated
65
+ expect( file.modified_at(true) ).to be > mtime
66
+ end
67
+ end
38
68
  end
39
69
  end