logstash-input-file 4.1.1 → 4.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f4177a21fb94dc920a0226ca698ed1bf5a6d96203a2582de150f1b1984b6ab8d
4
- data.tar.gz: 8cdbb99975efb2bda21ed8d1745f5f5f668a6275c89e35718e2a312365cdfd5a
3
+ metadata.gz: e860fa4a6695373c1ea4f70465392f3a95b290aefc499b0e2b5732dd99f5a0c6
4
+ data.tar.gz: 840ac241383e9a0777867da86a71ef3045c9201b0e26074526a8ace5033ee20b
5
5
  SHA512:
6
- metadata.gz: 4552c42974950d4f9490a13883ab77118e21b1f54fa1b7d3cfd83e99191660f15811df0364fe4ee8948fbffa7370aacc67ddc965dc2d02b44992586392bd9b63
7
- data.tar.gz: '00287236b17611e36d6754a265507aa6a31c18e76152e4265853ca7e1c36870865d298e66db905b87dd860b769f754a745abbf4f74eafb2511f91341d3a71c2a'
6
+ metadata.gz: fc311c3ecfe954c669d585cfb6791dfe901170b223e080540e4ecfbdec6a010df76ee3102dfbb732abab34b5cf3422c95b02452d1dddea125b7991be07f4a020
7
+ data.tar.gz: 97990bd44ba64927d16d70c2f7325d578d0b30798860f15a614c8aa6f73cf6ec9ec8ac311b80cabe40ae2ab5ef07372a816675ed4daa3128cdd97e49591828bd
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 4.1.2
2
+ - Fix `require winhelper` error on Windows.
3
+ [Issue #184](https://github.com/logstash-plugins/logstash-input-file/issues/184)
4
+ - Fix: when no delimiter is found in a chunk, the chunk is reread and no forward progress
5
+ is made in the file.
6
+ [Issue #185](https://github.com/logstash-plugins/logstash-input-file/issues/185)
7
+
1
8
  ## 4.1.1
2
9
  - Fix JAR_VERSION read problem, prevented Logstash from starting.
3
10
  [Issue #180](https://github.com/logstash-plugins/logstash-input-file/issues/180)
@@ -37,7 +37,7 @@ module FileWatch
37
37
  require "jruby_file_watch"
38
38
 
39
39
  if LogStash::Environment.windows?
40
- require "winhelper"
40
+ require_relative "winhelper"
41
41
  FileOpener = FileExt
42
42
  InodeMixin = WindowsInode
43
43
  else
@@ -53,6 +53,8 @@ module FileWatch
53
53
  end
54
54
  end
55
55
 
56
+ BufferExtractResult = Struct.new(:lines, :warning, :additional)
57
+
56
58
  class NoSinceDBPathGiven < StandardError; end
57
59
 
58
60
  # how often (in seconds) we logger.warn a failed file open, per path.
@@ -5,28 +5,28 @@ module FileWatch module ReadMode module Handlers
5
5
  def handle_specifically(watched_file)
6
6
  if open_file(watched_file)
7
7
  add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
8
- # if the `file_chunk_count` * `file_chunk_size` is less than the file size
9
- # then this method will be executed multiple times
10
- # and the seek is moved to just after a line boundary as recorded in the sincedb
11
- # for each run - so we reset the buffer
12
- watched_file.reset_buffer
13
- watched_file.file_seek(watched_file.bytes_read)
14
8
  changed = false
15
9
  @settings.file_chunk_count.times do
16
10
  begin
17
- lines = watched_file.buffer_extract(watched_file.file_read(@settings.file_chunk_size))
18
- logger.warn("read_to_eof: no delimiter found in current chunk") if lines.empty?
11
+ data = watched_file.file_read(@settings.file_chunk_size)
12
+ result = watched_file.buffer_extract(data) # expect BufferExtractResult
13
+ logger.info(result.warning, result.additional) unless result.warning.empty?
19
14
  changed = true
20
- lines.each do |line|
15
+ result.lines.each do |line|
21
16
  watched_file.listener.accept(line)
17
+ # sincedb position is independent from the watched_file bytes_read
22
18
  sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size)
23
19
  end
20
+ # instead of tracking the bytes_read line by line we need to track by the data read size.
21
+ # because we initially seek to the bytes_read not the sincedb position
22
+ watched_file.increment_bytes_read(data.bytesize)
24
23
  rescue EOFError
25
24
  # flush the buffer now in case there is no final delimiter
26
25
  line = watched_file.buffer.flush
27
26
  watched_file.listener.accept(line) unless line.empty?
28
27
  watched_file.listener.eof
29
28
  watched_file.file_close
29
+ # unset_watched_file will set sincedb_value.position to be watched_file.bytes_read
30
30
  sincedb_collection.unset_watched_file(watched_file)
31
31
  watched_file.listener.deleted
32
32
  watched_file.unwatch
@@ -2,8 +2,14 @@
2
2
 
3
3
  module FileWatch
4
4
  # Tracks the position and expiry of the offset of a file-of-interest
5
+ # NOTE: the `watched_file.bytes_read` and this `sincedb_value.position` can diverge
6
+ # At any given moment IF the `watched_file.bytes_read` is greater than `sincedb_value.position`
7
+ # then it is larger to account for bytes held in the `watched_file.buffer`
8
+ # in Tail mode if we quit the buffer is not flushed and we restart from
9
+ # the `sincedb_value.position` (end of the last line read).
10
+ # in Read mode the buffer is flushed as a line and both values should be the same.
5
11
  class SincedbValue
6
- attr_reader :last_changed_at, :watched_file, :path_in_sincedb
12
+ attr_reader :last_changed_at, :watched_file, :path_in_sincedb, :position
7
13
 
8
14
  def initialize(position, last_changed_at = nil, watched_file = nil)
9
15
  @position = position # this is the value read from disk
@@ -21,27 +27,19 @@ module FileWatch
21
27
  @last_changed_at + duration
22
28
  end
23
29
 
24
- def position
25
- # either the value from disk or the current wf position
26
- @watched_file.nil? ? @position : @watched_file.bytes_read
27
- end
28
-
29
30
  def update_position(pos)
31
+ # called when we reset the position to bof or eof on shrink or file read complete
30
32
  touch
31
- if @watched_file.nil?
32
- @position = pos
33
- else
34
- @watched_file.update_bytes_read(pos)
35
- end
33
+ @position = pos
34
+ @watched_file.update_bytes_read(pos) unless @watched_file.nil?
36
35
  end
37
36
 
38
37
  def increment_position(pos)
38
+ # called when actual lines are sent to the observer listener
39
+ # this gets serialized as it's a truer indication of position than
40
+ # chunk read size
39
41
  touch
40
- if watched_file.nil?
41
- @position += pos
42
- else
43
- @watched_file.increment_bytes_read(pos)
44
- end
42
+ @position += pos
45
43
  end
46
44
 
47
45
  def set_watched_file(watched_file)
@@ -69,6 +67,7 @@ module FileWatch
69
67
  end
70
68
 
71
69
  def unset_watched_file
70
+ # called in read mode only because we flushed any remaining bytes as a final line.
72
71
  # cache the position
73
72
  # we don't cache the path here because we know we are done with this file.
74
73
  # either due via the `delete` handling
@@ -42,13 +42,17 @@ module FileWatch module TailMode module Handlers
42
42
  @settings.file_chunk_count.times do
43
43
  begin
44
44
  data = watched_file.file_read(@settings.file_chunk_size)
45
- lines = watched_file.buffer_extract(data)
46
- logger.warn("read_to_eof: no delimiter found in current chunk") if lines.empty?
45
+ result = watched_file.buffer_extract(data) # expect BufferExtractResult
46
+ logger.info(result.warning, result.additional) unless result.warning.empty?
47
47
  changed = true
48
- lines.each do |line|
48
+ result.lines.each do |line|
49
49
  watched_file.listener.accept(line)
50
+ # sincedb position is now independent from the watched_file bytes_read
50
51
  sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size)
51
52
  end
53
+ # instead of tracking the bytes_read line by line we need to track by the data read size.
54
+ # because we seek to the bytes_read not the sincedb position
55
+ watched_file.increment_bytes_read(data.bytesize)
52
56
  rescue EOFError
53
57
  # it only makes sense to signal EOF in "read" mode not "tail"
54
58
  break
@@ -102,7 +102,20 @@ module FileWatch
102
102
  end
103
103
 
104
104
  def buffer_extract(data)
105
- @buffer.extract(data)
105
+ warning, additional = "", {}
106
+ lines = @buffer.extract(data)
107
+ if lines.empty?
108
+ warning.concat("buffer_extract: a delimiter can't be found in current chunk")
109
+ warning.concat(", maybe there are no more delimiters or the delimiter is incorrect")
110
+ warning.concat(" or the text before the delimiter, a 'line', is very large")
111
+ warning.concat(", if this message is logged often try increasing the `file_chunk_size` setting.")
112
+ additional["delimiter"] = @settings.delimiter
113
+ additional["read_position"] = @bytes_read
114
+ additional["bytes_read_count"] = data.bytesize
115
+ additional["last_known_file_size"] = @last_stat_size
116
+ additional["file_path"] = @path
117
+ end
118
+ BufferExtractResult.new(lines, warning, additional)
106
119
  end
107
120
 
108
121
  def increment_bytes_read(delta)
Binary file
@@ -142,7 +142,7 @@ class File < LogStash::Inputs::Base
142
142
 
143
143
  # When the file input discovers a file that was last modified
144
144
  # before the specified timespan in seconds, the file is ignored.
145
- # After it's discovery, if an ignored file is modified it is no
145
+ # After its discovery, if an ignored file is modified it is no
146
146
  # longer ignored and any new data is read. By default, this option is
147
147
  # disabled. Note this unit is in seconds.
148
148
  config :ignore_older, :validate => :number
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-input-file'
4
- s.version = '4.1.1'
4
+ s.version = '4.1.2'
5
5
  s.licenses = ['Apache-2.0']
6
6
  s.summary = "Streams events from files"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require_relative 'spec_helper'
2
3
 
3
4
  describe FileWatch::BufferedTokenizer do
@@ -1,4 +1,4 @@
1
-
1
+ # encoding: utf-8
2
2
  require 'stud/temporary'
3
3
  require_relative 'spec_helper'
4
4
  require 'filewatch/observing_read'
@@ -95,6 +95,25 @@ module FileWatch
95
95
  end
96
96
  end
97
97
 
98
+ context "when a non default delimiter is specified and it is not in the content" do
99
+ let(:opts) { super.merge(:delimiter => "\nø") }
100
+
101
+ it "the file is opened, data is read, but no lines are found initially, at EOF the whole file becomes the line" do
102
+ File.open(file_path, "wb") { |file| file.write("line1\nline2") }
103
+ actions.activate
104
+ reading.watch_this(watch_dir)
105
+ reading.subscribe(observer)
106
+ listener = observer.listener_for(file_path)
107
+ expect(listener.calls).to eq([:open, :accept, :eof, :delete])
108
+ expect(listener.lines).to eq(["line1\nline2"])
109
+ sincedb_record_fields = File.read(sincedb_path).split(" ")
110
+ position_field_index = 3
111
+ # reading, no delimiter is found, so at EOF the buffered content is flushed as one line and the position is the file size (11 bytes).
112
+ # there is an info log telling us that no lines were seen but we can't test for it.
113
+ expect(sincedb_record_fields[position_field_index]).to eq("11")
114
+ end
115
+ end
116
+
98
117
  describe "reading fixtures" do
99
118
  let(:directory) { FIXTURE_DIR }
100
119
 
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require "rspec_sequencing"
2
3
  require 'rspec/wait'
3
4
  require "logstash/devutils/rspec/spec_helper"
@@ -118,3 +119,8 @@ module FileWatch
118
119
  @listeners.clear; end
119
120
  end
120
121
  end
122
+
123
+ ENV["LOG_AT"].tap do |level|
124
+ LogStash::Logging::Logger::configure_logging(level) unless level.nil?
125
+ end
126
+
@@ -1,11 +1,8 @@
1
-
1
+ # encoding: utf-8
2
2
  require 'stud/temporary'
3
3
  require_relative 'spec_helper'
4
4
  require 'filewatch/observing_tail'
5
5
 
6
- LogStash::Logging::Logger::configure_logging("WARN")
7
- # LogStash::Logging::Logger::configure_logging("DEBUG")
8
-
9
6
  module FileWatch
10
7
  describe Watch do
11
8
  before(:all) do
@@ -58,7 +55,6 @@ module FileWatch
58
55
  end
59
56
 
60
57
  context "when max_active is 1" do
61
-
62
58
  it "without close_older set, opens only 1 file" do
63
59
  actions.activate
64
60
  tailing.watch_this(watch_dir)
@@ -434,6 +430,33 @@ module FileWatch
434
430
  expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out])
435
431
  end
436
432
  end
433
+
434
+ context "when a non default delimiter is specified and it is not in the content" do
435
+ let(:opts) { super.merge(:ignore_older => 20, :close_older => 1, :delimiter => "\nø") }
436
+ before do
437
+ RSpec::Sequencing
438
+ .run("create file") do
439
+ File.open(file_path, "wb") { |file| file.write("line1\nline2") }
440
+ end
441
+ .then("start watching before file ages more than close_older") do
442
+ tailing.watch_this(watch_dir)
443
+ end
444
+ .then_after(2.1, "quit after allowing time to close the file") do
445
+ tailing.quit
446
+ end
447
+ end
448
+
449
+ it "the file is opened, data is read, but no lines are found, the file times out" do
450
+ tailing.subscribe(observer)
451
+ expect(observer.listener_for(file_path).calls).to eq([:open, :timed_out])
452
+ expect(observer.listener_for(file_path).lines).to eq([])
453
+ sincedb_record_fields = File.read(sincedb_path).split(" ")
454
+ position_field_index = 3
455
+ # tailing, no delimiter, we are expecting one, if it grows we read from the start.
456
+ # there is an info log telling us that no lines were seen but we can't test for it.
457
+ expect(sincedb_record_fields[position_field_index]).to eq("0")
458
+ end
459
+ end
437
460
  end
438
461
  end
439
462
 
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require 'stud/temporary'
2
3
  require_relative 'spec_helper'
3
4
 
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require_relative 'spec_helper'
2
3
 
3
4
  module FileWatch
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require "stud/temporary"
2
3
  require "fileutils"
3
4
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-file
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.1.1
4
+ version: 4.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-05-01 00:00:00.000000000 Z
11
+ date: 2018-05-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement