logstash-input-file 4.1.1 → 4.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f4177a21fb94dc920a0226ca698ed1bf5a6d96203a2582de150f1b1984b6ab8d
4
- data.tar.gz: 8cdbb99975efb2bda21ed8d1745f5f5f668a6275c89e35718e2a312365cdfd5a
3
+ metadata.gz: e860fa4a6695373c1ea4f70465392f3a95b290aefc499b0e2b5732dd99f5a0c6
4
+ data.tar.gz: 840ac241383e9a0777867da86a71ef3045c9201b0e26074526a8ace5033ee20b
5
5
  SHA512:
6
- metadata.gz: 4552c42974950d4f9490a13883ab77118e21b1f54fa1b7d3cfd83e99191660f15811df0364fe4ee8948fbffa7370aacc67ddc965dc2d02b44992586392bd9b63
7
- data.tar.gz: '00287236b17611e36d6754a265507aa6a31c18e76152e4265853ca7e1c36870865d298e66db905b87dd860b769f754a745abbf4f74eafb2511f91341d3a71c2a'
6
+ metadata.gz: fc311c3ecfe954c669d585cfb6791dfe901170b223e080540e4ecfbdec6a010df76ee3102dfbb732abab34b5cf3422c95b02452d1dddea125b7991be07f4a020
7
+ data.tar.gz: 97990bd44ba64927d16d70c2f7325d578d0b30798860f15a614c8aa6f73cf6ec9ec8ac311b80cabe40ae2ab5ef07372a816675ed4daa3128cdd97e49591828bd
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 4.1.2
2
+ - Fix `require winhelper` error on Windows.
3
+ [Issue #184](https://github.com/logstash-plugins/logstash-input-file/issues/184)
4
+ - Fix a bug where, when no delimiter is found in a chunk, the chunk is reread and no
5
+ forward progress is made in the file.
6
+ [Issue #185](https://github.com/logstash-plugins/logstash-input-file/issues/185)
7
+
1
8
  ## 4.1.1
2
9
  - Fix JAR_VERSION read problem, prevented Logstash from starting.
3
10
  [Issue #180](https://github.com/logstash-plugins/logstash-input-file/issues/180)
@@ -37,7 +37,7 @@ module FileWatch
37
37
  require "jruby_file_watch"
38
38
 
39
39
  if LogStash::Environment.windows?
40
- require "winhelper"
40
+ require_relative "winhelper"
41
41
  FileOpener = FileExt
42
42
  InodeMixin = WindowsInode
43
43
  else
@@ -53,6 +53,8 @@ module FileWatch
53
53
  end
54
54
  end
55
55
 
56
+ BufferExtractResult = Struct.new(:lines, :warning, :additional)
57
+
56
58
  class NoSinceDBPathGiven < StandardError; end
57
59
 
58
60
  # how often (in seconds) we logger.warn a failed file open, per path.
@@ -5,28 +5,28 @@ module FileWatch module ReadMode module Handlers
5
5
  def handle_specifically(watched_file)
6
6
  if open_file(watched_file)
7
7
  add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
8
- # if the `file_chunk_count` * `file_chunk_size` is less than the file size
9
- # then this method will be executed multiple times
10
- # and the seek is moved to just after a line boundary as recorded in the sincedb
11
- # for each run - so we reset the buffer
12
- watched_file.reset_buffer
13
- watched_file.file_seek(watched_file.bytes_read)
14
8
  changed = false
15
9
  @settings.file_chunk_count.times do
16
10
  begin
17
- lines = watched_file.buffer_extract(watched_file.file_read(@settings.file_chunk_size))
18
- logger.warn("read_to_eof: no delimiter found in current chunk") if lines.empty?
11
+ data = watched_file.file_read(@settings.file_chunk_size)
12
+ result = watched_file.buffer_extract(data) # expect BufferExtractResult
13
+ logger.info(result.warning, result.additional) unless result.warning.empty?
19
14
  changed = true
20
- lines.each do |line|
15
+ result.lines.each do |line|
21
16
  watched_file.listener.accept(line)
17
+ # sincedb position is independent from the watched_file bytes_read
22
18
  sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size)
23
19
  end
20
+ # instead of tracking the bytes_read line by line we need to track by the data read size.
21
+ # because we initially seek to the bytes_read not the sincedb position
22
+ watched_file.increment_bytes_read(data.bytesize)
24
23
  rescue EOFError
25
24
  # flush the buffer now in case there is no final delimiter
26
25
  line = watched_file.buffer.flush
27
26
  watched_file.listener.accept(line) unless line.empty?
28
27
  watched_file.listener.eof
29
28
  watched_file.file_close
29
+ # unset_watched_file will set sincedb_value.position to be watched_file.bytes_read
30
30
  sincedb_collection.unset_watched_file(watched_file)
31
31
  watched_file.listener.deleted
32
32
  watched_file.unwatch
@@ -2,8 +2,14 @@
2
2
 
3
3
  module FileWatch
4
4
  # Tracks the position and expiry of the offset of a file-of-interest
5
+ # NOTE: the `watched_file.bytes_read` and this `sincedb_value.position` can diverge
6
+ # At any given moment IF the `watched_file.bytes_read` is greater than `sincedb_value.position`
7
+ # then it is larger to account for bytes held in the `watched_file.buffer`
8
+ # in Tail mode if we quit the buffer is not flushed and we restart from
9
+ # the `sincedb_value.position` (end of the last line read).
10
+ # in Read mode the buffer is flushed as a line and both values should be the same.
5
11
  class SincedbValue
6
- attr_reader :last_changed_at, :watched_file, :path_in_sincedb
12
+ attr_reader :last_changed_at, :watched_file, :path_in_sincedb, :position
7
13
 
8
14
  def initialize(position, last_changed_at = nil, watched_file = nil)
9
15
  @position = position # this is the value read from disk
@@ -21,27 +27,19 @@ module FileWatch
21
27
  @last_changed_at + duration
22
28
  end
23
29
 
24
- def position
25
- # either the value from disk or the current wf position
26
- @watched_file.nil? ? @position : @watched_file.bytes_read
27
- end
28
-
29
30
  def update_position(pos)
31
+ # called when we reset the position to bof or eof on shrink or file read complete
30
32
  touch
31
- if @watched_file.nil?
32
- @position = pos
33
- else
34
- @watched_file.update_bytes_read(pos)
35
- end
33
+ @position = pos
34
+ @watched_file.update_bytes_read(pos) unless @watched_file.nil?
36
35
  end
37
36
 
38
37
  def increment_position(pos)
38
+ # called when actual lines are sent to the observer listener
39
+ # this gets serialized as it's a more true indication of position than
40
+ # chunk read size
39
41
  touch
40
- if watched_file.nil?
41
- @position += pos
42
- else
43
- @watched_file.increment_bytes_read(pos)
44
- end
42
+ @position += pos
45
43
  end
46
44
 
47
45
  def set_watched_file(watched_file)
@@ -69,6 +67,7 @@ module FileWatch
69
67
  end
70
68
 
71
69
  def unset_watched_file
70
+ # called in read mode only because we flushed any remaining bytes as a final line.
72
71
  # cache the position
73
72
  # we don't cache the path here because we know we are done with this file.
74
73
  # either due via the `delete` handling
@@ -42,13 +42,17 @@ module FileWatch module TailMode module Handlers
42
42
  @settings.file_chunk_count.times do
43
43
  begin
44
44
  data = watched_file.file_read(@settings.file_chunk_size)
45
- lines = watched_file.buffer_extract(data)
46
- logger.warn("read_to_eof: no delimiter found in current chunk") if lines.empty?
45
+ result = watched_file.buffer_extract(data) # expect BufferExtractResult
46
+ logger.info(result.warning, result.additional) unless result.warning.empty?
47
47
  changed = true
48
- lines.each do |line|
48
+ result.lines.each do |line|
49
49
  watched_file.listener.accept(line)
50
+ # sincedb position is now independent from the watched_file bytes_read
50
51
  sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size)
51
52
  end
53
+ # instead of tracking the bytes_read line by line we need to track by the data read size.
54
+ # because we seek to the bytes_read not the sincedb position
55
+ watched_file.increment_bytes_read(data.bytesize)
52
56
  rescue EOFError
53
57
  # it only makes sense to signal EOF in "read" mode not "tail"
54
58
  break
@@ -102,7 +102,20 @@ module FileWatch
102
102
  end
103
103
 
104
104
  def buffer_extract(data)
105
- @buffer.extract(data)
105
+ warning, additional = "", {}
106
+ lines = @buffer.extract(data)
107
+ if lines.empty?
108
+ warning.concat("buffer_extract: a delimiter can't be found in current chunk")
109
+ warning.concat(", maybe there are no more delimiters or the delimiter is incorrect")
110
+ warning.concat(" or the text before the delimiter, a 'line', is very large")
111
+ warning.concat(", if this message is logged often try increasing the `file_chunk_size` setting.")
112
+ additional["delimiter"] = @settings.delimiter
113
+ additional["read_position"] = @bytes_read
114
+ additional["bytes_read_count"] = data.bytesize
115
+ additional["last_known_file_size"] = @last_stat_size
116
+ additional["file_path"] = @path
117
+ end
118
+ BufferExtractResult.new(lines, warning, additional)
106
119
  end
107
120
 
108
121
  def increment_bytes_read(delta)
Binary file
@@ -142,7 +142,7 @@ class File < LogStash::Inputs::Base
142
142
 
143
143
  # When the file input discovers a file that was last modified
144
144
  # before the specified timespan in seconds, the file is ignored.
145
- # After it's discovery, if an ignored file is modified it is no
145
+ # After its discovery, if an ignored file is modified it is no
146
146
  # longer ignored and any new data is read. By default, this option is
147
147
  # disabled. Note this unit is in seconds.
148
148
  config :ignore_older, :validate => :number
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-input-file'
4
- s.version = '4.1.1'
4
+ s.version = '4.1.2'
5
5
  s.licenses = ['Apache-2.0']
6
6
  s.summary = "Streams events from files"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require_relative 'spec_helper'
2
3
 
3
4
  describe FileWatch::BufferedTokenizer do
@@ -1,4 +1,4 @@
1
-
1
+ # encoding: utf-8
2
2
  require 'stud/temporary'
3
3
  require_relative 'spec_helper'
4
4
  require 'filewatch/observing_read'
@@ -95,6 +95,25 @@ module FileWatch
95
95
  end
96
96
  end
97
97
 
98
+ context "when a non default delimiter is specified and it is not in the content" do
99
+ let(:opts) { super.merge(:delimiter => "\nø") }
100
+
101
+ it "the file is opened, data is read, but no lines are found initially, at EOF the whole file becomes the line" do
102
+ File.open(file_path, "wb") { |file| file.write("line1\nline2") }
103
+ actions.activate
104
+ reading.watch_this(watch_dir)
105
+ reading.subscribe(observer)
106
+ listener = observer.listener_for(file_path)
107
+ expect(listener.calls).to eq([:open, :accept, :eof, :delete])
108
+ expect(listener.lines).to eq(["line1\nline2"])
109
+ sincedb_record_fields = File.read(sincedb_path).split(" ")
110
+ position_field_index = 3
111
+ # reading, no delimiter is found; at EOF the whole content is flushed as one line, so the position is its byte size (11).
112
+ # there is an info log telling us that no lines were seen but we can't test for it.
113
+ expect(sincedb_record_fields[position_field_index]).to eq("11")
114
+ end
115
+ end
116
+
98
117
  describe "reading fixtures" do
99
118
  let(:directory) { FIXTURE_DIR }
100
119
 
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require "rspec_sequencing"
2
3
  require 'rspec/wait'
3
4
  require "logstash/devutils/rspec/spec_helper"
@@ -118,3 +119,8 @@ module FileWatch
118
119
  @listeners.clear; end
119
120
  end
120
121
  end
122
+
123
+ ENV["LOG_AT"].tap do |level|
124
+ LogStash::Logging::Logger::configure_logging(level) unless level.nil?
125
+ end
126
+
@@ -1,11 +1,8 @@
1
-
1
+ # encoding: utf-8
2
2
  require 'stud/temporary'
3
3
  require_relative 'spec_helper'
4
4
  require 'filewatch/observing_tail'
5
5
 
6
- LogStash::Logging::Logger::configure_logging("WARN")
7
- # LogStash::Logging::Logger::configure_logging("DEBUG")
8
-
9
6
  module FileWatch
10
7
  describe Watch do
11
8
  before(:all) do
@@ -58,7 +55,6 @@ module FileWatch
58
55
  end
59
56
 
60
57
  context "when max_active is 1" do
61
-
62
58
  it "without close_older set, opens only 1 file" do
63
59
  actions.activate
64
60
  tailing.watch_this(watch_dir)
@@ -434,6 +430,33 @@ module FileWatch
434
430
  expect(observer.listener_for(file_path).calls).to eq([:open, :accept, :accept, :timed_out])
435
431
  end
436
432
  end
433
+
434
+ context "when a non default delimiter is specified and it is not in the content" do
435
+ let(:opts) { super.merge(:ignore_older => 20, :close_older => 1, :delimiter => "\nø") }
436
+ before do
437
+ RSpec::Sequencing
438
+ .run("create file") do
439
+ File.open(file_path, "wb") { |file| file.write("line1\nline2") }
440
+ end
441
+ .then("start watching before file ages more than close_older") do
442
+ tailing.watch_this(watch_dir)
443
+ end
444
+ .then_after(2.1, "quit after allowing time to close the file") do
445
+ tailing.quit
446
+ end
447
+ end
448
+
449
+ it "the file is opened, data is read, but no lines are found, the file times out" do
450
+ tailing.subscribe(observer)
451
+ expect(observer.listener_for(file_path).calls).to eq([:open, :timed_out])
452
+ expect(observer.listener_for(file_path).lines).to eq([])
453
+ sincedb_record_fields = File.read(sincedb_path).split(" ")
454
+ position_field_index = 3
455
+ # tailing, no delimiter, we are expecting one, if it grows we read from the start.
456
+ # there is an info log telling us that no lines were seen but we can't test for it.
457
+ expect(sincedb_record_fields[position_field_index]).to eq("0")
458
+ end
459
+ end
437
460
  end
438
461
  end
439
462
 
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require 'stud/temporary'
2
3
  require_relative 'spec_helper'
3
4
 
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require_relative 'spec_helper'
2
3
 
3
4
  module FileWatch
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require "stud/temporary"
2
3
  require "fileutils"
3
4
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-file
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.1.1
4
+ version: 4.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-05-01 00:00:00.000000000 Z
11
+ date: 2018-05-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement