logstash-input-file 4.1.3 → 4.1.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/JAR_VERSION +1 -1
- data/README.md +0 -3
- data/docs/index.asciidoc +26 -16
- data/lib/filewatch/bootstrap.rb +10 -21
- data/lib/filewatch/discoverer.rb +35 -28
- data/lib/filewatch/observing_base.rb +2 -1
- data/lib/filewatch/read_mode/handlers/base.rb +19 -6
- data/lib/filewatch/read_mode/handlers/read_file.rb +43 -32
- data/lib/filewatch/read_mode/handlers/read_zip_file.rb +8 -3
- data/lib/filewatch/read_mode/processor.rb +8 -8
- data/lib/filewatch/settings.rb +3 -3
- data/lib/filewatch/sincedb_collection.rb +56 -42
- data/lib/filewatch/sincedb_value.rb +6 -0
- data/lib/filewatch/stat/generic.rb +34 -0
- data/lib/filewatch/stat/windows_path.rb +32 -0
- data/lib/filewatch/tail_mode/handlers/base.rb +40 -22
- data/lib/filewatch/tail_mode/handlers/create.rb +1 -2
- data/lib/filewatch/tail_mode/handlers/create_initial.rb +2 -1
- data/lib/filewatch/tail_mode/handlers/delete.rb +13 -1
- data/lib/filewatch/tail_mode/handlers/grow.rb +5 -2
- data/lib/filewatch/tail_mode/handlers/shrink.rb +7 -4
- data/lib/filewatch/tail_mode/handlers/unignore.rb +4 -2
- data/lib/filewatch/tail_mode/processor.rb +147 -58
- data/lib/filewatch/watch.rb +15 -35
- data/lib/filewatch/watched_file.rb +237 -41
- data/lib/filewatch/watched_files_collection.rb +2 -2
- data/lib/filewatch/winhelper.rb +167 -25
- data/lib/jars/filewatch-1.0.1.jar +0 -0
- data/lib/logstash/inputs/file.rb +9 -2
- data/logstash-input-file.gemspec +9 -2
- data/spec/file_ext/file_ext_windows_spec.rb +36 -0
- data/spec/filewatch/read_mode_handlers_read_file_spec.rb +2 -2
- data/spec/filewatch/reading_spec.rb +100 -57
- data/spec/filewatch/rotate_spec.rb +451 -0
- data/spec/filewatch/spec_helper.rb +33 -10
- data/spec/filewatch/tailing_spec.rb +273 -153
- data/spec/filewatch/watched_file_spec.rb +3 -3
- data/spec/filewatch/watched_files_collection_spec.rb +3 -3
- data/spec/filewatch/winhelper_spec.rb +4 -5
- data/spec/helpers/logging_level_helper.rb +8 -0
- data/spec/helpers/rspec_wait_handler_helper.rb +38 -0
- data/spec/helpers/spec_helper.rb +7 -1
- data/spec/inputs/file_read_spec.rb +54 -24
- data/spec/inputs/file_tail_spec.rb +244 -284
- metadata +13 -3
- data/lib/jars/filewatch-1.0.0.jar +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: e7cb22ef0489b15ab92212c179444aefcd5be9e95fc4984c28f83de3397bf54c
+  data.tar.gz: 7a68f15af95595390a4f44e3fd9f641522e1e8ce3feafb7b299b1e056b5b2df7
 SHA512:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: fedb59ccffbf65f45f1f88dc76cd5e45d126866f75e4eb9271ef44a1e2b7922f98ff49643272cfdfa5f1ac01d445104f8a6c308f71e8255e85cc19c95809122a
+  data.tar.gz: d10e33bf905addfc71a5ce4ce026f3fe4b7f80871b1ba087f41fc3ed7b7981289986ec61a5b4b96ef9bd37af4b26ace1219b99c723032a49a1779ee9e52010f2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
+## 4.1.4
+  - Fixed a regression where files discovered after first discovery were not
+    always read from the beginning. Applies to tail mode only.
+    [#198](https://github.com/logstash-plugins/logstash-input-file/issues/198)
+  - Added much better support for file rotation schemes of copy/truncate and
+    rename cascading. Applies to tail mode only.
+  - Added support for processing files over remote mounts e.g. NFS. Before, it
+    was possible to read into memory allocated but not filled with data resulting
+    in ASCII NUL (0) bytes in the message field. Now, files are read up to the
+    size as given by the remote filesystem client. Applies to tail and read modes.
 ## 4.1.3
   - Fixed `read` mode of regular files sincedb write is requested in each read loop
     iteration rather than waiting for the end-of-file to be reached. Note: for gz files,
data/JAR_VERSION CHANGED
@@ -1 +1 @@
-1.0.0
+1.0.1
data/README.md CHANGED
@@ -2,9 +2,6 @@
 Travis Build
 [![Travis Build Status](https://travis-ci.org/logstash-plugins/logstash-input-file.svg)](https://travis-ci.org/logstash-plugins/logstash-input-file)
 
-Jenkins Build
-[![Travis Build Status](https://travis-ci.org/logstash-plugins/logstash-input-file.svg)](https://travis-ci.org/logstash-plugins/logstash-input-file)
-
 This is a plugin for [Logstash](https://github.com/elastic/logstash).
 
 It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
data/docs/index.asciidoc CHANGED
@@ -78,12 +78,6 @@ Read mode also allows for an action to take place after processing the file completely.
 In the past attempts to simulate a Read mode while still assuming infinite streams
 was not ideal and a dedicated Read mode is an improvement.
 
-==== Reading from remote network volumes
-
-The file input is not tested on remote filesystems such as NFS, Samba, s3fs-fuse, etc. These
-remote filesystems typically have behaviors that are very different from local filesystems and
-are therefore unlikely to work correctly when used with the file input.
-
 ==== Tracking of current position in watched files
 
 The plugin keeps track of the current position in each file by
@@ -103,6 +97,10 @@ A different `sincedb_path` must be used for each input. Using the same
 path will cause issues. The read checkpoints for each input must be
 stored in a different path so the information does not override.
 
+Files are tracked via an identifier. This identifier is made up of the
+inode, major device number and minor device number. In windows, a different
+identifier is taken from a `kernel32` API call.
+
 Sincedb records can now be expired meaning that read positions of older files
 will not be remembered after a certain time period. File systems may need to reuse
 inodes for new content. Ideally, we would not use the read position of old content,
@@ -123,6 +121,19 @@ old sincedb records converted to the new format, this is blank.
 On non-Windows systems you can obtain the inode number of a file
 with e.g. `ls -li`.
 
+==== Reading from remote network volumes
+
+The file input is not thoroughly tested on remote filesystems such as NFS,
+Samba, s3fs-fuse, etc, however NFS is occasionally tested. The file size as given by
+the remote FS client is used to govern how much data to read at any given time to
+prevent reading into allocated but yet unfilled memory.
+As we use the device major and minor in the identifier to track "last read"
+positions of files and on remount the device major and minor can change, the
+sincedb records may not match across remounts.
+Read mode might not be suitable for remote filesystems as the file size at
+discovery on the client side may not be the same as the file size on the remote side
+due to latency in the remote to client copy process.
+
 ==== File rotation in Tail mode
 
 File rotation is detected and handled by this input, regardless of
@@ -130,16 +141,15 @@ whether the file is rotated via a rename or a copy operation. To
 support programs that write to the rotated file for some time after
 the rotation has taken place, include both the original filename and
 the rotated filename (e.g. /var/log/syslog and /var/log/syslog.1) in
-the filename patterns to watch (the `path` option).
-
-`…
-…
-will not get picked up.
+the filename patterns to watch (the `path` option).
+For a rename, the inode will be detected as having moved from
+`/var/log/syslog` to `/var/log/syslog.1` and so the "state" is moved
+internally too, the old content will not be reread but any new content
+on the renamed file will be read.
+For copy/truncate the copied content into a new file path, if discovered, will
+be treated as a new discovery and be read from the beginning. The copied file
+paths should therefore not be in the filename patterns to watch (the `path` option).
+The truncation will be detected and the "last read" position updated to zero.
 
 [id="plugins-{type}s-{plugin}-options"]
 ==== File Input Configuration Options
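The identifier scheme described in the docs hunk above can be shown with a short standalone sketch. The names here are hypothetical, not the gem's API; the tuple it builds mirrors the `[stat.ino.to_s, stat.dev_major, stat.dev_minor]` expression visible in the bootstrap.rb diff below.

# Hypothetical illustration of a sincedb-style identity key: inode plus
# device major/minor. Struct instances compare (and hash) by value, so
# equal identities can be used directly as hash keys.
IdentityKey = Struct.new(:inode, :dev_major, :dev_minor)

def identity_for(path)
  stat = File.stat(path)
  IdentityKey.new(stat.ino.to_s, stat.dev_major, stat.dev_minor)
end

# After `mv /var/log/syslog /var/log/syslog.1`, identity_for returns the same
# key for the new path, which is how a rename can be detected and the "state"
# moved without rereading old content.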
data/lib/filewatch/bootstrap.rb CHANGED
@@ -1,7 +1,5 @@
 # encoding: utf-8
-require "rbconfig"
 require "pathname"
-# require "logstash/environment"
 
 ## Common setup
 # all the required constants and files
@@ -13,36 +11,26 @@ module FileWatch
   # this is used in the read loop e.g.
   # @opts[:file_chunk_count].times do
   # where file_chunk_count defaults to this constant
-…
+  MAX_ITERATIONS = (2**(0.size * 8 - 2) - 2) / 32768
 
   require_relative "helper"
 
-…
-      [fileId, 0, 0] # dev_* doesn't make sense on Windows
-    end
-  end
-
-  module UnixInode
-    def prepare_inode(path, stat)
-      [stat.ino.to_s, stat.dev_major, stat.dev_minor]
-    end
-  end
-
-  jar_version = Pathname.new(__FILE__).dirname.join("../../JAR_VERSION").realpath.read.strip
-
+  gem_root_dir = Pathname.new(__FILE__).dirname.join("../../").realpath
+  jar_version = gem_root_dir.join("JAR_VERSION").read.strip
+  fullpath = gem_root_dir.join("lib/jars/filewatch-#{jar_version}.jar").expand_path.to_path
   require "java"
-…
+  require fullpath
   require "jruby_file_watch"
 
   if LogStash::Environment.windows?
     require_relative "winhelper"
+    require_relative "stat/windows_path"
+    PathStatClass = Stat::WindowsPath
     FileOpener = FileExt
-    InodeMixin = WindowsInode
   else
+    require_relative "stat/generic"
+    PathStatClass = Stat::Generic
     FileOpener = ::File
-    InodeMixin = UnixInode
   end
@@ -54,6 +42,7 @@ module FileWatch
   end
 
   BufferExtractResult = Struct.new(:lines, :warning, :additional)
+  LoopControlResult = Struct.new(:count, :size, :more)
 
   class NoSinceDBPathGiven < StandardError; end
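The MAX_ITERATIONS expression above is easier to follow with the arithmetic spelled out. A sketch, assuming a 64-bit runtime and assuming the 32768 divisor is the gem's FILE_READ_SIZE chunk size:

# On a 64-bit build, 0.size == 8 (bytes per native Integer), so:
bits           = 0.size * 8 - 2          # => 62
largest_fixnum = 2**bits - 2             # => 4611686018427387902
chunk          = 32_768                  # the divisor above; assumed FILE_READ_SIZE
max_iterations = largest_fixnum / chunk  # => 140737488355327
# max_iterations * chunk still fits in a native Integer, so a default
# file_chunk_count of MAX_ITERATIONS effectively means "read to EOF"
# without overflow in count * chunk_size arithmetic.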
data/lib/filewatch/discoverer.rb CHANGED
@@ -10,8 +10,8 @@ module FileWatch
    include LogStash::Util::Loggable
 
    def initialize(watched_files_collection, sincedb_collection, settings)
-      @watching = …
-      @exclude = …
+      @watching = Concurrent::Array.new
+      @exclude = Concurrent::Array.new
      @watched_files_collection = watched_files_collection
      @sincedb_collection = sincedb_collection
      @settings = settings
@@ -21,13 +21,13 @@ module FileWatch
    def add_path(path)
      return if @watching.member?(path)
      @watching << path
-…
+      discover_files_new_path(path)
      self
    end
 
    def discover
      @watching.each do |path|
-…
+        discover_files_ongoing(path)
      end
    end
 
@@ -37,7 +37,7 @@ module FileWatch
      @exclude.each do |pattern|
        if watched_file.pathname.fnmatch?(pattern)
          if new_discovery
-            logger.…
+            logger.trace("Discoverer can_exclude?: #{watched_file.path}: skipping " +
              "because it matches exclude #{pattern}")
          end
          watched_file.unwatch
@@ -47,45 +47,52 @@ module FileWatch
      false
    end
 
-    def …
-…
+    def discover_files_new_path(path)
+      discover_any_files(path, false)
+    end
+
+    def discover_files_ongoing(path)
+      discover_any_files(path, true)
+    end
+
+    def discover_any_files(path, ongoing)
+      fileset = Dir.glob(path).select{|f| File.file?(f) && !File.symlink?(f)}
+      logger.trace("discover_files", "count" => fileset.size)
+      fileset.each do |file|
        pathname = Pathname.new(file)
-        next unless pathname.file?
-        next if pathname.symlink?
        new_discovery = false
        watched_file = @watched_files_collection.watched_file_by_path(file)
        if watched_file.nil?
-          logger.debug("Discoverer discover_files: #{path}: new: #{file} (exclude is #{@exclude.inspect})")
          new_discovery = true
-          watched_file = WatchedFile.new(pathname, pathname…
+          watched_file = WatchedFile.new(pathname, PathStatClass.new(pathname), @settings)
        end
        # if it already unwatched or its excluded then we can skip
        next if watched_file.unwatched? || can_exclude?(watched_file, new_discovery)
 
+        logger.trace("discover_files handling:", "new discovery"=> new_discovery, "watched_file details" => watched_file.details)
+
        if new_discovery
-          if …
-            logger.debug("Discoverer discover_files: #{file}: skipping because it was last modified more than #{@settings.ignore_older} seconds ago")
-            # on discovery ignorable watched_files are put into the ignored state and that
-            # updates the size from the internal stat
-            # so the existing contents are not read.
-            # because, normally, a newly discovered file will
-            # have a watched_file size of zero
-            # they are still added to the collection so we know they are there for the next periodic discovery
-            watched_file.ignore
-          end
-          # now add the discovered file to the watched_files collection and adjust the sincedb collections
-          @watched_files_collection.add(watched_file)
+          watched_file.initial_completed if ongoing
          # initially when the sincedb collection is filled with records from the persistence file
          # each value is not associated with a watched file
          # a sincedb_value can be:
          # unassociated
          # associated with this watched_file
          # associated with a different watched_file
-          @sincedb_collection.associate(watched_file)
+          if @sincedb_collection.associate(watched_file)
+            if watched_file.file_ignorable?
+              logger.trace("Discoverer discover_files: #{file}: skipping because it was last modified more than #{@settings.ignore_older} seconds ago")
+              # on discovery ignorable watched_files are put into the ignored state and that
+              # updates the size from the internal stat
+              # so the existing contents are not read.
+              # because, normally, a newly discovered file will
+              # have a watched_file size of zero
+              # they are still added to the collection so we know they are there for the next periodic discovery
+              watched_file.ignore_as_unread
+            end
+            # now add the discovered file to the watched_files collection and adjust the sincedb collections
+            @watched_files_collection.add(watched_file)
+          end
        end
        # at this point the watched file is created, is in the db but not yet opened or being processed
      end
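The `ongoing` flag threaded through the new methods above appears to be the mechanism behind the #198 fix in the changelog: only files found by the periodic `discover` sweep are marked `initial_completed`, so they bypass the "initial" handling (which in tail mode starts at the `start_new_files_at` position) and are read from the beginning. A condensed sketch of that flow, with a hypothetical `make_watched_file` helper standing in for the construction shown in the diff:

# Condensed from the diff above; make_watched_file is a hypothetical helper.
def discover_any_files(path, ongoing)
  fileset = Dir.glob(path).select { |f| File.file?(f) && !File.symlink?(f) }
  fileset.each do |file|
    watched_file = make_watched_file(file)
    # ongoing == false: the path was just added, keep the "initial" status
    # ongoing == true:  a later periodic sweep found it; clearing "initial"
    #                   means tail mode reads the file from the beginning
    watched_file.initial_completed if ongoing
  end
end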
data/lib/filewatch/settings.rb CHANGED
@@ -44,7 +44,8 @@ module FileWatch
        :exclude => [],
        :start_new_files_at => :end,
        :delimiter => "\n",
-        :file_chunk_count => …
+        :file_chunk_count => MAX_ITERATIONS,
+        :file_chunk_size => FILE_READ_SIZE,
        :file_sort_by => "last_modified",
        :file_sort_direction => "asc",
      }.merge(opts)
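The two chunk settings above bound how much of one file is read before other files get a turn in the read loop. A sketch of the budget they imply; the numeric values assume a 64-bit build and assume FILE_READ_SIZE is 32768:

file_chunk_size  = 32_768               # bytes per read, assumed FILE_READ_SIZE
file_chunk_count = 140_737_488_355_327  # MAX_ITERATIONS default, 64-bit
# bytes a single file may consume before the loop moves on:
budget = file_chunk_size * file_chunk_count
# with the defaults this is effectively unbounded ("read to EOF");
# setting e.g. :file_chunk_count => 32 caps it at 1 MiB per turn:
32 * 32_768 # => 1_048_576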
data/lib/filewatch/read_mode/handlers/base.rb CHANGED
@@ -19,7 +19,7 @@ module FileWatch module ReadMode module Handlers
    end
 
    def handle(watched_file)
-      logger.…
+      logger.trace("handling: #{watched_file.path}")
      unless watched_file.has_listener?
        watched_file.set_listener(@observer)
      end
@@ -34,7 +34,7 @@ module FileWatch module ReadMode module Handlers
 
    def open_file(watched_file)
      return true if watched_file.file_open?
-      logger.…
+      logger.trace("opening #{watched_file.path}")
      begin
        watched_file.open
      rescue
@@ -46,7 +46,7 @@ module FileWatch module ReadMode module Handlers
          logger.warn("failed to open #{watched_file.path}: #{$!.inspect}, #{$!.backtrace.take(3)}")
          watched_file.last_open_warning_at = now
        else
-          logger.…
+          logger.trace("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}")
        end
        watched_file.watch # set it back to watch so we can try it again
      end
@@ -65,13 +65,26 @@ module FileWatch module ReadMode module Handlers
      elsif sincedb_value.watched_file == watched_file
        update_existing_sincedb_collection_value(watched_file, sincedb_value)
      else
-        …
+        msg = "add_or_update_sincedb_collection: the found sincedb_value has a watched_file - this is a rename, switching inode to this watched file"
+        logger.trace(msg)
+        existing_watched_file = sincedb_value.watched_file
+        if existing_watched_file.nil?
+          sincedb_value.set_watched_file(watched_file)
+          logger.trace("add_or_update_sincedb_collection: switching as new file")
+          watched_file.rotate_as_file
+          watched_file.update_bytes_read(sincedb_value.position)
+        else
+          sincedb_value.set_watched_file(watched_file)
+          logger.trace("add_or_update_sincedb_collection: switching from...", "watched_file details" => watched_file.details)
+          watched_file.rotate_from(existing_watched_file)
+        end
+
      end
      watched_file.initial_completed
    end
 
    def update_existing_sincedb_collection_value(watched_file, sincedb_value)
-      logger.…
+      logger.trace("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}")
      # sincedb_value is the source of truth
      watched_file.update_bytes_read(sincedb_value.position)
    end
@@ -79,7 +92,7 @@ module FileWatch module ReadMode module Handlers
    def add_new_value_sincedb_collection(watched_file)
      sincedb_value = SincedbValue.new(0)
      sincedb_value.set_watched_file(watched_file)
-      logger.…
+      logger.trace("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position)
      sincedb_collection.set(watched_file.sincedb_key, sincedb_value)
    end
  end
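The new `else` branch above decides how an already-known sincedb key is adopted by a freshly seen file. A reduced sketch of the decision; the method name is hypothetical, but the calls are the ones in the hunk:

def adopt_sincedb_value(sincedb_value, watched_file)
  previous = sincedb_value.watched_file
  sincedb_value.set_watched_file(watched_file)
  if previous.nil?
    # key known only from a previous session: treat as a rotated-in file
    # and resume from the persisted position
    watched_file.rotate_as_file
    watched_file.update_bytes_read(sincedb_value.position)
  else
    # key currently owned by another live file: a rename; hand state over
    # so old content is not reread
    watched_file.rotate_from(previous)
  end
end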
data/lib/filewatch/read_mode/handlers/read_file.rb CHANGED
@@ -5,40 +5,51 @@ module FileWatch module ReadMode module Handlers
    def handle_specifically(watched_file)
      if open_file(watched_file)
        add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
-        …
+        loop do
          break if quit?
-…
+          loop_control = watched_file.loop_control_adjusted_for_stat_size
+          controlled_read(watched_file, loop_control)
+          sincedb_collection.request_disk_flush
+          break unless loop_control.more
+        end
+        if watched_file.all_read?
+          # flush the buffer now in case there is no final delimiter
+          line = watched_file.buffer.flush
+          watched_file.listener.accept(line) unless line.empty?
+          watched_file.listener.eof
+          watched_file.file_close
+          key = watched_file.sincedb_key
+          sincedb_collection.reading_completed(key)
+          sincedb_collection.clear_watched_file(key)
+          watched_file.listener.deleted
+          watched_file.unwatch
+        end
+      end
+    end
+
+    def controlled_read(watched_file, loop_control)
+      logger.trace("reading...", "iterations" => loop_control.count, "amount" => loop_control.size, "filename" => watched_file.filename)
+      loop_control.count.times do
+        begin
+          result = watched_file.read_extract_lines(loop_control.size) # expect BufferExtractResult
+          logger.info(result.warning, result.additional) unless result.warning.empty?
+          result.lines.each do |line|
+            watched_file.listener.accept(line)
+            # sincedb position is independent from the watched_file bytes_read
+            delta = line.bytesize + @settings.delimiter_byte_size
+            sincedb_collection.increment(watched_file.sincedb_key, delta)
          end
+        rescue EOFError => e
+          logger.error("controlled_read: eof error reading file", "path" => watched_file.path, "error" => e.inspect, "backtrace" => e.backtrace.take(8))
+          break
+        rescue Errno::EWOULDBLOCK, Errno::EINTR => e
+          logger.error("controlled_read: block or interrupt error reading file", "path" => watched_file.path, "error" => e.inspect, "backtrace" => e.backtrace.take(8))
+          watched_file.listener.error
+          break
+        rescue => e
+          logger.error("controlled_read: general error reading file", "path" => watched_file.path, "error" => e.inspect, "backtrace" => e.backtrace.take(8))
+          watched_file.listener.error
+          break
        end
      end
    end
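`loop_control_adjusted_for_stat_size` itself is not shown in this diff. A plausible sketch of what such a method computes, sized so reads never pass the stat-reported size (which is what guards against the NUL-byte issue on remote mounts noted in the changelog). This is a hypothetical reconstruction, reusing the LoopControlResult struct added in bootstrap.rb:

LoopControlResult = Struct.new(:count, :size, :more)

def loop_control(bytes_read, stat_size, chunk_size, chunk_count)
  to_read = stat_size - bytes_read
  return LoopControlResult.new(0, 0, false) if to_read < 1
  # short tail: one pass sized to exactly what remains
  return LoopControlResult.new(1, to_read, false) if to_read < chunk_size
  count, remainder = to_read.divmod(chunk_size)
  more = count > chunk_count || remainder > 0
  count = chunk_count if count > chunk_count
  LoopControlResult.new(count, chunk_size, more)
end

Each `controlled_read` pass then consumes `count` chunks of `size` bytes, and `more` tells `handle_specifically` whether to loop again before the `all_read?` check.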
data/lib/filewatch/read_mode/handlers/read_zip_file.rb CHANGED
@@ -13,6 +13,11 @@ module FileWatch module ReadMode module Handlers
      add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
      # can't really stripe read a zip file, its all or nothing.
      watched_file.listener.opened
+      # what do we do about quit when we have just begun reading the zipped file (e.g. pipeline reloading)
+      # should we track lines read in the sincedb and
+      # fast forward through the lines until we reach unseen content?
+      # meaning that we can quit in the middle of a zip file
+      key = watched_file.sincedb_key
      begin
        file_stream = FileInputStream.new(watched_file.path)
        gzip_stream = GZIPInputStream.new(file_stream)
@@ -31,8 +36,8 @@ module FileWatch module ReadMode module Handlers
        logger.error("Cannot decompress the gzip file at path: #{watched_file.path}")
        watched_file.listener.error
      else
-        …
-        sincedb_collection.…
+        sincedb_collection.store_last_read(key, watched_file.last_stat_size)
+        sincedb_collection.request_disk_flush
        watched_file.listener.deleted
        watched_file.unwatch
      ensure
@@ -42,7 +47,7 @@ module FileWatch module ReadMode module Handlers
        close_and_ignore_ioexception(gzip_stream) unless gzip_stream.nil?
        close_and_ignore_ioexception(file_stream) unless file_stream.nil?
      end
-      sincedb_collection.…
+      sincedb_collection.clear_watched_file(key)
    end
 
    private