logstash-input-file 4.1.3 → 4.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/JAR_VERSION +1 -1
- data/README.md +0 -3
- data/docs/index.asciidoc +26 -16
- data/lib/filewatch/bootstrap.rb +10 -21
- data/lib/filewatch/discoverer.rb +35 -28
- data/lib/filewatch/observing_base.rb +2 -1
- data/lib/filewatch/read_mode/handlers/base.rb +19 -6
- data/lib/filewatch/read_mode/handlers/read_file.rb +43 -32
- data/lib/filewatch/read_mode/handlers/read_zip_file.rb +8 -3
- data/lib/filewatch/read_mode/processor.rb +8 -8
- data/lib/filewatch/settings.rb +3 -3
- data/lib/filewatch/sincedb_collection.rb +56 -42
- data/lib/filewatch/sincedb_value.rb +6 -0
- data/lib/filewatch/stat/generic.rb +34 -0
- data/lib/filewatch/stat/windows_path.rb +32 -0
- data/lib/filewatch/tail_mode/handlers/base.rb +40 -22
- data/lib/filewatch/tail_mode/handlers/create.rb +1 -2
- data/lib/filewatch/tail_mode/handlers/create_initial.rb +2 -1
- data/lib/filewatch/tail_mode/handlers/delete.rb +13 -1
- data/lib/filewatch/tail_mode/handlers/grow.rb +5 -2
- data/lib/filewatch/tail_mode/handlers/shrink.rb +7 -4
- data/lib/filewatch/tail_mode/handlers/unignore.rb +4 -2
- data/lib/filewatch/tail_mode/processor.rb +147 -58
- data/lib/filewatch/watch.rb +15 -35
- data/lib/filewatch/watched_file.rb +237 -41
- data/lib/filewatch/watched_files_collection.rb +2 -2
- data/lib/filewatch/winhelper.rb +167 -25
- data/lib/jars/filewatch-1.0.1.jar +0 -0
- data/lib/logstash/inputs/file.rb +9 -2
- data/logstash-input-file.gemspec +9 -2
- data/spec/file_ext/file_ext_windows_spec.rb +36 -0
- data/spec/filewatch/read_mode_handlers_read_file_spec.rb +2 -2
- data/spec/filewatch/reading_spec.rb +100 -57
- data/spec/filewatch/rotate_spec.rb +451 -0
- data/spec/filewatch/spec_helper.rb +33 -10
- data/spec/filewatch/tailing_spec.rb +273 -153
- data/spec/filewatch/watched_file_spec.rb +3 -3
- data/spec/filewatch/watched_files_collection_spec.rb +3 -3
- data/spec/filewatch/winhelper_spec.rb +4 -5
- data/spec/helpers/logging_level_helper.rb +8 -0
- data/spec/helpers/rspec_wait_handler_helper.rb +38 -0
- data/spec/helpers/spec_helper.rb +7 -1
- data/spec/inputs/file_read_spec.rb +54 -24
- data/spec/inputs/file_tail_spec.rb +244 -284
- metadata +13 -3
- data/lib/jars/filewatch-1.0.0.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e7cb22ef0489b15ab92212c179444aefcd5be9e95fc4984c28f83de3397bf54c
+  data.tar.gz: 7a68f15af95595390a4f44e3fd9f641522e1e8ce3feafb7b299b1e056b5b2df7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fedb59ccffbf65f45f1f88dc76cd5e45d126866f75e4eb9271ef44a1e2b7922f98ff49643272cfdfa5f1ac01d445104f8a6c308f71e8255e85cc19c95809122a
+  data.tar.gz: d10e33bf905addfc71a5ce4ce026f3fe4b7f80871b1ba087f41fc3ed7b7981289986ec61a5b4b96ef9bd37af4b26ace1219b99c723032a49a1779ee9e52010f2

data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
+## 4.1.4
+  - Fixed a regression where files discovered after first discovery were not
+    always read from the beginning. Applies to tail mode only.
+    [#198](https://github.com/logstash-plugins/logstash-input-file/issues/198)
+  - Added much better support for file rotation schemes of copy/truncate and
+    rename cascading. Applies to tail mode only.
+  - Added support for processing files over remote mounts e.g. NFS. Before, it
+    was possible to read into memory allocated but not filled with data resulting
+    in ASCII NUL (0) bytes in the message field. Now, files are read up to the
+    size as given by the remote filesystem client. Applies to tail and read modes.
 ## 4.1.3
   - Fixed `read` mode of regular files sincedb write is requested in each read loop
     iteration rather than waiting for the end-of-file to be reached. Note: for gz files,

data/JAR_VERSION
CHANGED
@@ -1 +1 @@
-1.0.0
+1.0.1

data/README.md
CHANGED
@@ -2,9 +2,6 @@
 Travis Build
 [](https://travis-ci.org/logstash-plugins/logstash-input-file)
 
-Jenkins Build
-[](https://travis-ci.org/logstash-plugins/logstash-input-file)
-
 This is a plugin for [Logstash](https://github.com/elastic/logstash).
 
 It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.

data/docs/index.asciidoc
CHANGED
@@ -78,12 +78,6 @@ Read mode also allows for an action to take place after processing the file completely.
 In the past attempts to simulate a Read mode while still assuming infinite streams
 was not ideal and a dedicated Read mode is an improvement.
 
-==== Reading from remote network volumes
-
-The file input is not tested on remote filesystems such as NFS, Samba, s3fs-fuse, etc. These
-remote filesystems typically have behaviors that are very different from local filesystems and
-are therefore unlikely to work correctly when used with the file input.
-
 ==== Tracking of current position in watched files
 
 The plugin keeps track of the current position in each file by
@@ -103,6 +97,10 @@ A different `sincedb_path` must be used for each input. Using the same
 path will cause issues. The read checkpoints for each input must be
 stored in a different path so the information does not override.
 
+Files are tracked via an identifier. This identifier is made up of the
+inode, major device number and minor device number. On Windows, a different
+identifier is taken from a `kernel32` API call.
+
 Sincedb records can now be expired meaning that read positions of older files
 will not be remembered after a certain time period. File systems may need to reuse
 inodes for new content. Ideally, we would not use the read position of old content,
@@ -123,6 +121,19 @@ old sincedb records converted to the new format, this is blank.
 On non-Windows systems you can obtain the inode number of a file
 with e.g. `ls -li`.
 
+==== Reading from remote network volumes
+
+The file input is not thoroughly tested on remote filesystems such as NFS,
+Samba, s3fs-fuse, etc., however NFS is occasionally tested. The file size as given by
+the remote FS client is used to govern how much data to read at any given time, to
+prevent reading into allocated but as yet unfilled memory.
+As we use the device major and minor in the identifier to track "last read"
+positions of files, and on remount the device major and minor can change, the
+sincedb records may not match across remounts.
+Read mode might not be suitable for remote filesystems, as the file size at
+discovery on the client side may not be the same as the file size on the remote side
+due to latency in the remote-to-client copy process.
+
 ==== File rotation in Tail mode
 
 File rotation is detected and handled by this input, regardless of
@@ -130,16 +141,15 @@ whether the file is rotated via a rename or a copy operation. To
 support programs that write to the rotated file for some time after
 the rotation has taken place, include both the original filename and
 the rotated filename (e.g. /var/log/syslog and /var/log/syslog.1) in
-the filename patterns to watch (the `path` option).
-
-will not get picked up.
+the filename patterns to watch (the `path` option).
+For a rename, the inode will be detected as having moved from
+`/var/log/syslog` to `/var/log/syslog.1` and so the "state" is moved
+internally too; the old content will not be reread, but any new content
+on the renamed file will be read.
+For copy/truncate, the content copied into a new file path, if discovered, will
+be treated as a new discovery and be read from the beginning. The copied file
+paths should therefore not be in the filename patterns to watch (the `path` option).
+The truncation will be detected and the "last read" position updated to zero.
 
 [id="plugins-{type}s-{plugin}-options"]
 ==== File Input Configuration Options

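A note on the identifier scheme above: it can be pictured with a short, standalone Ruby sketch. InodeKey and identity_for are illustrative names only, not the plugin's API, and the sketch assumes a POSIX File::Stat that exposes dev_major/dev_minor.

    # Illustrative only: a file identity built the way the docs describe, from
    # inode plus device major/minor numbers. Struct members compare by value,
    # so the key survives a rename (same inode, new path) but can change when
    # a remount changes the device numbers.
    InodeKey = Struct.new(:inode, :dev_major, :dev_minor)

    def identity_for(path)
      stat = File.stat(path)
      InodeKey.new(stat.ino.to_s, stat.dev_major, stat.dev_minor)
    end

    # After `mv /var/log/syslog /var/log/syslog.1`, identity_for on the new
    # path yields an equal key, which is how tail mode can move the "state"
    # across the rename instead of rereading the old content.
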
data/lib/filewatch/bootstrap.rb
CHANGED
@@ -1,7 +1,5 @@
 # encoding: utf-8
-require "rbconfig"
 require "pathname"
-# require "logstash/environment"
 
 ## Common setup
 # all the required constants and files
@@ -13,36 +11,26 @@ module FileWatch
   # this is used in the read loop e.g.
   # @opts[:file_chunk_count].times do
   # where file_chunk_count defaults to this constant
-
+  MAX_ITERATIONS = (2**(0.size * 8 - 2) - 2) / 32768
 
   require_relative "helper"
 
-  module WindowsInode
-    def prepare_inode(path, stat)
-      fileId = Winhelper.GetWindowsUniqueFileIdentifier(path)
-      [fileId, 0, 0] # dev_* doesn't make sense on Windows
-    end
-  end
-
-  module UnixInode
-    def prepare_inode(path, stat)
-      [stat.ino.to_s, stat.dev_major, stat.dev_minor]
-    end
-  end
-
-  jar_version = Pathname.new(__FILE__).dirname.join("../../JAR_VERSION").realpath.read.strip
-
+  gem_root_dir = Pathname.new(__FILE__).dirname.join("../../").realpath
+  jar_version = gem_root_dir.join("JAR_VERSION").read.strip
+  fullpath = gem_root_dir.join("lib/jars/filewatch-#{jar_version}.jar").expand_path.to_path
   require "java"
-
+  require fullpath
   require "jruby_file_watch"
 
   if LogStash::Environment.windows?
     require_relative "winhelper"
+    require_relative "stat/windows_path"
+    PathStatClass = Stat::WindowsPath
     FileOpener = FileExt
-    InodeMixin = WindowsInode
   else
+    require_relative "stat/generic"
+    PathStatClass = Stat::Generic
     FileOpener = ::File
-    InodeMixin = UnixInode
   end
 
   # Structs can be used as hash keys because they compare by value
@@ -54,6 +42,7 @@ module FileWatch
   end
 
   BufferExtractResult = Struct.new(:lines, :warning, :additional)
+  LoopControlResult = Struct.new(:count, :size, :more)
 
   class NoSinceDBPathGiven < StandardError; end

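The MAX_ITERATIONS expression added above caps the read loop at the largest native Fixnum divided by the 32 KiB chunk size: 0.size is the byte width of a machine integer, so 2**(0.size * 8 - 2) - 1 is the largest value that fits without promotion to Bignum. A quick check, assuming a 64-bit build:

    # Worked evaluation of the constant on a 64-bit build (0.size == 8):
    0.size * 8 - 2                      #=> 62 usable bits in a native Fixnum
    2**62 - 2                           #=> 4611686018427387902
    (2**(0.size * 8 - 2) - 2) / 32768   #=> 140737488355327, i.e. MAX_ITERATIONS
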
data/lib/filewatch/discoverer.rb
CHANGED
@@ -10,8 +10,8 @@ module FileWatch
     include LogStash::Util::Loggable
 
     def initialize(watched_files_collection, sincedb_collection, settings)
-      @watching =
-      @exclude =
+      @watching = Concurrent::Array.new
+      @exclude = Concurrent::Array.new
       @watched_files_collection = watched_files_collection
       @sincedb_collection = sincedb_collection
       @settings = settings
@@ -21,13 +21,13 @@ module FileWatch
     def add_path(path)
      return if @watching.member?(path)
      @watching << path
-      discover_files(path)
+      discover_files_new_path(path)
      self
    end

    def discover
      @watching.each do |path|
-        discover_files(path)
+        discover_files_ongoing(path)
      end
    end

@@ -37,7 +37,7 @@ module FileWatch
      @exclude.each do |pattern|
        if watched_file.pathname.fnmatch?(pattern)
          if new_discovery
-            logger.debug("Discoverer can_exclude?: #{watched_file.path}: skipping " +
+            logger.trace("Discoverer can_exclude?: #{watched_file.path}: skipping " +
              "because it matches exclude #{pattern}")
          end
          watched_file.unwatch
@@ -47,45 +47,52 @@ module FileWatch
      false
    end

-    def discover_files(path)
+    def discover_files_new_path(path)
+      discover_any_files(path, false)
+    end
+
+    def discover_files_ongoing(path)
+      discover_any_files(path, true)
+    end
+
+    def discover_any_files(path, ongoing)
+      fileset = Dir.glob(path).select{|f| File.file?(f) && !File.symlink?(f)}
+      logger.trace("discover_files", "count" => fileset.size)
+      fileset.each do |file|
        pathname = Pathname.new(file)
-        next unless pathname.file?
-        next if pathname.symlink?
        new_discovery = false
        watched_file = @watched_files_collection.watched_file_by_path(file)
        if watched_file.nil?
-          logger.debug("Discoverer discover_files: #{path}: new: #{file} (exclude is #{@exclude.inspect})")
          new_discovery = true
-          watched_file = WatchedFile.new(pathname, pathname
+          watched_file = WatchedFile.new(pathname, PathStatClass.new(pathname), @settings)
        end
        # if it already unwatched or its excluded then we can skip
        next if watched_file.unwatched? || can_exclude?(watched_file, new_discovery)

+        logger.trace("discover_files handling:", "new discovery"=> new_discovery, "watched_file details" => watched_file.details)
+
        if new_discovery
-          if watched_file.file_ignorable?
-            logger.debug("Discoverer discover_files: #{file}: skipping because it was last modified more than #{@settings.ignore_older} seconds ago")
-            # on discovery ignorable watched_files are put into the ignored state and that
-            # updates the size from the internal stat
-            # so the existing contents are not read.
-            # because, normally, a newly discovered file will
-            # have a watched_file size of zero
-            # they are still added to the collection so we know they are there for the next periodic discovery
-            watched_file.ignore
-          end
-          # now add the discovered file to the watched_files collection and adjust the sincedb collections
-          @watched_files_collection.add(watched_file)
+          watched_file.initial_completed if ongoing
          # initially when the sincedb collection is filled with records from the persistence file
          # each value is not associated with a watched file
          # a sincedb_value can be:
          # unassociated
          # associated with this watched_file
          # associated with a different watched_file
-          @sincedb_collection.associate(watched_file)
+          if @sincedb_collection.associate(watched_file)
+            if watched_file.file_ignorable?
+              logger.trace("Discoverer discover_files: #{file}: skipping because it was last modified more than #{@settings.ignore_older} seconds ago")
+              # on discovery ignorable watched_files are put into the ignored state and that
+              # updates the size from the internal stat
+              # so the existing contents are not read.
+              # because, normally, a newly discovered file will
+              # have a watched_file size of zero
+              # they are still added to the collection so we know they are there for the next periodic discovery
+              watched_file.ignore_as_unread
+            end
+            # now add the discovered file to the watched_files collection and adjust the sincedb collections
+            @watched_files_collection.add(watched_file)
+          end
        end
        # at this point the watched file is created, is in the db but not yet opened or being processed
      end

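The new discover_any_files folds the per-entry pathname.file?/pathname.symlink? checks into a single glob-and-filter pass. A standalone sketch of that filter using only the Ruby stdlib; the glob pattern is a hypothetical example:

    require "pathname"

    # Same filtering rule as discover_any_files: expand the glob once, keep
    # only regular files, and drop symlinks so a link cannot alias a file
    # that is already being watched.
    pattern = "/var/log/*.log" # hypothetical example pattern
    fileset = Dir.glob(pattern).select { |f| File.file?(f) && !File.symlink?(f) }
    fileset.each do |file|
      pathname = Pathname.new(file)
      puts "would consider watching: #{pathname}"
    end
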
data/lib/filewatch/observing_base.rb
CHANGED
@@ -44,7 +44,8 @@ module FileWatch
       :exclude => [],
       :start_new_files_at => :end,
       :delimiter => "\n",
-      :file_chunk_count =>
+      :file_chunk_count => MAX_ITERATIONS,
+      :file_chunk_size => FILE_READ_SIZE,
       :file_sort_by => "last_modified",
       :file_sort_direction => "asc",
     }.merge(opts)

data/lib/filewatch/read_mode/handlers/base.rb
CHANGED
@@ -19,7 +19,7 @@ module FileWatch module ReadMode module Handlers
    end

    def handle(watched_file)
-      logger.debug("handling: #{watched_file.path}")
+      logger.trace("handling: #{watched_file.path}")
      unless watched_file.has_listener?
        watched_file.set_listener(@observer)
      end
@@ -34,7 +34,7 @@ module FileWatch module ReadMode module Handlers

    def open_file(watched_file)
      return true if watched_file.file_open?
-      logger.debug("opening #{watched_file.path}")
+      logger.trace("opening #{watched_file.path}")
      begin
        watched_file.open
      rescue
@@ -46,7 +46,7 @@ module FileWatch module ReadMode module Handlers
          logger.warn("failed to open #{watched_file.path}: #{$!.inspect}, #{$!.backtrace.take(3)}")
          watched_file.last_open_warning_at = now
        else
-          logger.debug("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}")
+          logger.trace("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}")
        end
        watched_file.watch # set it back to watch so we can try it again
      end
@@ -65,13 +65,26 @@ module FileWatch module ReadMode module Handlers
      elsif sincedb_value.watched_file == watched_file
        update_existing_sincedb_collection_value(watched_file, sincedb_value)
      else
-
+        msg = "add_or_update_sincedb_collection: the found sincedb_value has a watched_file - this is a rename, switching inode to this watched file"
+        logger.trace(msg)
+        existing_watched_file = sincedb_value.watched_file
+        if existing_watched_file.nil?
+          sincedb_value.set_watched_file(watched_file)
+          logger.trace("add_or_update_sincedb_collection: switching as new file")
+          watched_file.rotate_as_file
+          watched_file.update_bytes_read(sincedb_value.position)
+        else
+          sincedb_value.set_watched_file(watched_file)
+          logger.trace("add_or_update_sincedb_collection: switching from...", "watched_file details" => watched_file.details)
+          watched_file.rotate_from(existing_watched_file)
+        end
+
      end
      watched_file.initial_completed
    end

    def update_existing_sincedb_collection_value(watched_file, sincedb_value)
-      logger.
+      logger.trace("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}")
      # sincedb_value is the source of truth
      watched_file.update_bytes_read(sincedb_value.position)
    end
@@ -79,7 +92,7 @@ module FileWatch module ReadMode module Handlers
    def add_new_value_sincedb_collection(watched_file)
      sincedb_value = SincedbValue.new(0)
      sincedb_value.set_watched_file(watched_file)
-      logger.
+      logger.trace("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position)
      sincedb_collection.set(watched_file.sincedb_key, sincedb_value)
    end

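The new else branch above transfers sincedb state on rename instead of resetting it. A toy model of that decision, runnable on its own; SincedbLike is a hypothetical stand-in, not the plugin's SincedbCollection:

    # Toy model of the rename switch: when a newly discovered file's identity
    # key already holds a read position, adopt that position rather than
    # starting again from zero.
    class SincedbLike
      def initialize
        @positions = {} # identity key => bytes already read
      end

      def associate(key)
        if @positions.key?(key)
          [:renamed, @positions[key]] # same inode, new name: keep position
        else
          @positions[key] = 0
          [:new_file, 0]
        end
      end

      def increment(key, delta)
        @positions[key] += delta
      end
    end

    db = SincedbLike.new
    db.associate(:inode42)       #=> [:new_file, 0]
    db.increment(:inode42, 1024) # 1 KiB read from /var/log/syslog
    db.associate(:inode42)       #=> [:renamed, 1024] after rotation to syslog.1
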
data/lib/filewatch/read_mode/handlers/read_file.rb
CHANGED
@@ -5,40 +5,51 @@ module FileWatch module ReadMode module Handlers
    def handle_specifically(watched_file)
      if open_file(watched_file)
        add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
-
+        loop do
          break if quit?
+          loop_control = watched_file.loop_control_adjusted_for_stat_size
+          controlled_read(watched_file, loop_control)
+          sincedb_collection.request_disk_flush
+          break unless loop_control.more
+        end
+        if watched_file.all_read?
+          # flush the buffer now in case there is no final delimiter
+          line = watched_file.buffer.flush
+          watched_file.listener.accept(line) unless line.empty?
+          watched_file.listener.eof
+          watched_file.file_close
+          key = watched_file.sincedb_key
+          sincedb_collection.reading_completed(key)
+          sincedb_collection.clear_watched_file(key)
+          watched_file.listener.deleted
+          watched_file.unwatch
+        end
+      end
+    end
+
+    def controlled_read(watched_file, loop_control)
+      logger.trace("reading...", "iterations" => loop_control.count, "amount" => loop_control.size, "filename" => watched_file.filename)
+      loop_control.count.times do
+        begin
+          result = watched_file.read_extract_lines(loop_control.size) # expect BufferExtractResult
+          logger.info(result.warning, result.additional) unless result.warning.empty?
+          result.lines.each do |line|
+            watched_file.listener.accept(line)
+            # sincedb position is independent from the watched_file bytes_read
+            delta = line.bytesize + @settings.delimiter_byte_size
+            sincedb_collection.increment(watched_file.sincedb_key, delta)
          end
+        rescue EOFError => e
+          logger.error("controlled_read: eof error reading file", "path" => watched_file.path, "error" => e.inspect, "backtrace" => e.backtrace.take(8))
+          break
+        rescue Errno::EWOULDBLOCK, Errno::EINTR => e
+          logger.error("controlled_read: block or interrupt error reading file", "path" => watched_file.path, "error" => e.inspect, "backtrace" => e.backtrace.take(8))
+          watched_file.listener.error
+          break
+        rescue => e
+          logger.error("controlled_read: general error reading file", "path" => watched_file.path, "error" => e.inspect, "backtrace" => e.backtrace.take(8))
+          watched_file.listener.error
+          break
        end
      end
    end

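loop_control_adjusted_for_stat_size exists so reads never run past the size reported by stat, which is what prevents NUL bytes on remote mounts (see the 4.1.4 changelog entry). A minimal sketch of size-bounded chunked reading; read_up_to and the 32 KiB chunk size are illustrative assumptions, not the plugin's internals:

    # Never read past the size reported by stat, so preallocated-but-unwritten
    # regions (e.g. over NFS) are not consumed as ASCII NUL bytes.
    CHUNK_SIZE = 32_768

    def read_up_to(io, stat_size)
      bytes_read = 0
      while bytes_read < stat_size
        want = [CHUNK_SIZE, stat_size - bytes_read].min
        data = io.read(want)
        break if data.nil? # true EOF arrived before the expected size
        bytes_read += data.bytesize
        yield data
      end
      bytes_read
    end

    path = "/var/log/example.log" # hypothetical path
    File.open(path, "rb") do |io|
      read_up_to(io, File.stat(path).size) { |chunk| print chunk }
    end
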
data/lib/filewatch/read_mode/handlers/read_zip_file.rb
CHANGED
@@ -13,6 +13,11 @@ module FileWatch module ReadMode module Handlers
      add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
      # can't really stripe read a zip file, its all or nothing.
      watched_file.listener.opened
+      # what do we do about quit when we have just begun reading the zipped file (e.g. pipeline reloading)
+      # should we track lines read in the sincedb and
+      # fast forward through the lines until we reach unseen content?
+      # meaning that we can quit in the middle of a zip file
+      key = watched_file.sincedb_key
      begin
        file_stream = FileInputStream.new(watched_file.path)
        gzip_stream = GZIPInputStream.new(file_stream)
@@ -31,8 +36,8 @@ module FileWatch module ReadMode module Handlers
        logger.error("Cannot decompress the gzip file at path: #{watched_file.path}")
        watched_file.listener.error
      else
-
-        sincedb_collection.
+        sincedb_collection.store_last_read(key, watched_file.last_stat_size)
+        sincedb_collection.request_disk_flush
        watched_file.listener.deleted
        watched_file.unwatch
      ensure
@@ -42,7 +47,7 @@ module FileWatch module ReadMode module Handlers
        close_and_ignore_ioexception(gzip_stream) unless gzip_stream.nil?
        close_and_ignore_ioexception(file_stream) unless file_stream.nil?
      end
-      sincedb_collection.
+      sincedb_collection.clear_watched_file(key)
    end

    private